hindsight-api 0.0.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +38 -0
- hindsight_api/api/__init__.py +105 -0
- hindsight_api/api/http.py +1872 -0
- hindsight_api/api/mcp.py +157 -0
- hindsight_api/engine/__init__.py +47 -0
- hindsight_api/engine/cross_encoder.py +97 -0
- hindsight_api/engine/db_utils.py +93 -0
- hindsight_api/engine/embeddings.py +113 -0
- hindsight_api/engine/entity_resolver.py +575 -0
- hindsight_api/engine/llm_wrapper.py +269 -0
- hindsight_api/engine/memory_engine.py +3095 -0
- hindsight_api/engine/query_analyzer.py +519 -0
- hindsight_api/engine/response_models.py +222 -0
- hindsight_api/engine/retain/__init__.py +50 -0
- hindsight_api/engine/retain/bank_utils.py +423 -0
- hindsight_api/engine/retain/chunk_storage.py +82 -0
- hindsight_api/engine/retain/deduplication.py +104 -0
- hindsight_api/engine/retain/embedding_processing.py +62 -0
- hindsight_api/engine/retain/embedding_utils.py +54 -0
- hindsight_api/engine/retain/entity_processing.py +90 -0
- hindsight_api/engine/retain/fact_extraction.py +1027 -0
- hindsight_api/engine/retain/fact_storage.py +176 -0
- hindsight_api/engine/retain/link_creation.py +121 -0
- hindsight_api/engine/retain/link_utils.py +651 -0
- hindsight_api/engine/retain/orchestrator.py +405 -0
- hindsight_api/engine/retain/types.py +206 -0
- hindsight_api/engine/search/__init__.py +15 -0
- hindsight_api/engine/search/fusion.py +122 -0
- hindsight_api/engine/search/observation_utils.py +132 -0
- hindsight_api/engine/search/reranking.py +103 -0
- hindsight_api/engine/search/retrieval.py +503 -0
- hindsight_api/engine/search/scoring.py +161 -0
- hindsight_api/engine/search/temporal_extraction.py +64 -0
- hindsight_api/engine/search/think_utils.py +255 -0
- hindsight_api/engine/search/trace.py +215 -0
- hindsight_api/engine/search/tracer.py +447 -0
- hindsight_api/engine/search/types.py +160 -0
- hindsight_api/engine/task_backend.py +223 -0
- hindsight_api/engine/utils.py +203 -0
- hindsight_api/metrics.py +227 -0
- hindsight_api/migrations.py +163 -0
- hindsight_api/models.py +309 -0
- hindsight_api/pg0.py +425 -0
- hindsight_api/web/__init__.py +12 -0
- hindsight_api/web/server.py +143 -0
- hindsight_api-0.0.13.dist-info/METADATA +41 -0
- hindsight_api-0.0.13.dist-info/RECORD +48 -0
- hindsight_api-0.0.13.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database migration management using Alembic.
|
|
3
|
+
|
|
4
|
+
This module provides programmatic access to run database migrations
|
|
5
|
+
on application startup. It is designed to be safe for concurrent
|
|
6
|
+
execution - Alembic uses PostgreSQL transactions to prevent
|
|
7
|
+
conflicts when multiple instances start simultaneously.
|
|
8
|
+
|
|
9
|
+
Important: All migrations must be backward-compatible to allow
|
|
10
|
+
safe rolling deployments.
|
|
11
|
+
|
|
12
|
+
No alembic.ini required - all configuration is done programmatically.
|
|
13
|
+
"""
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
import shutil
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Optional
|
|
19
|
+
|
|
20
|
+
from alembic import command
|
|
21
|
+
from alembic.config import Config
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def run_migrations(database_url: str, script_location: Optional[str] = None) -> None:
    """
    Run database migrations to the latest version using programmatic Alembic configuration.

    This function is safe to call on every application startup:
    - Alembic checks the current schema version in the database
    - Only missing migrations are applied
    - PostgreSQL transactions prevent concurrent migration conflicts
    - If schema is already up-to-date, this is a fast no-op

    Args:
        database_url: SQLAlchemy database URL (e.g., "postgresql://user:pass@host/db")
        script_location: Path to alembic migrations directory (e.g., "/path/to/alembic").
            If None, defaults to hindsight-api/alembic directory.

    Raises:
        RuntimeError: If migrations fail to complete
        FileNotFoundError: If script_location doesn't exist

    Example:
        # Using default location (hindsight_api package)
        run_migrations("postgresql://user:pass@host/db")

        # Using custom location (when importing from another project)
        run_migrations(
            "postgresql://user:pass@host/db",
            script_location="/path/to/copied/_alembic"
        )
    """

    def _redact(url: str) -> str:
        # Mask the password in a "scheme://user:pass@host/db" URL so that
        # database credentials are never written to application logs.
        if "://" in url:
            scheme, _, rest = url.partition("://")
            userinfo, sep, host = rest.rpartition("@")
            if sep and ":" in userinfo:
                user = userinfo.split(":", 1)[0]
                return f"{scheme}://{user}:***@{host}"
        return url

    try:
        # Determine script location
        if script_location is None:
            # Default: use the alembic directory in the hindsight_api package
            # This file is in: hindsight-api/hindsight_api/migrations.py
            # Default location is: hindsight-api/alembic
            package_root = Path(__file__).parent.parent
            script_location = str(package_root / "alembic")

        script_path = Path(script_location)
        if not script_path.exists():
            raise FileNotFoundError(
                f"Alembic script location not found at {script_location}. "
                "Database migrations cannot be run."
            )

        # Lazy %-style args instead of f-strings; also log only a
        # credential-redacted URL (the raw URL may embed a password).
        logger.info("Running database migrations to head...")
        logger.info("Database URL: %s", _redact(database_url))
        logger.info("Script location: %s", script_location)

        # Create Alembic configuration programmatically (no alembic.ini needed)
        alembic_cfg = Config()

        # Set the script location (where alembic versions are stored)
        alembic_cfg.set_main_option("script_location", script_location)

        # Set the database URL (full URL, including credentials, is required here)
        alembic_cfg.set_main_option("sqlalchemy.url", database_url)

        # Configure logging (optional, but helps with debugging)
        # Uses Python's logging system instead of alembic.ini
        alembic_cfg.set_main_option("prepend_sys_path", ".")

        # Set path_separator to avoid deprecation warning
        alembic_cfg.set_main_option("path_separator", "os")

        # Run migrations to head (latest version)
        # Note: Alembic may call sys.exit() on errors instead of raising exceptions
        # We rely on the outer try/except and logging to catch issues
        command.upgrade(alembic_cfg, "head")

        logger.info("Database migrations completed successfully")

    except FileNotFoundError:
        logger.error("Alembic script location not found at %s", script_location)
        raise
    except SystemExit as e:
        # Catch sys.exit() calls from Alembic
        logger.error("Alembic called sys.exit() with code: %s", e.code, exc_info=True)
        raise RuntimeError(f"Database migration failed with exit code {e.code}") from e
    except Exception as e:
        logger.error("Failed to run database migrations: %s", e, exc_info=True)
        raise RuntimeError("Database migration failed") from e
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def check_migration_status(database_url: Optional[str] = None, script_location: Optional[str] = None) -> tuple[str | None, str | None]:
    """
    Check current database schema version and latest available version.

    Best-effort: any failure (missing URL, unreachable DB, missing alembic
    scripts) is logged as a warning and reported as ``None`` rather than raised.

    Args:
        database_url: SQLAlchemy database URL. If None, uses HINDSIGHT_API_DATABASE_URL env var.
        script_location: Path to alembic migrations directory. If None, uses default location.

    Returns:
        Tuple of (current_revision, head_revision)
        Returns (None, None) if unable to determine versions
    """
    try:
        from alembic.runtime.migration import MigrationContext
        from alembic.script import ScriptDirectory
        from sqlalchemy import create_engine

        # Get database URL
        if database_url is None:
            database_url = os.getenv("HINDSIGHT_API_DATABASE_URL")
            if not database_url:
                logger.warning("Database URL not provided and HINDSIGHT_API_DATABASE_URL not set, cannot check migration status")
                return None, None

        # Get current revision from database
        engine = create_engine(database_url)
        try:
            with engine.connect() as connection:
                context = MigrationContext.configure(connection)
                current_rev = context.get_current_revision()
        finally:
            # Release the connection pool; without this each status check
            # leaks pooled connections for the lifetime of the process.
            engine.dispose()

        # Get head revision from migration scripts
        if script_location is None:
            package_root = Path(__file__).parent.parent
            script_location = str(package_root / "alembic")

        script_path = Path(script_location)
        if not script_path.exists():
            logger.warning("Script location not found at %s", script_location)
            return current_rev, None

        # Create config programmatically
        alembic_cfg = Config()
        alembic_cfg.set_main_option("script_location", script_location)
        alembic_cfg.set_main_option("path_separator", "os")

        script = ScriptDirectory.from_config(alembic_cfg)
        head_rev = script.get_current_head()

        return current_rev, head_rev

    except Exception as e:
        # Deliberate broad catch: status checking must never crash startup.
        logger.warning("Unable to check migration status: %s", e)
        return None, None
|
hindsight_api/models.py
ADDED
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLAlchemy models for the memory system.
|
|
3
|
+
"""
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Optional
|
|
6
|
+
from uuid import UUID as PyUUID, uuid4
|
|
7
|
+
|
|
8
|
+
from sqlalchemy import (
|
|
9
|
+
CheckConstraint,
|
|
10
|
+
Column,
|
|
11
|
+
Float,
|
|
12
|
+
ForeignKey,
|
|
13
|
+
ForeignKeyConstraint,
|
|
14
|
+
Index,
|
|
15
|
+
Integer,
|
|
16
|
+
PrimaryKeyConstraint,
|
|
17
|
+
Text,
|
|
18
|
+
func,
|
|
19
|
+
text as sql_text,
|
|
20
|
+
)
|
|
21
|
+
from sqlalchemy.dialects.postgresql import JSONB, TIMESTAMP, UUID
|
|
22
|
+
from sqlalchemy.ext.asyncio import AsyncAttrs
|
|
23
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
24
|
+
from pgvector.sqlalchemy import Vector
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class Base(AsyncAttrs, DeclarativeBase):
    """Declarative base for all ORM models.

    Mixes in SQLAlchemy's ``AsyncAttrs`` so the models work cleanly with the
    asyncio engine (attributes can be loaded via the awaitable accessor).
    """
    pass
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class Document(Base):
    """Source documents for memory units.

    Each document belongs to a single memory bank; the primary key is the
    composite (id, bank_id), so document ids are only unique within a bank.
    """
    __tablename__ = "documents"

    id: Mapped[str] = mapped_column(Text, primary_key=True)
    bank_id: Mapped[str] = mapped_column(Text, primary_key=True)
    # Raw source text of the document, if retained.
    original_text: Mapped[Optional[str]] = mapped_column(Text)
    # Hash of the content; indexed below (presumably for dedup lookups — confirm with callers).
    content_hash: Mapped[Optional[str]] = mapped_column(Text)
    # Python attribute is "doc_metadata" because "metadata" is reserved by
    # SQLAlchemy's Declarative API; the DB column name remains "metadata".
    doc_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )

    # Relationships
    # Deleting a document cascades to its memory units at the ORM level.
    memory_units = relationship("MemoryUnit", back_populates="document", cascade="all, delete-orphan")

    __table_args__ = (
        Index("idx_documents_bank_id", "bank_id"),
        Index("idx_documents_content_hash", "content_hash"),
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class MemoryUnit(Base):
    """Individual sentence-level memories.

    A unit optionally references its source document via the composite
    (document_id, bank_id) foreign key, carries a 384-dim pgvector embedding
    for similarity search, and is classified by ``fact_type``
    ('world' | 'bank' | 'opinion' | 'observation').
    """
    __tablename__ = "memory_units"

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, server_default=sql_text("uuid_generate_v4()")
    )
    bank_id: Mapped[str] = mapped_column(Text, nullable=False)
    document_id: Mapped[Optional[str]] = mapped_column(Text)
    text: Mapped[str] = mapped_column(Text, nullable=False)
    # 384-dimensional embedding; searched via the HNSW cosine index below.
    embedding = mapped_column(Vector(384))  # pgvector type
    context: Mapped[Optional[str]] = mapped_column(Text)
    event_date: Mapped[datetime] = mapped_column(TIMESTAMP(timezone=True), nullable=False)  # Kept for backward compatibility
    occurred_start: Mapped[Optional[datetime]] = mapped_column(TIMESTAMP(timezone=True))  # When fact occurred (range start)
    occurred_end: Mapped[Optional[datetime]] = mapped_column(TIMESTAMP(timezone=True))  # When fact occurred (range end)
    mentioned_at: Mapped[Optional[datetime]] = mapped_column(TIMESTAMP(timezone=True))  # When fact was mentioned
    # Allowed values enforced by the CHECK constraint below; defaults to 'world'.
    fact_type: Mapped[str] = mapped_column(Text, nullable=False, server_default="world")
    # Must be in [0, 1]; required for 'opinion', allowed for 'observation',
    # and must be NULL for other fact types (see confidence_score_fact_type_check).
    confidence_score: Mapped[Optional[float]] = mapped_column(Float)
    access_count: Mapped[int] = mapped_column(Integer, server_default="0")
    # Python attribute is "unit_metadata" because "metadata" is reserved by
    # SQLAlchemy's Declarative API; the DB column name remains "metadata".
    unit_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))  # User-defined metadata (str->str)
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )

    # Relationships
    document = relationship("Document", back_populates="memory_units")
    unit_entities = relationship("UnitEntity", back_populates="memory_unit", cascade="all, delete-orphan")
    # Two relationships to MemoryLink, disambiguated by which FK column points here.
    outgoing_links = relationship(
        "MemoryLink",
        foreign_keys="MemoryLink.from_unit_id",
        back_populates="from_unit",
        cascade="all, delete-orphan"
    )
    incoming_links = relationship(
        "MemoryLink",
        foreign_keys="MemoryLink.to_unit_id",
        back_populates="to_unit",
        cascade="all, delete-orphan"
    )

    __table_args__ = (
        # Composite FK matches Document's composite PK (id, bank_id);
        # deleting a document deletes its units at the DB level too.
        ForeignKeyConstraint(
            ["document_id", "bank_id"],
            ["documents.id", "documents.bank_id"],
            name="memory_units_document_fkey",
            ondelete="CASCADE",
        ),
        CheckConstraint("fact_type IN ('world', 'bank', 'opinion', 'observation')"),
        CheckConstraint("confidence_score IS NULL OR (confidence_score >= 0.0 AND confidence_score <= 1.0)"),
        CheckConstraint(
            "(fact_type = 'opinion' AND confidence_score IS NOT NULL) OR "
            "(fact_type = 'observation') OR "
            "(fact_type NOT IN ('opinion', 'observation') AND confidence_score IS NULL)",
            name="confidence_score_fact_type_check"
        ),
        Index("idx_memory_units_bank_id", "bank_id"),
        Index("idx_memory_units_document_id", "document_id"),
        Index("idx_memory_units_event_date", "event_date", postgresql_ops={"event_date": "DESC"}),
        Index("idx_memory_units_bank_date", "bank_id", "event_date", postgresql_ops={"event_date": "DESC"}),
        Index("idx_memory_units_access_count", "access_count", postgresql_ops={"access_count": "DESC"}),
        Index("idx_memory_units_fact_type", "fact_type"),
        Index("idx_memory_units_bank_fact_type", "bank_id", "fact_type"),
        Index("idx_memory_units_bank_type_date", "bank_id", "fact_type", "event_date", postgresql_ops={"event_date": "DESC"}),
        # Partial indexes scoped to single fact types keep them small.
        Index(
            "idx_memory_units_opinion_confidence",
            "bank_id",
            "confidence_score",
            postgresql_where=sql_text("fact_type = 'opinion'"),
            postgresql_ops={"confidence_score": "DESC"}
        ),
        Index(
            "idx_memory_units_opinion_date",
            "bank_id",
            "event_date",
            postgresql_where=sql_text("fact_type = 'opinion'"),
            postgresql_ops={"event_date": "DESC"}
        ),
        Index(
            "idx_memory_units_observation_date",
            "bank_id",
            "event_date",
            postgresql_where=sql_text("fact_type = 'observation'"),
            postgresql_ops={"event_date": "DESC"}
        ),
        # Approximate nearest-neighbor index for cosine similarity search.
        Index(
            "idx_memory_units_embedding",
            "embedding",
            postgresql_using="hnsw",
            postgresql_ops={"embedding": "vector_cosine_ops"}
        ),
    )
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
class Entity(Base):
    """Resolved entities (people, organizations, locations, etc.).

    Entities are scoped per bank and track how often and when they were
    mentioned across memory units.
    """
    __tablename__ = "entities"

    id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), primary_key=True, server_default=sql_text("uuid_generate_v4()")
    )
    # Normalized display name chosen by entity resolution.
    canonical_name: Mapped[str] = mapped_column(Text, nullable=False)
    bank_id: Mapped[str] = mapped_column(Text, nullable=False)
    # Python attribute is "entity_metadata" because "metadata" is reserved by
    # SQLAlchemy's Declarative API; the DB column name remains "metadata".
    entity_metadata: Mapped[dict] = mapped_column("metadata", JSONB, server_default=sql_text("'{}'::jsonb"))
    first_seen: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )
    last_seen: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )
    # Starts at 1: an entity exists because it was mentioned at least once.
    mention_count: Mapped[int] = mapped_column(Integer, server_default="1")

    # Relationships
    unit_entities = relationship("UnitEntity", back_populates="entity", cascade="all, delete-orphan")
    memory_links = relationship("MemoryLink", back_populates="entity", cascade="all, delete-orphan")
    # Co-occurrence rows reference two entities; map each FK side separately.
    cooccurrences_1 = relationship(
        "EntityCooccurrence",
        foreign_keys="EntityCooccurrence.entity_id_1",
        back_populates="entity_1",
        cascade="all, delete-orphan"
    )
    cooccurrences_2 = relationship(
        "EntityCooccurrence",
        foreign_keys="EntityCooccurrence.entity_id_2",
        back_populates="entity_2",
        cascade="all, delete-orphan"
    )

    __table_args__ = (
        Index("idx_entities_bank_id", "bank_id"),
        Index("idx_entities_canonical_name", "canonical_name"),
        Index("idx_entities_bank_name", "bank_id", "canonical_name"),
    )
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class UnitEntity(Base):
    """Association between memory units and entities.

    Plain many-to-many join table; the composite PK (unit_id, entity_id)
    prevents duplicate associations, and both FKs cascade on delete.
    """
    __tablename__ = "unit_entities"

    unit_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("memory_units.id", ondelete="CASCADE"), primary_key=True
    )
    entity_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
    )

    # Relationships
    memory_unit = relationship("MemoryUnit", back_populates="unit_entities")
    entity = relationship("Entity", back_populates="unit_entities")

    __table_args__ = (
        Index("idx_unit_entities_unit", "unit_id"),
        Index("idx_unit_entities_entity", "entity_id"),
    )
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class EntityCooccurrence(Base):
    """Materialized cache of entity co-occurrences.

    Stores each unordered entity pair exactly once: the CHECK constraint
    ``entity_id_1 < entity_id_2`` forces a canonical ordering, so callers
    must sort the pair before insert/lookup.
    """
    __tablename__ = "entity_cooccurrences"

    entity_id_1: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
    )
    entity_id_2: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
    )
    # Starts at 1: a row exists because the pair co-occurred at least once.
    cooccurrence_count: Mapped[int] = mapped_column(Integer, server_default="1")
    last_cooccurred: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )

    # Relationships
    entity_1 = relationship("Entity", foreign_keys=[entity_id_1], back_populates="cooccurrences_1")
    entity_2 = relationship("Entity", foreign_keys=[entity_id_2], back_populates="cooccurrences_2")

    __table_args__ = (
        CheckConstraint("entity_id_1 < entity_id_2", name="entity_cooccurrence_order_check"),
        Index("idx_entity_cooccurrences_entity1", "entity_id_1"),
        Index("idx_entity_cooccurrences_entity2", "entity_id_2"),
        Index("idx_entity_cooccurrences_count", "cooccurrence_count", postgresql_ops={"cooccurrence_count": "DESC"}),
    )
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
class MemoryLink(Base):
    """Links between memory units (temporal, semantic, entity).

    A directed, weighted edge between two memory units. The PK is
    (from_unit_id, to_unit_id, link_type, entity_id), so the same pair of
    units can be linked multiple times with different types/entities.
    """
    __tablename__ = "memory_links"

    from_unit_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("memory_units.id", ondelete="CASCADE"), primary_key=True
    )
    to_unit_id: Mapped[PyUUID] = mapped_column(
        UUID(as_uuid=True), ForeignKey("memory_units.id", ondelete="CASCADE"), primary_key=True
    )
    link_type: Mapped[str] = mapped_column(Text, primary_key=True)
    # NOTE(review): entity_id is part of the PK yet typed Optional — PostgreSQL
    # primary-key columns are implicitly NOT NULL, so NULL entity links cannot
    # actually be stored this way; confirm how non-entity links are represented.
    entity_id: Mapped[Optional[PyUUID]] = mapped_column(
        UUID(as_uuid=True), ForeignKey("entities.id", ondelete="CASCADE"), primary_key=True
    )
    # Edge strength in [0, 1] (enforced by memory_links_weight_check).
    weight: Mapped[float] = mapped_column(Float, nullable=False, server_default="1.0")
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )

    # Relationships
    from_unit = relationship("MemoryUnit", foreign_keys=[from_unit_id], back_populates="outgoing_links")
    to_unit = relationship("MemoryUnit", foreign_keys=[to_unit_id], back_populates="incoming_links")
    entity = relationship("Entity", back_populates="memory_links")

    __table_args__ = (
        CheckConstraint(
            "link_type IN ('temporal', 'semantic', 'entity', 'causes', 'caused_by', 'enables', 'prevents')",
            name="memory_links_link_type_check"
        ),
        CheckConstraint("weight >= 0.0 AND weight <= 1.0", name="memory_links_weight_check"),
        Index("idx_memory_links_from", "from_unit_id"),
        Index("idx_memory_links_to", "to_unit_id"),
        Index("idx_memory_links_type", "link_type"),
        # Partial index: only entity-bearing links.
        Index("idx_memory_links_entity", "entity_id", postgresql_where=sql_text("entity_id IS NOT NULL")),
        # Partial index supporting "strongest outgoing edges" traversal;
        # edges below weight 0.1 are excluded to keep it compact.
        Index(
            "idx_memory_links_from_weight",
            "from_unit_id",
            "weight",
            postgresql_where=sql_text("weight >= 0.1"),
            postgresql_ops={"weight": "DESC"}
        ),
    )
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
class Bank(Base):
    """Memory bank profiles with personality traits and background."""
    __tablename__ = "banks"

    bank_id: Mapped[str] = mapped_column(Text, primary_key=True)
    # Big Five trait scores plus "bias_strength", each defaulting to a
    # neutral 0.5 via the JSONB server default below.
    personality: Mapped[dict] = mapped_column(
        JSONB,
        nullable=False,
        server_default=sql_text(
            '\'{"openness": 0.5, "conscientiousness": 0.5, "extraversion": 0.5, '
            '"agreeableness": 0.5, "neuroticism": 0.5, "bias_strength": 0.5}\'::jsonb'
        )
    )
    # Free-text profile; never NULL, defaults to empty string.
    background: Mapped[str] = mapped_column(Text, nullable=False, server_default="")
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )
    updated_at: Mapped[datetime] = mapped_column(
        TIMESTAMP(timezone=True), server_default=func.now()
    )

    __table_args__ = (
        # NOTE(review): bank_id is already the primary key (implicitly indexed);
        # this extra index appears redundant — confirm before dropping.
        Index("idx_banks_bank_id", "bank_id"),
    )
|