PyPI - local-deep-research - Versions diffs - 0.6.0__py3-none-any.whl → 0.6.4__py3-none-any.whl - Mend

local-deep-research 0.6.0py3-none-any.whl → 0.6.4py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

local_deep_research/web/database/migrations.py CHANGED Viewed

@@ -1,15 +1,18 @@
+import sqlite3
+import uuid
+from pathlib import Path
 from loguru import logger
-from sqlalchemy import inspect
+from sqlalchemy import inspect, text, create_engine
 from ..services.settings_manager import SettingsManager
 from .models import (
     Base,
-    Journal,
-    Setting,
-    ResearchLog,
-    Research,
-    ResearchHistory,
+    ResearchStrategy,
+    RateLimitAttempt,
+    RateLimitEstimate,
 )
+from ...utilities.db_utils import DB_PATH, get_db_session
 def import_default_settings_file(db_session):
@@ -33,40 +36,708 @@ def import_default_settings_file(db_session):
         # Update the saved version.
         settings_mgr.update_db_version()
+    except Exception:
+        logger.exception("Error importing settings from files")
+def remove_research_log_table(engine):
+    """
+    Remove the redundant research_log table if it exists
+    Args:
+        engine: SQLAlchemy engine
+    Returns:
+        bool: True if operation was successful, False otherwise
+    """
+    try:
+        inspector = inspect(engine)
+        # Check if table exists
+        if inspector.has_table("research_log"):
+            # For SQLite, we need to use raw SQL for DROP TABLE
+            with engine.connect() as conn:
+                conn.execute("DROP TABLE research_log")
+                conn.commit()
+            logger.info("Successfully removed redundant 'research_log' table")
+            return True
+        else:
+            logger.info("Table 'research_log' does not exist, no action needed")
+            return True
+    except Exception:
+        logger.exception("Error removing research_log table")
+        return False
+def create_research_strategy_table(engine):
+    """
+    Create the research_strategies table if it doesn't exist
+    Args:
+        engine: SQLAlchemy engine
+    Returns:
+        bool: True if operation was successful, False otherwise
+    """
+    try:
+        inspector = inspect(engine)
+        # Check if table exists
+        if not inspector.has_table("research_strategies"):
+            # Create the table using ORM
+            Base.metadata.create_all(
+                engine, tables=[ResearchStrategy.__table__]
+            )
+            logger.info("Successfully created 'research_strategies' table")
+            return True
+        else:
+            logger.info(
+                "Table 'research_strategies' already exists, no action needed"
+            )
+            return True
+    except Exception:
+        logger.exception("Error creating research_strategies table")
+        return False
+def create_rate_limiting_tables(engine):
+    """
+    Create rate limiting tables if they don't exist
+    Args:
+        engine: SQLAlchemy engine
+    Returns:
+        bool: True if operation was successful, False otherwise
+    """
+    try:
+        inspector = inspect(engine)
+        tables_to_create = []
+        # Check if rate_limit_attempts table exists
+        if not inspector.has_table("rate_limit_attempts"):
+            tables_to_create.append(RateLimitAttempt.__table__)
+            logger.info("Need to create 'rate_limit_attempts' table")
+        # Check if rate_limit_estimates table exists
+        if not inspector.has_table("rate_limit_estimates"):
+            tables_to_create.append(RateLimitEstimate.__table__)
+            logger.info("Need to create 'rate_limit_estimates' table")
+        if tables_to_create:
+            # Create the tables using ORM
+            Base.metadata.create_all(engine, tables=tables_to_create)
+            logger.info(
+                f"Successfully created {len(tables_to_create)} rate limiting tables"
+            )
+        else:
+            logger.info("Rate limiting tables already exist, no action needed")
+        return True
+    except Exception:
+        logger.exception("Error creating rate limiting tables")
+        return False
+def add_research_id_to_benchmark_results(engine):
+    """
+    Add research_id column to benchmark_results table if it doesn't exist.
+    """
+    try:
+        import sqlite3
+        # Get database path from engine
+        db_path = engine.url.database
+        logger.info("Checking if benchmark_results needs research_id column...")
+        conn = sqlite3.connect(db_path)
+        try:
+            cursor = conn.cursor()
+            # Check if table exists
+            cursor.execute(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name='benchmark_results'"
+            )
+            if not cursor.fetchone():
+                logger.info("benchmark_results table does not exist, skipping")
+                return True
+            # Check if research_id column already exists
+            cursor.execute("PRAGMA table_info(benchmark_results)")
+            columns = cursor.fetchall()
+            has_research_id = any(col[1] == "research_id" for col in columns)
+            if has_research_id:
+                logger.info("benchmark_results already has research_id column")
+                return True
+            # Add research_id column
+            logger.info("Adding research_id column to benchmark_results table")
+            cursor.execute(
+                "ALTER TABLE benchmark_results ADD COLUMN research_id TEXT"
+            )
+            conn.commit()
+            logger.info(
+                "Successfully added research_id column to benchmark_results"
+            )
+            return True
+        finally:
+            conn.close()
+    except Exception:
+        logger.exception("Error adding research_id column to benchmark_results")
+        return False
+def add_uuid_id_column_to_research_history(engine):
+    """
+    Adds a new `uuid_id` string column to the `research_history` table if it
+    does not exist already.
+    """
+    try:
+        import sqlite3
+        # Get database path from engine
+        db_path = engine.url.database
+        logger.info("Checking if research_history needs uuid_id column...")
+        conn = sqlite3.connect(db_path)
+        try:
+            cursor = conn.cursor()
+            # Check if table exists
+            cursor.execute(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name='research_history'"
+            )
+            if not cursor.fetchone():
+                logger.info("research_history table does not exist, skipping")
+                return True
+            # Check if uuid_id column already exists
+            cursor.execute("PRAGMA table_info(research_history)")
+            columns = cursor.fetchall()
+            has_uuid_id = any(col[1] == "uuid_id" for col in columns)
+            if has_uuid_id:
+                logger.info("research_history already has uuid_id column")
+                return True
+            # Add uuid_id column
+            logger.info("Adding uuid_id column to research_history table")
+            cursor.execute(
+                "ALTER TABLE research_history ADD COLUMN uuid_id CHAR(36)"
+            )
+            conn.commit()
+            logger.info("Successfully added uuid_id column to research_history")
+            return True
+        finally:
+            conn.close()
+    except Exception:
+        logger.exception("Error adding uuid_id column to research_history")
+        return False
+def convert_research_id_to_string_if_needed(engine):
+    """
+    Convert research_id columns from Integer to String in all tables.
+    Preserves existing data by converting integer IDs to string format.
+    Only runs if integer research_id columns are detected.
+    """
+    try:
+        import sqlite3
+        # Get database path from engine
+        db_path = engine.url.database
+        logger.info(
+            "Checking if research_id columns need conversion to string..."
+        )
+        conn = sqlite3.connect(db_path)
+        conn.execute("PRAGMA foreign_keys = OFF")
+        try:
+            cursor = conn.cursor()
+            # List of tables that might have research_id columns
+            tables_to_check = [
+                "token_usage",
+                "model_usage",
+                "search_calls",
+                "benchmark_results",
+            ]
+            tables_needing_conversion = []
+            # Check which tables need conversion
+            for table_name in tables_to_check:
+                # Check if table exists
+                cursor.execute(
+                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
+                    (table_name,),
+                )
+                if not cursor.fetchone():
+                    continue
+                # Check if research_id column exists and is integer type
+                cursor.execute(f"PRAGMA table_info({table_name})")
+                columns = cursor.fetchall()
+                for col in columns:
+                    col_name, col_type = col[1], col[2]
+                    if col_name == "research_id" and (
+                        "INTEGER" in col_type.upper()
+                        or "INT" in col_type.upper()
+                    ):
+                        tables_needing_conversion.append(table_name)
+                        break
+            if not tables_needing_conversion:
+                logger.info(
+                    "All research_id columns are already string type, no conversion needed"
+                )
+                return True
+            logger.info(
+                f"Converting research_id to string in tables: {tables_needing_conversion}"
+            )
+            # Convert each table
+            for table_name in tables_needing_conversion:
+                logger.info(f"Converting {table_name} table...")
+                # Get the current table schema
+                cursor.execute(
+                    f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}'"
+                )
+                create_sql = cursor.fetchone()[0]
+                # Create new table name
+                new_table_name = f"{table_name}_new"
+                # Modify the CREATE TABLE statement to change research_id to TEXT
+                new_create_sql = create_sql.replace(
+                    f"CREATE TABLE {table_name}",
+                    f"CREATE TABLE {new_table_name}",
+                )
+                new_create_sql = new_create_sql.replace(
+                    "research_id INTEGER", "research_id TEXT"
+                )
+                new_create_sql = new_create_sql.replace(
+                    "research_id INT", "research_id TEXT"
+                )
+                # Create the new table
+                cursor.execute(new_create_sql)
+                # Copy data from old table to new table, converting research_id to string
+                cursor.execute(f"SELECT * FROM {table_name}")
+                old_rows = cursor.fetchall()
+                if old_rows:
+                    # Get column names
+                    cursor.execute(f"PRAGMA table_info({table_name})")
+                    columns = cursor.fetchall()
+                    column_names = [col[1] for col in columns]
+                    research_id_index = (
+                        column_names.index("research_id")
+                        if "research_id" in column_names
+                        else -1
+                    )
+                    # Prepare insert statement
+                    placeholders = ",".join(["?" for _ in column_names])
+                    insert_sql = f"INSERT INTO {new_table_name} ({','.join(column_names)}) VALUES ({placeholders})"
+                    # Convert rows and insert
+                    converted_rows = []
+                    for row in old_rows:
+                        row_list = list(row)
+                        # Convert research_id to string if it's not None
+                        if (
+                            research_id_index >= 0
+                            and row_list[research_id_index] is not None
+                        ):
+                            row_list[research_id_index] = str(
+                                row_list[research_id_index]
+                            )
+                        converted_rows.append(tuple(row_list))
+                    cursor.executemany(insert_sql, converted_rows)
+                    logger.info(
+                        f"Converted {len(converted_rows)} rows in {table_name}"
+                    )
+                # Drop old table and rename new table
+                cursor.execute(f"DROP TABLE {table_name}")
+                cursor.execute(
+                    f"ALTER TABLE {new_table_name} RENAME TO {table_name}"
+                )
+                logger.info(
+                    f"Successfully converted {table_name} research_id to string"
+                )
+            # Commit all changes
+            conn.commit()
+            logger.info(
+                "All research_id columns converted to string successfully!"
+            )
+            return True
+        finally:
+            conn.execute("PRAGMA foreign_keys = ON")
+            conn.close()
+    except Exception:
+        logger.exception("Error converting research_id columns to string")
+        return False
+def create_provider_models_table(engine):
+    """Create provider_models table for caching available models"""
+    with engine.connect() as conn:
+        result = conn.execute(
+            text(
+                "SELECT name FROM sqlite_master WHERE type='table' AND name='provider_models'"
+            )
+        )
+        if result.fetchone():
+            logger.info(
+                "Table 'provider_models' already exists, no action needed"
+            )
+            return
+        logger.info("Creating 'provider_models' table...")
+        # Create the table
+        conn.execute(
+            text(
+                """
+            CREATE TABLE provider_models (
+                id INTEGER PRIMARY KEY AUTOINCREMENT,
+                provider VARCHAR(50) NOT NULL,
+                model_key VARCHAR(255) NOT NULL,
+                model_label VARCHAR(255) NOT NULL,
+                model_metadata JSON,
+                last_updated DATETIME DEFAULT CURRENT_TIMESTAMP NOT NULL,
+                UNIQUE(provider, model_key)
+            )
+            """
+            )
+        )
+        # Create index on provider
+        conn.execute(
+            text(
+                "CREATE INDEX ix_provider_models_provider ON provider_models (provider)"
+            )
+        )
+        conn.commit()
+        logger.info("Table 'provider_models' created successfully")
+def migrate_to_uuid():
+    """
+    Migrate all research_id fields from integers to UUIDs.
+    Strategy:
+    1. Add new UUID columns alongside existing integer columns
+    2. Generate UUIDs for existing data (or keep as string versions of integers)
+    3. Update foreign key relationships
+    4. Drop old integer columns and rename UUID columns
+    """
+    db_path = Path(DB_PATH)
+    if not db_path.exists():
+        logger.info("Database doesn't exist yet, migration not needed")
+        return
+    logger.info(f"Starting UUID migration on {db_path}")
+    conn = sqlite3.connect(db_path)
+    conn.execute(
+        "PRAGMA foreign_keys = OFF"
+    )  # Disable FK constraints during migration
+    try:
+        cursor = conn.cursor()
+        # 1. Migrate research_history table (main research IDs)
+        logger.info("Migrating research_history table...")
+        # Check if the table exists and has the old structure
+        cursor.execute("PRAGMA table_info(research_history)")
+        columns = cursor.fetchall()
+        has_uuid_id = any(col[1] == "uuid_id" for col in columns)
+        if not has_uuid_id:
+            # Add UUID column
+            cursor.execute(
+                "ALTER TABLE research_history ADD COLUMN uuid_id TEXT"
+            )
+            # Generate UUIDs for existing records (convert integer ID to UUID format)
+            cursor.execute("SELECT id FROM research_history")
+            existing_ids = cursor.fetchall()
+            for (old_id,) in existing_ids:
+                # Generate a deterministic UUID based on the old ID
+                new_uuid = str(
+                    uuid.uuid5(uuid.NAMESPACE_OID, f"research_{old_id}")
+                )
+                cursor.execute(
+                    "UPDATE research_history SET uuid_id = ? WHERE id = ?",
+                    (new_uuid, old_id),
+                )
+            logger.info(
+                f"Generated UUIDs for {len(existing_ids)} research records"
+            )
+        # 2. Migrate metrics tables
+        logger.info("Migrating metrics tables...")
+        # Token usage table
+        cursor.execute("PRAGMA table_info(token_usage)")
+        columns = cursor.fetchall()
+        has_uuid_research_id = any(
+            col[1] == "uuid_research_id" for col in columns
+        )
+        if not has_uuid_research_id:
+            cursor.execute(
+                "ALTER TABLE token_usage ADD COLUMN uuid_research_id TEXT"
+            )
+            # Convert existing research_ids to UUIDs (deterministic conversion)
+            cursor.execute(
+                "SELECT DISTINCT research_id FROM token_usage WHERE research_id IS NOT NULL"
+            )
+            research_ids = cursor.fetchall()
+            for (research_id,) in research_ids:
+                if research_id:
+                    new_uuid = str(
+                        uuid.uuid5(
+                            uuid.NAMESPACE_OID, f"research_{research_id}"
+                        )
+                    )
+                    cursor.execute(
+                        "UPDATE token_usage SET uuid_research_id = ? WHERE research_id = ?",
+                        (new_uuid, research_id),
+                    )
+            logger.info(
+                f"Migrated {len(research_ids)} research IDs in token_usage"
+            )
+        # Model usage table
+        cursor.execute("PRAGMA table_info(model_usage)")
+        columns = cursor.fetchall()
+        has_uuid_research_id = any(
+            col[1] == "uuid_research_id" for col in columns
+        )
+        if not has_uuid_research_id:
+            cursor.execute(
+                "ALTER TABLE model_usage ADD COLUMN uuid_research_id TEXT"
+            )
+            cursor.execute(
+                "SELECT DISTINCT research_id FROM model_usage WHERE research_id IS NOT NULL"
+            )
+            research_ids = cursor.fetchall()
+            for (research_id,) in research_ids:
+                if research_id:
+                    new_uuid = str(
+                        uuid.uuid5(
+                            uuid.NAMESPACE_OID, f"research_{research_id}"
+                        )
+                    )
+                    cursor.execute(
+                        "UPDATE model_usage SET uuid_research_id = ? WHERE research_id = ?",
+                        (new_uuid, research_id),
+                    )
+            logger.info(
+                f"Migrated {len(research_ids)} research IDs in model_usage"
+            )
+        # Search calls table
+        cursor.execute("PRAGMA table_info(search_calls)")
+        columns = cursor.fetchall()
+        has_uuid_research_id = any(
+            col[1] == "uuid_research_id" for col in columns
+        )
+        if not has_uuid_research_id:
+            cursor.execute(
+                "ALTER TABLE search_calls ADD COLUMN uuid_research_id TEXT"
+            )
+            cursor.execute(
+                "SELECT DISTINCT research_id FROM search_calls WHERE research_id IS NOT NULL"
+            )
+            research_ids = cursor.fetchall()
+            for (research_id,) in research_ids:
+                if research_id:
+                    new_uuid = str(
+                        uuid.uuid5(
+                            uuid.NAMESPACE_OID, f"research_{research_id}"
+                        )
+                    )
+                    cursor.execute(
+                        "UPDATE search_calls SET uuid_research_id = ? WHERE research_id = ?",
+                        (new_uuid, research_id),
+                    )
+            logger.info(
+                f"Migrated {len(research_ids)} research IDs in search_calls"
+            )
+        # 3. Migrate benchmark tables
+        logger.info("Migrating benchmark tables...")
+        # Check if benchmark_results table exists
+        cursor.execute(
+            "SELECT name FROM sqlite_master WHERE type='table' AND name='benchmark_results'"
+        )
+        if cursor.fetchone():
+            cursor.execute("PRAGMA table_info(benchmark_results)")
+            columns = cursor.fetchall()
+            has_uuid_research_id = any(
+                col[1] == "uuid_research_id" for col in columns
+            )
+            if not has_uuid_research_id:
+                cursor.execute(
+                    "ALTER TABLE benchmark_results ADD COLUMN uuid_research_id TEXT"
+                )
+                cursor.execute(
+                    "SELECT DISTINCT research_id FROM benchmark_results WHERE research_id IS NOT NULL"
+                )
+                research_ids = cursor.fetchall()
+                for (research_id,) in research_ids:
+                    if research_id:
+                        new_uuid = str(
+                            uuid.uuid5(
+                                uuid.NAMESPACE_OID, f"research_{research_id}"
+                            )
+                        )
+                        cursor.execute(
+                            "UPDATE benchmark_results SET uuid_research_id = ? WHERE research_id = ?",
+                            (new_uuid, research_id),
+                        )
+                logger.info(
+                    f"Migrated {len(research_ids)} research IDs in benchmark_results"
+                )
+        # Commit all changes
+        conn.commit()
+        logger.info("UUID migration completed successfully!")
+        # Note: We're keeping both old and new columns for now
+        # The application will use the new UUID columns
+        # Old columns can be dropped in a future migration once everything is stable
     except Exception as e:
-        logger.error("Error importing settings from files: %s", e)
+        logger.error(f"Error during UUID migration: {e}")
+        conn.rollback()
+        raise
+    finally:
+        conn.execute("PRAGMA foreign_keys = ON")  # Re-enable FK constraints
+        conn.close()
+def cleanup_old_columns():
+    """
+    Cleanup migration - drops old integer columns after UUID migration is stable.
+    Run this only after confirming the UUID migration is working correctly.
+    """
+    logger.warning(
+        "This will permanently remove old integer research_id columns!"
+    )
+    logger.warning("Make sure to backup your database before running this!")
+    conn = sqlite3.connect(DB_PATH)
+    conn.execute("PRAGMA foreign_keys = OFF")
+    try:
+        # For SQLite, we need to recreate tables to drop columns
+        # This is complex, so we'll leave old columns for now
+        # They can be cleaned up manually if needed
+        logger.info("Cleanup deferred - old columns remain for safety")
+    finally:
+        conn.execute("PRAGMA foreign_keys = ON")
+        conn.close()
-def run_migrations(engine, db_session=None):
+def ensure_database_initialized(engine=None, db_session=None):
     """
-    Run any necessary database migrations
+    Initialize the database and run any necessary database migrations
     Args:
         engine: SQLAlchemy engine
         db_session: Optional SQLAlchemy session
     """
+    logger.info("Initializing database at {}.", DB_PATH)
+    if engine is None:
+        # Create SQLAlchemy engine
+        engine = create_engine(f"sqlite:///{DB_PATH}")
+    if db_session is None:
+        db_session = get_db_session()
     # Create all tables if they don't exist
-    inspector = inspect(engine)
-    if not inspector.has_table("settings"):
-        logger.info("Creating settings table")
-        Base.metadata.create_all(engine, tables=[Setting.__table__])
+    Base.metadata.create_all(engine)
+    # Import existing settings from files
+    import_default_settings_file(db_session)
-    if not inspector.has_table(Journal.__tablename__):
-        logger.info("Creating journals table.")
-        Base.metadata.create_all(engine, tables=[Journal.__table__])
+    try:
+        # 1. Remove the redundant research_log table
+        remove_research_log_table(engine)
-    if not inspector.has_table(ResearchLog.__tablename__):
-        logger.info("Creating research logs table.")
-        Base.metadata.create_all(engine, tables=[ResearchLog.__table__])
+        # 2. Create research_strategies table
+        create_research_strategy_table(engine)
-    if not inspector.has_table(Research.__tablename__):
-        logger.info("Creating research table.")
-        Base.metadata.create_all(engine, tables=[Research.__table__])
+        # 4. Create rate limiting tables
+        create_rate_limiting_tables(engine)
-    if not inspector.has_table(ResearchHistory.__tablename__):
-        logger.info("Creating research table.")
-        Base.metadata.create_all(engine, tables=[ResearchHistory.__table__])
+        # 5. Add research_id column to benchmark_results if missing
+        add_research_id_to_benchmark_results(engine)
-    # Import existing settings from files
-    if db_session:
-        import_default_settings_file(db_session)
+        # 6. Convert research_id columns from integer to string
+        convert_research_id_to_string_if_needed(engine)
+        # 7. Add uuid_id column to research_history if missing
+        add_uuid_id_column_to_research_history(engine)
+        # 8. Create provider_models table for caching
+        create_provider_models_table(engine)
+        logger.info("Schema upgrades completed successfully")
+        return True
+    except Exception:
+        logger.exception("Error during schema upgrades")
+        return False
+    finally:
+        db_session.close()

local-deep-research 0.6.0__py3-none-any.whl → 0.6.4__py3-none-any.whl

local-deep-research 0.6.0py3-none-any.whl → 0.6.4py3-none-any.whl