local-deep-research 0.5.7__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +33 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +448 -3
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/report_generator.py +3 -2
  23. local_deep_research/search_system.py +12 -9
  24. local_deep_research/utilities/log_utils.py +23 -10
  25. local_deep_research/utilities/thread_context.py +99 -0
  26. local_deep_research/web/app_factory.py +32 -8
  27. local_deep_research/web/database/benchmark_schema.py +230 -0
  28. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  29. local_deep_research/web/database/models.py +55 -1
  30. local_deep_research/web/database/schema_upgrade.py +397 -2
  31. local_deep_research/web/database/uuid_migration.py +265 -0
  32. local_deep_research/web/routes/api_routes.py +62 -31
  33. local_deep_research/web/routes/history_routes.py +13 -6
  34. local_deep_research/web/routes/metrics_routes.py +264 -4
  35. local_deep_research/web/routes/research_routes.py +45 -18
  36. local_deep_research/web/routes/route_registry.py +352 -0
  37. local_deep_research/web/routes/settings_routes.py +382 -22
  38. local_deep_research/web/services/research_service.py +22 -29
  39. local_deep_research/web/services/settings_manager.py +53 -0
  40. local_deep_research/web/services/settings_service.py +2 -0
  41. local_deep_research/web/static/css/styles.css +8 -0
  42. local_deep_research/web/static/js/components/detail.js +7 -14
  43. local_deep_research/web/static/js/components/details.js +8 -10
  44. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  45. local_deep_research/web/static/js/components/history.js +6 -6
  46. local_deep_research/web/static/js/components/logpanel.js +14 -11
  47. local_deep_research/web/static/js/components/progress.js +51 -46
  48. local_deep_research/web/static/js/components/research.js +250 -89
  49. local_deep_research/web/static/js/components/results.js +5 -7
  50. local_deep_research/web/static/js/components/settings.js +32 -26
  51. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  52. local_deep_research/web/static/js/config/urls.js +285 -0
  53. local_deep_research/web/static/js/main.js +8 -8
  54. local_deep_research/web/static/js/research_form.js +267 -12
  55. local_deep_research/web/static/js/services/api.js +18 -18
  56. local_deep_research/web/static/js/services/keyboard.js +8 -8
  57. local_deep_research/web/static/js/services/socket.js +53 -35
  58. local_deep_research/web/static/js/services/ui.js +1 -1
  59. local_deep_research/web/templates/base.html +4 -1
  60. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  61. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  62. local_deep_research/web/templates/components/sidebar.html +9 -3
  63. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  64. local_deep_research/web/templates/pages/benchmark_results.html +1136 -0
  65. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  66. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  67. local_deep_research/web/templates/pages/metrics.html +212 -39
  68. local_deep_research/web/templates/pages/research.html +8 -6
  69. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  70. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  71. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  72. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  73. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  74. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  75. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  76. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  77. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  78. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  79. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  80. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  81. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  82. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  83. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  84. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  85. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  86. local_deep_research-0.6.0.dist-info/METADATA +374 -0
  87. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/RECORD +90 -65
  88. local_deep_research-0.5.7.dist-info/METADATA +0 -420
  89. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/WHEEL +0 -0
  90. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/entry_points.txt +0 -0
  91. {local_deep_research-0.5.7.dist-info → local_deep_research-0.6.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,230 @@
1
+ """Simple benchmark table definitions for schema creation."""
2
+
3
+ import enum
4
+
5
+ from sqlalchemy import (
6
+ JSON,
7
+ Boolean,
8
+ Column,
9
+ DateTime,
10
+ Enum,
11
+ Float,
12
+ ForeignKey,
13
+ Integer,
14
+ String,
15
+ Text,
16
+ UniqueConstraint,
17
+ Index,
18
+ )
19
+ from sqlalchemy.sql import func
20
+
21
+
22
@enum.unique
class BenchmarkStatus(enum.Enum):
    """Status of a benchmark run.

    ``@enum.unique`` is a safety net: it raises at import time if two
    statuses are ever accidentally given the same string value.
    """

    PENDING = "pending"  # created, not yet started
    IN_PROGRESS = "in_progress"  # currently executing
    COMPLETED = "completed"  # finished successfully
    FAILED = "failed"  # stopped because of an error
    CANCELLED = "cancelled"  # stopped on request
    PAUSED = "paused"  # suspended; may be resumed
31
+
32
+
33
@enum.unique
class DatasetType(enum.Enum):
    """Supported dataset types.

    ``@enum.unique`` guards against two dataset kinds aliasing the same
    string value.
    """

    SIMPLEQA = "simpleqa"
    BROWSECOMP = "browsecomp"
    CUSTOM = "custom"  # user-supplied dataset
39
+
40
+
41
# Simple table definitions for creation.
# These are plain dicts (not declarative models); create_benchmark_tables_simple()
# below turns them into SQLAlchemy Table objects.
benchmark_runs_table = {
    "table_name": "benchmark_runs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Optional human-readable name for the run.
        Column("run_name", String(255), nullable=True),
        # 16-char hash identifying this run's configuration (presumably
        # derived from the *_config JSON below -- confirm with the service).
        Column("config_hash", String(16), nullable=False, index=True),
        # Hashes of the individual queries belonging to this run.
        Column("query_hash_list", JSON, nullable=False),
        # Configuration snapshots stored as JSON.
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        # Lifecycle state; see BenchmarkStatus.
        Column(
            "status",
            Enum(BenchmarkStatus),
            default=BenchmarkStatus.PENDING,
            nullable=False,
        ),
        # Row bookkeeping timestamps, maintained by the database.
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        # Wall-clock start/end of the benchmark execution itself.
        Column("start_time", DateTime, nullable=True),
        Column("end_time", DateTime, nullable=True),
        # Progress counters.
        Column("total_examples", Integer, default=0, nullable=False),
        Column("completed_examples", Integer, default=0, nullable=False),
        Column("failed_examples", Integer, default=0, nullable=False),
        # Aggregate results; NULL until computed.
        Column("overall_accuracy", Float, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("error_message", Text, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_runs_config_hash", "config_hash"),
        Index("idx_benchmark_runs_status_created", "status", "created_at"),
    ],
}
82
+
83
# Per-example results for a benchmark run.
benchmark_results_table = {
    "table_name": "benchmark_results",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Owning run; rows are removed when the run is deleted (CASCADE).
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        # Identifier of the example within its dataset.
        Column("example_id", String(255), nullable=False),
        # 32-char hash of the query; unique per run (see uix_run_query below).
        Column("query_hash", String(32), nullable=False, index=True),
        Column("dataset_type", Enum(DatasetType), nullable=False),
        # The benchmark question and its reference answer.
        Column("question", Text, nullable=False),
        Column("correct_answer", Text, nullable=False),
        # Research output; NULL until the example has been processed.
        Column("response", Text, nullable=True),
        Column("extracted_answer", Text, nullable=True),
        Column("confidence", String(10), nullable=True),
        Column("processing_time", Float, nullable=True),
        Column("sources", JSON, nullable=True),
        # Grading outcome; NULL until evaluated.
        Column("is_correct", Boolean, nullable=True),
        Column("graded_confidence", String(10), nullable=True),
        Column("grader_response", Text, nullable=True),
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column("completed_at", DateTime, nullable=True),
        # Error details, recorded separately for the research and
        # evaluation phases.
        Column("research_error", Text, nullable=True),
        Column("evaluation_error", Text, nullable=True),
        Column("task_index", Integer, nullable=True),
        Column("result_metadata", JSON, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_results_run_dataset",
            "benchmark_run_id",
            "dataset_type",
        ),
        Index("idx_benchmark_results_query_hash", "query_hash"),
        Index("idx_benchmark_results_completed", "completed_at"),
    ],
    "constraints": [
        # A given query may appear at most once per run.
        UniqueConstraint(
            "benchmark_run_id", "query_hash", name="uix_run_query"
        ),
    ],
}
131
+
132
# Saved, named benchmark configurations that can be reused across runs.
benchmark_configs_table = {
    "table_name": "benchmark_configs",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        Column("name", String(255), nullable=False),
        Column("description", Text, nullable=True),
        # 16-char hash of the configuration; matches benchmark_runs.config_hash.
        Column("config_hash", String(16), nullable=False, index=True),
        # Configuration snapshots stored as JSON.
        Column("search_config", JSON, nullable=False),
        Column("evaluation_config", JSON, nullable=False),
        Column("datasets_config", JSON, nullable=False),
        # Row bookkeeping timestamps, maintained by the database.
        Column(
            "created_at", DateTime, server_default=func.now(), nullable=False
        ),
        Column(
            "updated_at",
            DateTime,
            server_default=func.now(),
            onupdate=func.now(),
            nullable=False,
        ),
        # Selection flags (presumably consumed by the web UI -- confirm).
        Column("is_default", Boolean, default=False, nullable=False),
        Column("is_public", Boolean, default=True, nullable=False),
        # Usage statistics for this configuration.
        Column("usage_count", Integer, default=0, nullable=False),
        Column("last_used", DateTime, nullable=True),
        Column("best_accuracy", Float, nullable=True),
        Column("avg_processing_rate", Float, nullable=True),
    ],
    "indexes": [
        Index("idx_benchmark_configs_name", "name"),
        Index("idx_benchmark_configs_hash", "config_hash"),
        Index("idx_benchmark_configs_default", "is_default"),
    ],
}
165
+
166
# Point-in-time progress snapshots recorded while a run executes.
benchmark_progress_table = {
    "table_name": "benchmark_progress",
    "columns": [
        Column("id", Integer, primary_key=True, index=True),
        # Owning run; snapshots are removed when the run is deleted (CASCADE).
        Column(
            "benchmark_run_id",
            Integer,
            ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
            nullable=False,
            index=True,
        ),
        # When this snapshot was taken (set by the database).
        Column(
            "timestamp", DateTime, server_default=func.now(), nullable=False
        ),
        # Progress counters at snapshot time.
        Column("completed_examples", Integer, nullable=False),
        Column("total_examples", Integer, nullable=False),
        # Accuracy overall and broken down per dataset (JSON mapping).
        Column("overall_accuracy", Float, nullable=True),
        Column("dataset_accuracies", JSON, nullable=True),
        Column("processing_rate", Float, nullable=True),
        Column("estimated_completion", DateTime, nullable=True),
        # What the runner is currently working on.
        Column("current_dataset", Enum(DatasetType), nullable=True),
        Column("current_example_id", String(255), nullable=True),
        # Host resource usage; units are not defined here (presumably
        # percent and/or MB -- confirm with the code that writes these).
        Column("memory_usage", Float, nullable=True),
        Column("cpu_usage", Float, nullable=True),
    ],
    "indexes": [
        Index(
            "idx_benchmark_progress_run_time", "benchmark_run_id", "timestamp"
        ),
    ],
}
197
+
198
+
199
def create_benchmark_tables_simple(engine):
    """Create all benchmark tables from the simple table definitions above.

    Args:
        engine: SQLAlchemy engine (or other connectable) to create the
            tables on.

    Tables that already exist are left untouched (``checkfirst=True``).
    """
    from sqlalchemy import Table, MetaData

    metadata = MetaData()

    # All benchmark table definitions in dependency order
    # (benchmark_runs first, since the others reference it via FK).
    tables_to_create = [
        benchmark_runs_table,
        benchmark_results_table,
        benchmark_configs_table,
        benchmark_progress_table,
    ]

    for table_def in tables_to_create:
        # Pass indexes and constraints to the Table constructor alongside
        # the columns.  This registers them through SQLAlchemy's normal
        # _set_parent() machinery; merely assigning ``index.table = table``
        # (the previous approach) does not add the index to
        # ``table.indexes``, so ``metadata.create_all`` would never emit
        # CREATE INDEX for it.
        Table(
            table_def["table_name"],
            metadata,
            *table_def["columns"],
            *table_def.get("indexes", []),
            *table_def.get("constraints", []),
            extend_existing=True,
        )

    # Create everything in one pass; skip objects that already exist.
    metadata.create_all(engine, checkfirst=True)
@@ -0,0 +1,161 @@
1
+ """
2
+ Convert research_id columns from Integer to String.
3
+
4
+ This migration converts existing integer research_id values to string format
5
+ while preserving all existing data. New records will use UUID strings.
6
+ """
7
+
8
+ import sqlite3
9
+ from pathlib import Path
10
+ from loguru import logger
11
+
12
+
13
+ def get_database_path():
14
+ """Get the path to the SQLite database."""
15
+ data_dir = Path(__file__).parents[3] / "data"
16
+ return data_dir / "ldr.db"
17
+
18
+
19
def convert_research_id_to_string():
    """
    Convert research_id columns from Integer to String in all tables.

    Preserves existing data by converting integer IDs to string format.
    SQLite cannot ALTER a column's type in place, so each affected table is
    rebuilt: create a ``<table>_new`` clone with ``research_id TEXT``, copy
    all rows across (stringifying research_id), then drop the old table and
    rename the clone into place.  All changes are committed together; any
    error rolls back the whole migration and re-raises.
    """
    db_path = get_database_path()

    # Nothing to migrate on a fresh install.
    if not db_path.exists():
        logger.info("Database doesn't exist yet, migration not needed")
        return

    logger.info(f"Converting research_id columns to string in {db_path}")

    conn = sqlite3.connect(db_path)
    conn.execute(
        "PRAGMA foreign_keys = OFF"
    )  # Disable FK constraints during migration

    try:
        cursor = conn.cursor()

        # List of tables that have research_id columns
        tables_to_migrate = [
            "token_usage",
            "model_usage",
            "search_calls",
            "benchmark_results",  # If it exists
        ]

        for table_name in tables_to_migrate:
            logger.info(f"Converting {table_name} table...")

            # Check if table exists
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table_name,),
            )
            if not cursor.fetchone():
                logger.info(f"Table {table_name} does not exist, skipping")
                continue

            # Check if research_id column exists (PRAGMA row: index 1 = name).
            cursor.execute(f"PRAGMA table_info({table_name})")
            columns = cursor.fetchall()
            has_research_id = any(col[1] == "research_id" for col in columns)

            if not has_research_id:
                logger.info(
                    f"Table {table_name} does not have research_id column, skipping"
                )
                continue

            # For SQLite, we need to recreate the table to change column type
            # 1. Create new table with string research_id
            # 2. Copy data with research_id converted to string
            # 3. Drop old table and rename new table

            # Get the current table schema (the original CREATE TABLE DDL).
            cursor.execute(
                f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table_name}'"
            )
            create_sql = cursor.fetchone()[0]

            # Create new table name
            new_table_name = f"{table_name}_new"

            # Modify the CREATE TABLE statement to change research_id to TEXT.
            # NOTE(review): these string replaces assume the stored DDL spells
            # the name unquoted as "CREATE TABLE {table_name}" and the column
            # type as INTEGER/INT; quoted or differently-cased DDL would not
            # match -- confirm against the schemas actually created upstream.
            new_create_sql = create_sql.replace(
                f"CREATE TABLE {table_name}", f"CREATE TABLE {new_table_name}"
            )
            new_create_sql = new_create_sql.replace(
                "research_id INTEGER", "research_id TEXT"
            )
            new_create_sql = new_create_sql.replace(
                "research_id INT", "research_id TEXT"
            )

            # Create the new table
            cursor.execute(new_create_sql)

            # Copy data from old table to new table, converting research_id
            # to string.
            # NOTE(review): fetchall() loads the whole table into memory;
            # fine for modest tables, revisit if these grow large.
            cursor.execute(f"SELECT * FROM {table_name}")
            old_rows = cursor.fetchall()

            if old_rows:
                # Get column names
                cursor.execute(f"PRAGMA table_info({table_name})")
                columns = cursor.fetchall()
                column_names = [col[1] for col in columns]
                research_id_index = (
                    column_names.index("research_id")
                    if "research_id" in column_names
                    else -1
                )

                # Prepare insert statement
                placeholders = ",".join(["?" for _ in column_names])
                insert_sql = f"INSERT INTO {new_table_name} ({','.join(column_names)}) VALUES ({placeholders})"

                # Convert rows and insert
                converted_rows = []
                for row in old_rows:
                    row_list = list(row)
                    # Convert research_id to string if it's not None
                    if (
                        research_id_index >= 0
                        and row_list[research_id_index] is not None
                    ):
                        row_list[research_id_index] = str(
                            row_list[research_id_index]
                        )
                    converted_rows.append(tuple(row_list))

                cursor.executemany(insert_sql, converted_rows)
                logger.info(
                    f"Converted {len(converted_rows)} rows in {table_name}"
                )

            # Drop old table and rename new table.
            # NOTE(review): DROP TABLE also discards any indexes/triggers
            # defined on the old table; they are not recreated here --
            # confirm none exist on these tables (or recreate them).
            cursor.execute(f"DROP TABLE {table_name}")
            cursor.execute(
                f"ALTER TABLE {new_table_name} RENAME TO {table_name}"
            )

            logger.info(
                f"Successfully converted {table_name} research_id to string"
            )

        # Commit all changes
        conn.commit()
        logger.info("All research_id columns converted to string successfully!")

    except Exception as e:
        logger.error(f"Error during research_id conversion: {e}")
        conn.rollback()
        raise
    finally:
        conn.execute("PRAGMA foreign_keys = ON")  # Re-enable FK constraints
        conn.close()
158
+
159
+
160
# Allow running this migration directly as a standalone script.
if __name__ == "__main__":
    convert_research_id_to_string()
@@ -39,8 +39,10 @@ class ResearchHistory(Base):
39
39
 
40
40
  __tablename__ = "research_history"
41
41
 
42
- # Unique identifier for each record.
42
+ # Legacy integer ID (kept for migration compatibility)
43
43
  id = Column(Integer, primary_key=True, autoincrement=True)
44
+ # New UUID identifier (primary field to use for new records)
45
+ uuid_id = Column(String(36), unique=True, index=True)
44
46
  # The search query.
45
47
  query = Column(Text, nullable=False)
46
48
  # The mode of research (e.g., 'quick_summary', 'detailed_report').
@@ -184,3 +186,55 @@ class Journal(Base):
184
186
  quality_model = Column(String(255), nullable=True, index=True)
185
187
  # Time at which the quality was last analyzed.
186
188
  quality_analysis_time = Column(Integer, nullable=False)
189
+
190
+
191
class RateLimitAttempt(Base):
    """Database model for tracking individual rate limit retry attempts."""

    __tablename__ = "rate_limit_attempts"

    id = Column(Integer, primary_key=True, index=True)
    # Search engine identifier this attempt was made against.
    engine_type = Column(String(100), nullable=False, index=True)
    # Float timestamp of the attempt (presumably Unix epoch seconds --
    # confirm with the rate limiting tracker that writes these rows).
    timestamp = Column(Float, nullable=False, index=True)
    # Seconds waited before making this attempt.
    wait_time = Column(Float, nullable=False)
    # Number of retries that preceded this attempt.
    retry_count = Column(Integer, nullable=False)
    # Whether the request ultimately succeeded.
    success = Column(Boolean, nullable=False)
    # Error classification when the attempt failed; NULL on success.
    error_type = Column(String(100), nullable=True)
    # Row creation time, maintained by the database.
    created_at = Column(DateTime, server_default=func.now(), nullable=False)
204
+
205
+
206
class RateLimitEstimate(Base):
    """Database model for storing current rate limit estimates per engine."""

    __tablename__ = "rate_limit_estimates"

    id = Column(Integer, primary_key=True, index=True)
    # One estimate row per engine (enforced by the unique constraint).
    engine_type = Column(String(100), nullable=False, unique=True, index=True)
    # Current wait estimate and its allowed bounds, in seconds.
    base_wait_seconds = Column(Float, nullable=False)
    min_wait_seconds = Column(Float, nullable=False)
    max_wait_seconds = Column(Float, nullable=False)
    # Float timestamp of the last estimate update (presumably Unix epoch
    # seconds -- confirm with the rate limiting tracker).
    last_updated = Column(Float, nullable=False)
    # Aggregate attempt statistics backing the estimate.
    total_attempts = Column(Integer, default=0, nullable=False)
    success_rate = Column(Float, default=0.0, nullable=False)
    # Row bookkeeping timestamps, maintained by the database.
    created_at = Column(DateTime, server_default=func.now(), nullable=False)
    updated_at = Column(
        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
    )
223
+
224
+
225
class ProviderModel(Base):
    """Database model for caching available models from all providers."""

    __tablename__ = "provider_models"

    id = Column(Integer, primary_key=True, index=True)
    # Provider identifier the model belongs to.
    provider = Column(String(50), nullable=False, index=True)
    # Provider-specific model identifier and its human-readable label.
    model_key = Column(String(255), nullable=False)
    model_label = Column(String(255), nullable=False)
    model_metadata = Column(JSON, nullable=True)  # For additional model info
    # When this cache entry was last refreshed, maintained by the database.
    last_updated = Column(DateTime, server_default=func.now(), nullable=False)

    # Composite unique constraint to prevent duplicates
    __table_args__ = (
        UniqueConstraint("provider", "model_key", name="uix_provider_model"),
    )