PyPI - local-deep-research - Versions diffs - 0.5.9__py3-none-any.whl → 0.6.1__py3-none-any.whl - Mend

local-deep-research 0.5.9py3-none-any.whl → 0.6.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90) hide show

local_deep_research/benchmarks/models/__init__.py ADDED Viewed

@@ -0,0 +1,19 @@
+"""Benchmark database models for ORM."""
+from .benchmark_models import (
+    BenchmarkConfig,
+    BenchmarkProgress,
+    BenchmarkResult,
+    BenchmarkRun,
+    BenchmarkStatus,
+    DatasetType,
+)
+__all__ = [
+    "BenchmarkRun",
+    "BenchmarkResult",
+    "BenchmarkConfig",
+    "BenchmarkProgress",
+    "BenchmarkStatus",
+    "DatasetType",
+]

local_deep_research/benchmarks/models/benchmark_models.py ADDED Viewed

@@ -0,0 +1,283 @@
+"""Database models for benchmark system."""
+import enum
+from sqlalchemy import (
+    JSON,
+    Boolean,
+    Column,
+    DateTime,
+    Enum,
+    Float,
+    ForeignKey,
+    Integer,
+    String,
+    Text,
+    UniqueConstraint,
+    Index,
+)
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship
+from sqlalchemy.sql import func
+# Use the same base as the main app
+try:
+    from ...web.database.models import Base
+except ImportError:
+    # Fallback for different import contexts
+    from sqlalchemy.ext.declarative import declarative_base
+    Base = declarative_base()
+class BenchmarkStatus(enum.Enum):
+    """Status of a benchmark run."""
+    PENDING = "pending"
+    IN_PROGRESS = "in_progress"
+    COMPLETED = "completed"
+    FAILED = "failed"
+    CANCELLED = "cancelled"
+    PAUSED = "paused"
+class DatasetType(enum.Enum):
+    """Supported dataset types."""
+    SIMPLEQA = "simpleqa"
+    BROWSECOMP = "browsecomp"
+    CUSTOM = "custom"
+class BenchmarkRun(Base):
+    """Main benchmark run metadata."""
+    __tablename__ = "benchmark_runs"
+    id = Column(Integer, primary_key=True, index=True)
+    # Run identification
+    run_name = Column(String(255), nullable=True)  # User-friendly name
+    config_hash = Column(
+        String(16), nullable=False, index=True
+    )  # For compatibility matching
+    query_hash_list = Column(
+        JSON, nullable=False
+    )  # List of query hashes to avoid duplication
+    # Configuration
+    search_config = Column(
+        JSON, nullable=False
+    )  # Complete search configuration
+    evaluation_config = Column(JSON, nullable=False)  # Evaluation settings
+    datasets_config = Column(
+        JSON, nullable=False
+    )  # Dataset selection and quantities
+    # Status and timing
+    status = Column(
+        Enum(BenchmarkStatus), default=BenchmarkStatus.PENDING, nullable=False
+    )
+    created_at = Column(DateTime, server_default=func.now(), nullable=False)
+    updated_at = Column(
+        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+    start_time = Column(DateTime, nullable=True)
+    end_time = Column(DateTime, nullable=True)
+    # Progress tracking
+    total_examples = Column(Integer, default=0, nullable=False)
+    completed_examples = Column(Integer, default=0, nullable=False)
+    failed_examples = Column(Integer, default=0, nullable=False)
+    # Results summary
+    overall_accuracy = Column(Float, nullable=True)
+    processing_rate = Column(Float, nullable=True)  # Examples per minute
+    # Error handling
+    error_message = Column(Text, nullable=True)
+    # Relationships
+    results = relationship(
+        "BenchmarkResult",
+        back_populates="benchmark_run",
+        cascade="all, delete-orphan",
+        lazy="dynamic",
+    )
+    progress_updates = relationship(
+        "BenchmarkProgress",
+        back_populates="benchmark_run",
+        cascade="all, delete-orphan",
+        lazy="dynamic",
+    )
+    # Indexes for performance and extend existing
+    __table_args__ = (
+        Index("idx_benchmark_runs_config_hash", "config_hash"),
+        Index("idx_benchmark_runs_status_created", "status", "created_at"),
+        {"extend_existing": True},
+    )
+class BenchmarkResult(Base):
+    """Individual benchmark result for a single question."""
+    __tablename__ = "benchmark_results"
+    id = Column(Integer, primary_key=True, index=True)
+    # Foreign key
+    benchmark_run_id = Column(
+        Integer,
+        ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    # Question identification
+    example_id = Column(String(255), nullable=False)  # Original dataset ID
+    query_hash = Column(
+        String(32), nullable=False, index=True
+    )  # For deduplication
+    dataset_type = Column(Enum(DatasetType), nullable=False)
+    research_id = Column(
+        String(36), nullable=True, index=True
+    )  # UUID string or converted integer
+    # Question and answer
+    question = Column(Text, nullable=False)
+    correct_answer = Column(Text, nullable=False)
+    # Research results
+    response = Column(Text, nullable=True)
+    extracted_answer = Column(Text, nullable=True)
+    confidence = Column(String(10), nullable=True)
+    processing_time = Column(Float, nullable=True)
+    sources = Column(JSON, nullable=True)
+    # Evaluation results
+    is_correct = Column(Boolean, nullable=True)
+    graded_confidence = Column(String(10), nullable=True)
+    grader_response = Column(Text, nullable=True)
+    # Timestamps
+    created_at = Column(DateTime, server_default=func.now(), nullable=False)
+    completed_at = Column(DateTime, nullable=True)
+    # Error handling
+    research_error = Column(Text, nullable=True)
+    evaluation_error = Column(Text, nullable=True)
+    # Additional metadata
+    task_index = Column(Integer, nullable=True)  # Order in processing
+    result_metadata = Column(JSON, nullable=True)  # Additional data
+    # Relationships
+    benchmark_run = relationship("BenchmarkRun", back_populates="results")
+    # Indexes for performance
+    __table_args__ = (
+        Index(
+            "idx_benchmark_results_run_dataset",
+            "benchmark_run_id",
+            "dataset_type",
+        ),
+        Index("idx_benchmark_results_query_hash", "query_hash"),
+        Index("idx_benchmark_results_completed", "completed_at"),
+        UniqueConstraint(
+            "benchmark_run_id", "query_hash", name="uix_run_query"
+        ),
+        {"extend_existing": True},
+    )
+class BenchmarkConfig(Base):
+    """Saved benchmark configurations for reuse."""
+    __tablename__ = "benchmark_configs"
+    id = Column(Integer, primary_key=True, index=True)
+    # Configuration details
+    name = Column(String(255), nullable=False)
+    description = Column(Text, nullable=True)
+    config_hash = Column(String(16), nullable=False, index=True)
+    # Configuration data
+    search_config = Column(JSON, nullable=False)
+    evaluation_config = Column(JSON, nullable=False)
+    datasets_config = Column(JSON, nullable=False)
+    # Metadata
+    created_at = Column(DateTime, server_default=func.now(), nullable=False)
+    updated_at = Column(
+        DateTime, server_default=func.now(), onupdate=func.now(), nullable=False
+    )
+    is_default = Column(Boolean, default=False, nullable=False)
+    is_public = Column(Boolean, default=True, nullable=False)
+    # Usage tracking
+    usage_count = Column(Integer, default=0, nullable=False)
+    last_used = Column(DateTime, nullable=True)
+    # Performance data (if available)
+    best_accuracy = Column(Float, nullable=True)
+    avg_processing_rate = Column(Float, nullable=True)
+    # Indexes
+    __table_args__ = (
+        Index("idx_benchmark_configs_name", "name"),
+        Index("idx_benchmark_configs_hash", "config_hash"),
+        Index("idx_benchmark_configs_default", "is_default"),
+        {"extend_existing": True},
+    )
+class BenchmarkProgress(Base):
+    """Real-time progress tracking for benchmark runs."""
+    __tablename__ = "benchmark_progress"
+    id = Column(Integer, primary_key=True, index=True)
+    # Foreign key
+    benchmark_run_id = Column(
+        Integer,
+        ForeignKey("benchmark_runs.id", ondelete="CASCADE"),
+        nullable=False,
+        index=True,
+    )
+    # Progress data
+    timestamp = Column(DateTime, server_default=func.now(), nullable=False)
+    completed_examples = Column(Integer, nullable=False)
+    total_examples = Column(Integer, nullable=False)
+    # Accuracy tracking
+    overall_accuracy = Column(Float, nullable=True)
+    dataset_accuracies = Column(JSON, nullable=True)  # Per-dataset accuracy
+    # Performance metrics
+    processing_rate = Column(Float, nullable=True)  # Examples per minute
+    estimated_completion = Column(DateTime, nullable=True)
+    # Current status
+    current_dataset = Column(Enum(DatasetType), nullable=True)
+    current_example_id = Column(String(255), nullable=True)
+    # Additional metrics
+    memory_usage = Column(Float, nullable=True)  # MB
+    cpu_usage = Column(Float, nullable=True)  # Percentage
+    # Relationships
+    benchmark_run = relationship(
+        "BenchmarkRun", back_populates="progress_updates"
+    )
+    # Indexes for real-time queries
+    __table_args__ = (
+        Index(
+            "idx_benchmark_progress_run_time", "benchmark_run_id", "timestamp"
+        ),
+        {"extend_existing": True},
+    )

local_deep_research/benchmarks/ui/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ """Benchmark UI components package."""

local_deep_research/benchmarks/web_api/__init__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Benchmark web API package."""
+from .benchmark_service import BenchmarkService
+from .benchmark_routes import benchmark_bp
+__all__ = ["BenchmarkService", "benchmark_bp"]

local-deep-research 0.5.9__py3-none-any.whl → 0.6.1__py3-none-any.whl

local-deep-research 0.5.9py3-none-any.whl → 0.6.1py3-none-any.whl