local-deep-research 0.3.12__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registries.
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
- local_deep_research/advanced_search_system/findings/repository.py +0 -3
- local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
- local_deep_research/api/benchmark_functions.py +288 -0
- local_deep_research/api/research_functions.py +8 -4
- local_deep_research/benchmarks/README.md +162 -0
- local_deep_research/benchmarks/__init__.py +51 -0
- local_deep_research/benchmarks/benchmark_functions.py +353 -0
- local_deep_research/benchmarks/cli/__init__.py +16 -0
- local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
- local_deep_research/benchmarks/cli.py +347 -0
- local_deep_research/benchmarks/comparison/__init__.py +12 -0
- local_deep_research/benchmarks/comparison/evaluator.py +768 -0
- local_deep_research/benchmarks/datasets/__init__.py +53 -0
- local_deep_research/benchmarks/datasets/base.py +295 -0
- local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
- local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
- local_deep_research/benchmarks/datasets/utils.py +116 -0
- local_deep_research/benchmarks/datasets.py +31 -0
- local_deep_research/benchmarks/efficiency/__init__.py +14 -0
- local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
- local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
- local_deep_research/benchmarks/evaluators/__init__.py +18 -0
- local_deep_research/benchmarks/evaluators/base.py +74 -0
- local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
- local_deep_research/benchmarks/evaluators/composite.py +121 -0
- local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
- local_deep_research/benchmarks/graders.py +410 -0
- local_deep_research/benchmarks/metrics/README.md +80 -0
- local_deep_research/benchmarks/metrics/__init__.py +24 -0
- local_deep_research/benchmarks/metrics/calculation.py +385 -0
- local_deep_research/benchmarks/metrics/reporting.py +155 -0
- local_deep_research/benchmarks/metrics/visualization.py +205 -0
- local_deep_research/benchmarks/metrics.py +11 -0
- local_deep_research/benchmarks/optimization/__init__.py +32 -0
- local_deep_research/benchmarks/optimization/api.py +274 -0
- local_deep_research/benchmarks/optimization/metrics.py +20 -0
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
- local_deep_research/benchmarks/runners.py +434 -0
- local_deep_research/benchmarks/templates.py +65 -0
- local_deep_research/config/llm_config.py +26 -23
- local_deep_research/config/search_config.py +1 -5
- local_deep_research/defaults/default_settings.json +108 -7
- local_deep_research/search_system.py +16 -8
- local_deep_research/utilities/db_utils.py +3 -6
- local_deep_research/utilities/es_utils.py +441 -0
- local_deep_research/utilities/log_utils.py +36 -0
- local_deep_research/utilities/search_utilities.py +8 -9
- local_deep_research/web/app.py +7 -9
- local_deep_research/web/app_factory.py +9 -12
- local_deep_research/web/database/migrations.py +8 -5
- local_deep_research/web/database/models.py +20 -0
- local_deep_research/web/database/schema_upgrade.py +5 -8
- local_deep_research/web/models/database.py +15 -18
- local_deep_research/web/routes/benchmark_routes.py +427 -0
- local_deep_research/web/routes/research_routes.py +13 -17
- local_deep_research/web/routes/settings_routes.py +264 -67
- local_deep_research/web/services/research_service.py +47 -57
- local_deep_research/web/services/settings_manager.py +1 -4
- local_deep_research/web/services/settings_service.py +4 -6
- local_deep_research/web/static/css/styles.css +12 -0
- local_deep_research/web/static/js/components/logpanel.js +164 -155
- local_deep_research/web/static/js/components/research.js +44 -3
- local_deep_research/web/static/js/components/settings.js +27 -0
- local_deep_research/web/static/js/services/socket.js +47 -0
- local_deep_research/web_search_engines/default_search_engines.py +38 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
- local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
- local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
- local_deep_research/web_search_engines/search_engine_base.py +22 -5
- local_deep_research/web_search_engines/search_engine_factory.py +32 -11
- local_deep_research/web_search_engines/search_engines_config.py +14 -1
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/METADATA +10 -2
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/RECORD +92 -49
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/web/routes/benchmark_routes.py (new file, @@ -0,0 +1,427 @@):

```python
"""
Web routes for benchmarking.

This module provides Flask routes for the benchmark web interface.
"""

import logging
import os
import threading

from flask import Blueprint, jsonify, render_template, request

from ...api.benchmark_functions import (
    compare_configurations,
    evaluate_browsecomp,
    evaluate_simpleqa,
    get_available_benchmarks,
)

logger = logging.getLogger(__name__)

# Create blueprint
benchmark_bp = Blueprint("benchmark", __name__, url_prefix="/benchmark")

# Store running jobs
running_jobs = {}


def run_benchmark_task(job_id, benchmark_type, params, callback=None):
    """
    Run a benchmark task in a separate thread.

    Args:
        job_id: Unique job ID
        benchmark_type: Type of benchmark to run
        params: Parameters for the benchmark
        callback: Optional callback to run when job completes
    """
    try:
        # Update job status to running
        running_jobs[job_id]["status"] = "running"

        # Run the benchmark based on type
        if benchmark_type == "simpleqa":
            result = evaluate_simpleqa(**params)
        elif benchmark_type == "browsecomp":
            result = evaluate_browsecomp(**params)
        elif benchmark_type == "compare":
            result = compare_configurations(**params)
        else:
            result = {"error": f"Unknown benchmark type: {benchmark_type}"}

        # Update job with result
        running_jobs[job_id]["status"] = "completed"
        running_jobs[job_id]["result"] = result

        # Call callback if provided
        if callback:
            callback(job_id, result)

    except Exception as e:
        logger.error(f"Error running benchmark job {job_id}: {str(e)}")
        running_jobs[job_id]["status"] = "error"
        running_jobs[job_id]["error"] = str(e)


@benchmark_bp.route("/", methods=["GET"])
def benchmark_dashboard():
    """Render benchmark dashboard."""
    return render_template(
        "benchmark/dashboard.html", benchmarks=get_available_benchmarks()
    )


@benchmark_bp.route("/run", methods=["POST"])
def run_benchmark_endpoint():
    """Run benchmark with specified parameters."""
    data = request.json

    # Extract benchmark type
    benchmark_type = data.get("benchmark_type")
    if not benchmark_type:
        return jsonify({"error": "benchmark_type is required"}), 400

    # Generate job ID
    import uuid

    job_id = str(uuid.uuid4())

    # Extract parameters
    params = {
        "num_examples": data.get("num_examples", 100),
        "search_iterations": data.get("search_iterations", 3),
        "questions_per_iteration": data.get("questions_per_iteration", 3),
        "search_tool": data.get("search_tool", "searxng"),
        "human_evaluation": data.get("human_evaluation", False),
        "output_dir": os.path.join("benchmark_results", job_id),
    }

    # Add optional parameters if present
    if "evaluation_model" in data:
        params["evaluation_model"] = data["evaluation_model"]
    if "evaluation_provider" in data:
        params["evaluation_provider"] = data["evaluation_provider"]

    # Store job info
    running_jobs[job_id] = {
        "id": job_id,
        "benchmark_type": benchmark_type,
        "params": params,
        "status": "pending",
        "start_time": import_time().time(),
    }

    # Start job in background thread
    thread = threading.Thread(
        target=run_benchmark_task, args=(job_id, benchmark_type, params)
    )
    thread.daemon = True
    thread.start()

    return jsonify(
        {
            "status": "started",
            "job_id": job_id,
            "message": f"Benchmark job started: {benchmark_type}",
        }
    )


@benchmark_bp.route("/status/<job_id>", methods=["GET"])
def benchmark_status(job_id):
    """Get status of a benchmark job."""
    if job_id not in running_jobs:
        return jsonify({"error": "Job not found"}), 404

    job = running_jobs[job_id]

    # Calculate runtime if job is running
    if job["status"] == "running":
        job["runtime"] = import_time().time() - job["start_time"]

    return jsonify(job)


@benchmark_bp.route("/results/<job_id>", methods=["GET"])
def benchmark_results(job_id):
    """Get results of a completed benchmark job."""
    if job_id not in running_jobs:
        return jsonify({"error": "Job not found"}), 404

    job = running_jobs[job_id]

    if job["status"] != "completed":
        return jsonify({"error": f"Job is not completed: {job['status']}"}), 400

    # Return job result
    return jsonify(job["result"])


@benchmark_bp.route("/list", methods=["GET"])
def list_benchmarks():
    """List available benchmarks."""
    return jsonify(get_available_benchmarks())


@benchmark_bp.route("/jobs", methods=["GET"])
def list_jobs():
    """List all benchmark jobs."""
    return jsonify(running_jobs)


@benchmark_bp.route("/config", methods=["GET"])
def get_benchmark_config():
    """Get benchmark configuration options."""
    return jsonify(
        {
            "search_tools": [
                {"id": "searxng", "name": "SearXNG"},
                {"id": "wikipedia", "name": "Wikipedia"},
                {"id": "arxiv", "name": "ArXiv"},
                {"id": "pubmed", "name": "PubMed"},
                {"id": "auto", "name": "Auto (Multiple Engines)"},
            ],
            "evaluation_providers": [
                {"id": "openai_endpoint", "name": "Claude (via OpenRouter)"},
                {"id": "openai", "name": "OpenAI"},
                {"id": "anthropic", "name": "Anthropic"},
                {"id": "ollama", "name": "Ollama (Local)"},
            ],
            "evaluation_models": {
                "openai_endpoint": [
                    {"id": "anthropic/claude-3.7-sonnet", "name": "Claude 3.7 Sonnet"}
                ],
                "openai": [
                    {"id": "gpt-4o", "name": "GPT-4o"},
                    {"id": "gpt-4-turbo", "name": "GPT-4 Turbo"},
                    {"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo"},
                ],
                "anthropic": [
                    {"id": "claude-3-opus", "name": "Claude 3 Opus"},
                    {"id": "claude-3-sonnet", "name": "Claude 3 Sonnet"},
                    {"id": "claude-3-haiku", "name": "Claude 3 Haiku"},
                ],
                "ollama": [
                    {"id": "llama3", "name": "Llama 3"},
                    {"id": "gemma:7b", "name": "Gemma 7B"},
                    {"id": "mistral", "name": "Mistral"},
                ],
            },
        }
    )


# Utility function for importing time dynamically to avoid circular imports
def import_time():
    """Import time module dynamically."""
    import time

    return time


# Function to register routes with the main app
def register_blueprint(app):
    """Register benchmark routes with the Flask app."""
    app.register_blueprint(benchmark_bp)

    # Create templates directory if it doesn't exist
    template_dir = os.path.join(
        os.path.dirname(app.root_path), "templates", "benchmark"
    )
    os.makedirs(template_dir, exist_ok=True)

    # Create dashboard template if it doesn't exist
    dashboard_template = os.path.join(template_dir, "dashboard.html")
    if not os.path.exists(dashboard_template):
        with open(dashboard_template, "w") as f:
            f.write(
                """
{% extends "base.html" %}
{% block title %}Benchmarks{% endblock %}
{% block content %}
<div class="container">
    <h1>LDR Benchmarks</h1>

    <div class="card mb-4">
        <div class="card-header">
            <h2>Run Benchmark</h2>
        </div>
        <div class="card-body">
            <form id="benchmarkForm">
                <div class="form-group">
                    <label for="benchmarkType">Benchmark</label>
                    <select class="form-control" id="benchmarkType" required>
                        {% for benchmark in benchmarks %}
                        <option value="{{ benchmark.id }}">{{ benchmark.name }} - {{ benchmark.description }}</option>
                        {% endfor %}
                    </select>
                </div>

                <div class="form-group">
                    <label for="numExamples">Number of Examples</label>
                    <input type="number" class="form-control" id="numExamples" value="10" min="1" max="1000">
                    <small class="form-text text-muted">Higher numbers give more accurate results but take longer</small>
                </div>

                <div class="form-group">
                    <label for="searchIterations">Search Iterations</label>
                    <input type="number" class="form-control" id="searchIterations" value="3" min="1" max="10">
                </div>

                <div class="form-group">
                    <label for="questionsPerIteration">Questions Per Iteration</label>
                    <input type="number" class="form-control" id="questionsPerIteration" value="3" min="1" max="10">
                </div>

                <div class="form-group">
                    <label for="searchTool">Search Tool</label>
                    <select class="form-control" id="searchTool">
                        <option value="searxng">SearXNG</option>
                        <option value="wikipedia">Wikipedia</option>
                        <option value="auto">Auto (Multiple Engines)</option>
                    </select>
                </div>

                <button type="submit" class="btn btn-primary">Start Benchmark</button>
            </form>
        </div>
    </div>

    <div class="card mb-4" id="benchmarkStatus" style="display: none;">
        <div class="card-header">
            <h2>Benchmark Status</h2>
        </div>
        <div class="card-body">
            <div class="progress mb-3">
                <div class="progress-bar" id="benchmarkProgress" role="progressbar" style="width: 0%"></div>
            </div>
            <p id="statusMessage">Initializing benchmark...</p>
            <button class="btn btn-secondary" id="viewResults" style="display: none;">View Results</button>
        </div>
    </div>

    <div class="card" id="benchmarkResults" style="display: none;">
        <div class="card-header">
            <h2>Benchmark Results</h2>
        </div>
        <div class="card-body" id="resultsContent">
        </div>
    </div>
</div>

<script>
let currentJobId = null;
let statusInterval = null;

document.getElementById('benchmarkForm').addEventListener('submit', function(e) {
    e.preventDefault();

    const benchmarkType = document.getElementById('benchmarkType').value;
    const numExamples = document.getElementById('numExamples').value;
    const searchIterations = document.getElementById('searchIterations').value;
    const questionsPerIteration = document.getElementById('questionsPerIteration').value;
    const searchTool = document.getElementById('searchTool').value;

    // Show status card
    document.getElementById('benchmarkStatus').style.display = 'block';
    document.getElementById('benchmarkResults').style.display = 'none';
    document.getElementById('viewResults').style.display = 'none';

    // Start benchmark
    fetch('/benchmark/run', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            benchmark_type: benchmarkType,
            num_examples: parseInt(numExamples),
            search_iterations: parseInt(searchIterations),
            questions_per_iteration: parseInt(questionsPerIteration),
            search_tool: searchTool
        })
    })
    .then(response => response.json())
    .then(data => {
        if (data.job_id) {
            currentJobId = data.job_id;
            document.getElementById('statusMessage').textContent = data.message;

            // Start polling for status updates
            statusInterval = setInterval(checkStatus, 2000);
        } else {
            document.getElementById('statusMessage').textContent = 'Error: ' + data.error;
        }
    })
    .catch(error => {
        document.getElementById('statusMessage').textContent = 'Error: ' + error;
    });
});

function checkStatus() {
    if (!currentJobId) return;

    fetch('/benchmark/status/' + currentJobId)
    .then(response => response.json())
    .then(data => {
        if (data.status === 'running') {
            const runtime = data.runtime || 0;
            document.getElementById('statusMessage').textContent = `Running benchmark... (${Math.round(runtime)}s elapsed)`;
            document.getElementById('benchmarkProgress').style.width = '50%';
        } else if (data.status === 'completed') {
            clearInterval(statusInterval);
            document.getElementById('statusMessage').textContent = 'Benchmark completed successfully!';
            document.getElementById('benchmarkProgress').style.width = '100%';
            document.getElementById('viewResults').style.display = 'inline-block';
            document.getElementById('viewResults').onclick = function() {
                showResults(currentJobId);
            };
        } else if (data.status === 'error') {
            clearInterval(statusInterval);
            document.getElementById('statusMessage').textContent = 'Error: ' + data.error;
            document.getElementById('benchmarkProgress').style.width = '100%';
            document.getElementById('benchmarkProgress').classList.add('bg-danger');
        }
    })
    .catch(error => {
        document.getElementById('statusMessage').textContent = 'Error checking status: ' + error;
    });
}

function showResults(jobId) {
    fetch('/benchmark/results/' + jobId)
    .then(response => response.json())
    .then(data => {
        document.getElementById('benchmarkResults').style.display = 'block';

        let html = '';

        if (data.metrics) {
            html += `<h3>Summary</h3>`;
            html += `<p><strong>Accuracy:</strong> ${(data.metrics.accuracy * 100).toFixed(1)}%</p>`;
            html += `<p><strong>Examples:</strong> ${data.metrics.total_examples}</p>`;
            html += `<p><strong>Correct:</strong> ${data.metrics.correct}</p>`;

            if (data.metrics.average_processing_time) {
                html += `<p><strong>Average Processing Time:</strong> ${data.metrics.average_processing_time.toFixed(2)}s</p>`;
            }

            html += `<p><a href="${data.report_path}" target="_blank" class="btn btn-info">View Full Report</a></p>`;
        } else {
            html += `<p>No metrics available. Check the results file for details.</p>`;
            html += `<p><a href="${data.results_path}" target="_blank" class="btn btn-info">View Results File</a></p>`;
        }

        document.getElementById('resultsContent').innerHTML = html;
    })
    .catch(error => {
        document.getElementById('resultsContent').innerHTML = `<p>Error loading results: ${error}</p>`;
    });
}
</script>
{% endblock %}
"""
            )

    logger.info("Benchmark routes registered")
```
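For orientation, the new blueprint can be exercised over plain HTTP once the web app is running. The sketch below is a minimal polling client based only on the request fields and response keys visible in the routes above; the base URL (host and port) is an assumption and should be adjusted to your deployment, and this is not an official client shipped with the package.

```python
# Hypothetical client for the /benchmark endpoints shown above (stdlib only).
import json
import time
import urllib.request

BASE_URL = "http://localhost:5000"  # assumption: adjust host/port to your deployment

# Start a benchmark job via POST /benchmark/run.
payload = json.dumps({
    "benchmark_type": "simpleqa",
    "num_examples": 10,
    "search_iterations": 3,
    "questions_per_iteration": 3,
    "search_tool": "searxng",
}).encode()

req = urllib.request.Request(
    f"{BASE_URL}/benchmark/run",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    job = json.load(resp)
job_id = job["job_id"]

# Poll GET /benchmark/status/<job_id> until the job finishes or fails.
while True:
    with urllib.request.urlopen(f"{BASE_URL}/benchmark/status/{job_id}") as resp:
        status = json.load(resp)
    if status["status"] in ("completed", "error"):
        break
    time.sleep(2)

# Fetch the final metrics from GET /benchmark/results/<job_id>.
if status["status"] == "completed":
    with urllib.request.urlopen(f"{BASE_URL}/benchmark/results/{job_id}") as resp:
        print(json.load(resp))
```

This mirrors the flow of the bundled dashboard template, which POSTs to /benchmark/run and polls /benchmark/status every two seconds.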
local_deep_research/web/routes/research_routes.py:

```diff
@@ -1,5 +1,4 @@
 import json
-import logging
 import os
 import platform
 import subprocess
@@ -14,14 +13,12 @@ from flask import (
     send_from_directory,
     url_for,
 )
+from loguru import logger
 
 from ..models.database import add_log_to_db, calculate_duration, get_db_connection
 from ..services.research_service import run_research_process, start_research_process
 from ..utils.templates import render_template_with_defaults
 
-# Initialize logger
-logger = logging.getLogger(__name__)
-
 # Create a Blueprint for the research application
 research_bp = Blueprint("research", __name__, url_prefix="/research")
 
@@ -370,8 +367,8 @@ def terminate_research(research_id):
 
         emit_socket_event(f"research_progress_{research_id}", event_data)
 
-    except Exception
-
+    except Exception:
+        logger.exception("Socket emit error (non-critical)")
 
     return jsonify({"status": "success", "message": "Research termination requested"})
 
@@ -411,8 +408,8 @@ def delete_research(research_id):
     if report_path and os.path.exists(report_path):
         try:
             os.remove(report_path)
-        except Exception
-
+        except Exception:
+            logger.exception("Error removing report file")
 
     # Delete the database record
     cursor.execute("DELETE FROM research_history WHERE id = ?", (research_id,))
@@ -443,8 +440,8 @@ def clear_history():
         if report_path and os.path.exists(report_path):
             try:
                 os.remove(report_path)
-            except Exception
-
+            except Exception:
+                logger.exception("Error removing report file")
 
         # Delete records from the database, except active research
         placeholders = ", ".join(["?"] * len(active_research))
@@ -461,6 +458,7 @@ def clear_history():
 
         return jsonify({"status": "success"})
     except Exception as e:
+        logger.exception("Error clearing history")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
@@ -496,6 +494,7 @@ def open_file_location():
 
         return jsonify({"status": "success"})
     except Exception as e:
+        logger.exception("Error opening a file")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
@@ -523,6 +522,7 @@ def save_raw_config():
 
         return jsonify({"success": True})
     except Exception as e:
+        logger.exception("Error saving configuration file")
         return jsonify({"success": False, "error": str(e)}), 500
 
 
@@ -578,8 +578,8 @@ def get_history():
             if completed_at and created_at:
                 try:
                     duration_seconds = calculate_duration(created_at, completed_at)
-                except Exception
-
+                except Exception:
+                    logger.exception("Error calculating duration")
 
             # Create a history item
             item = {
@@ -602,11 +602,7 @@ def get_history():
         conn.close()
         return jsonify({"status": "success", "items": history_items})
     except Exception as e:
-
-        import traceback
-
-        print(f"Error getting history: {e}")
-        print(traceback.format_exc())
+        logger.exception("Error getting history")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
```
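The research_routes changes all follow the same logging pattern adopted in this release: the module-level logging.getLogger is replaced by loguru's shared logger, and error paths that previously printed tracebacks or silently swallowed exceptions now call logger.exception, which records the message together with the active traceback. A minimal illustration of that pattern (a sketch, not code from the package) is:

```python
# Hedged illustration of the loguru error-handling pattern used above.
import os

from loguru import logger


def remove_report(report_path: str) -> None:
    try:
        os.remove(report_path)
    except Exception:
        # logger.exception logs at ERROR level and appends the current traceback,
        # replacing the older print(traceback.format_exc()) style seen in 0.3.12.
        logger.exception("Error removing report file")
```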