local-deep-research 0.1.26__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +96 -84
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +72 -44
  41. local_deep_research/search_system.py +147 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1592 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +211 -159
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.0.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,947 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ import threading
5
+ import traceback
6
+ from datetime import datetime
7
+
8
+ from ...config.llm_config import get_llm
9
+ from ...config.search_config import get_search
10
+ from ...report_generator import IntegratedReportGenerator
11
+ from ...search_system import AdvancedSearchSystem
12
+ from ...utilities.search_utilities import extract_links_from_search_results
13
+ from ..models.database import add_log_to_db, calculate_duration, get_db_connection
14
+ from .socket_service import emit_to_subscribers
15
+
16
+ # Initialize logger
17
+ logger = logging.getLogger(__name__)
18
+
19
+ # Output directory for research results
20
+ OUTPUT_DIR = "research_outputs"
21
+
22
+
23
def start_research_process(
    research_id,
    query,
    mode,
    active_research,
    termination_flags,
    run_research_callback,
    **kwargs,
):
    """
    Start a research process in a background thread.

    The tracking record is registered in ``active_research`` *before* the
    worker thread is started: the worker's progress callback only records
    progress when ``research_id in active_research``, so starting the thread
    first would race the registration and could silently drop the earliest
    progress/log updates.

    Args:
        research_id: The ID of the research
        query: The research query
        mode: The research mode (quick/detailed)
        active_research: Dictionary of active research processes, keyed by
            research_id; this function inserts the tracking record for the
            new research
        termination_flags: Dictionary of termination flags checked by the
            worker to support cooperative cancellation
        run_research_callback: The callback function to run the research;
            called as ``callback(research_id, query, mode, active_research,
            termination_flags, **kwargs)``
        **kwargs: Additional parameters to pass to the research process
            (model, search_engine, etc.)

    Returns:
        threading.Thread: The (daemon) thread running the research
    """
    # Prepare the background worker but do not start it yet.
    thread = threading.Thread(
        target=run_research_callback,
        args=(research_id, query, mode, active_research, termination_flags),
        kwargs=kwargs,
    )
    # Daemon so a lingering research job never blocks interpreter shutdown.
    thread.daemon = True

    # Register the tracking record FIRST so the worker always finds it.
    active_research[research_id] = {
        "thread": thread,
        "progress": 0,
        "status": "in_progress",
        "log": [
            {
                "time": datetime.utcnow().isoformat(),
                "message": "Research started",
                "progress": 0,
            }
        ],
        "settings": kwargs,  # Store settings for reference
    }

    # Only now launch the worker; it can safely update active_research.
    thread.start()

    return thread
71
+
72
+
73
+ def run_research_process(
74
+ research_id, query, mode, active_research, termination_flags, **kwargs
75
+ ):
76
+ """
77
+ Run the research process in the background for a given research ID.
78
+
79
+ Args:
80
+ research_id: The ID of the research
81
+ query: The research query
82
+ mode: The research mode (quick/detailed)
83
+ active_research: Dictionary of active research processes
84
+ termination_flags: Dictionary of termination flags
85
+ **kwargs: Additional parameters for the research (model_provider, model, search_engine, etc.)
86
+ """
87
+ try:
88
+ # Check if this research has been terminated before we even start
89
+ if research_id in termination_flags and termination_flags[research_id]:
90
+ logger.info(f"Research {research_id} was terminated before starting")
91
+ cleanup_research_resources(research_id, active_research, termination_flags)
92
+ return
93
+
94
+ logger.info(
95
+ "Starting research process for ID %s, query: %s", research_id, query
96
+ )
97
+
98
+ # Extract key parameters
99
+ model_provider = kwargs.get("model_provider")
100
+ model = kwargs.get("model")
101
+ custom_endpoint = kwargs.get("custom_endpoint")
102
+ search_engine = kwargs.get("search_engine")
103
+ max_results = kwargs.get("max_results")
104
+ time_period = kwargs.get("time_period")
105
+ iterations = kwargs.get("iterations")
106
+ questions_per_iteration = kwargs.get("questions_per_iteration")
107
+
108
+ # Log all parameters for debugging
109
+ logger.info(
110
+ "Research parameters: provider=%s, model=%s, search_engine=%s, "
111
+ "max_results=%s, time_period=%s, iterations=%s, "
112
+ "questions_per_iteration=%s, custom_endpoint=%s",
113
+ model_provider,
114
+ model,
115
+ search_engine,
116
+ max_results,
117
+ time_period,
118
+ iterations,
119
+ questions_per_iteration,
120
+ custom_endpoint,
121
+ )
122
+
123
+ # Set up the AI Context Manager
124
+ output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
125
+ os.makedirs(output_dir, exist_ok=True)
126
+
127
+ # Set up progress callback
128
+ def progress_callback(message, progress_percent, metadata):
129
+ # Frequent termination check
130
+ if research_id in termination_flags and termination_flags[research_id]:
131
+ handle_termination(research_id, active_research, termination_flags)
132
+ raise Exception("Research was terminated by user")
133
+ if "SEARCH_PLAN:" in message:
134
+ engines = message.split("SEARCH_PLAN:")[1].strip()
135
+ metadata["planned_engines"] = engines
136
+ metadata["phase"] = "search_planning" # Use existing phase
137
+
138
+ if "ENGINE_SELECTED:" in message:
139
+ engine = message.split("ENGINE_SELECTED:")[1].strip()
140
+ metadata["selected_engine"] = engine
141
+ metadata["phase"] = "search" # Use existing 'search' phase
142
+
143
+ timestamp = datetime.utcnow().isoformat()
144
+
145
+ # Adjust progress based on research mode
146
+ adjusted_progress = progress_percent
147
+ if mode == "detailed" and metadata.get("phase") == "output_generation":
148
+ # For detailed mode, adjust the progress range for output generation
149
+ adjusted_progress = min(80, progress_percent)
150
+ elif mode == "detailed" and metadata.get("phase") == "report_generation":
151
+ # Scale the progress from 80% to 95% for the report generation phase
152
+ if progress_percent is not None:
153
+ normalized = progress_percent / 100
154
+ adjusted_progress = 80 + (normalized * 15)
155
+ elif mode == "quick" and metadata.get("phase") == "output_generation":
156
+ # For quick mode, ensure we're at least at 85% during output generation
157
+ adjusted_progress = max(85, progress_percent)
158
+ # Map any further progress within output_generation to 85-95% range
159
+ if progress_percent is not None and progress_percent > 0:
160
+ normalized = progress_percent / 100
161
+ adjusted_progress = 85 + (normalized * 10)
162
+
163
+ # Don't let progress go backwards
164
+ if research_id in active_research and adjusted_progress is not None:
165
+ current_progress = active_research[research_id].get("progress", 0)
166
+ adjusted_progress = max(current_progress, adjusted_progress)
167
+
168
+ log_entry = {
169
+ "time": timestamp,
170
+ "message": message,
171
+ "progress": adjusted_progress,
172
+ "metadata": metadata,
173
+ }
174
+
175
+ # Update active research record
176
+ if research_id in active_research:
177
+ active_research[research_id]["log"].append(log_entry)
178
+ if adjusted_progress is not None:
179
+ active_research[research_id]["progress"] = adjusted_progress
180
+
181
+ # Determine log type for database storage
182
+ log_type = "info"
183
+ if metadata and metadata.get("phase"):
184
+ phase = metadata.get("phase")
185
+ if phase in ["complete", "iteration_complete"]:
186
+ log_type = "milestone"
187
+ elif phase == "error" or "error" in message.lower():
188
+ log_type = "error"
189
+
190
+ # Save logs to the database
191
+ add_log_to_db(
192
+ research_id,
193
+ message,
194
+ log_type=log_type,
195
+ progress=adjusted_progress,
196
+ metadata=metadata,
197
+ )
198
+
199
+ # Update progress in the research_history table (for backward compatibility)
200
+ conn = get_db_connection()
201
+ cursor = conn.cursor()
202
+
203
+ # Update the progress and log separately to avoid race conditions
204
+ if adjusted_progress is not None:
205
+ cursor.execute(
206
+ "UPDATE research_history SET progress = ? WHERE id = ?",
207
+ (adjusted_progress, research_id),
208
+ )
209
+
210
+ # Add the log entry to the progress_log
211
+ cursor.execute(
212
+ "SELECT progress_log FROM research_history WHERE id = ?",
213
+ (research_id,),
214
+ )
215
+ log_result = cursor.fetchone()
216
+
217
+ if log_result:
218
+ try:
219
+ current_log = json.loads(log_result[0])
220
+ except Exception:
221
+ current_log = []
222
+
223
+ current_log.append(log_entry)
224
+ cursor.execute(
225
+ "UPDATE research_history SET progress_log = ? WHERE id = ?",
226
+ (json.dumps(current_log), research_id),
227
+ )
228
+
229
+ conn.commit()
230
+ conn.close()
231
+
232
+ # Emit a socket event
233
+ try:
234
+ # Basic event data
235
+ event_data = {"message": message, "progress": adjusted_progress}
236
+
237
+ # Add log entry in full format for detailed logging on client
238
+ if metadata:
239
+ event_data["log_entry"] = log_entry
240
+
241
+ emit_to_subscribers("research_progress", research_id, event_data)
242
+ except Exception as e:
243
+ logger.error(f"Socket emit error (non-critical): {str(e)}")
244
+
245
+ # Function to check termination during long-running operations
246
+ def check_termination():
247
+ if research_id in termination_flags and termination_flags[research_id]:
248
+ handle_termination(research_id, active_research, termination_flags)
249
+ raise Exception(
250
+ "Research was terminated by user during long-running operation"
251
+ )
252
+ return False # Not terminated
253
+
254
+ # Configure the system with the specified parameters
255
+ use_llm = None
256
+ if model or search_engine or model_provider:
257
+ # Log that we're overriding system settings
258
+ logger.info(
259
+ f"Overriding system settings with: provider={model_provider}, model={model}, search_engine={search_engine}"
260
+ )
261
+
262
+ # Override LLM if model or model_provider specified
263
+ if model or model_provider:
264
+ try:
265
+ # Get LLM with the overridden settings
266
+ # Explicitly create the model with parameters to avoid fallback issues
267
+ use_llm = get_llm(
268
+ model_name=model,
269
+ provider=model_provider,
270
+ openai_endpoint_url=custom_endpoint,
271
+ )
272
+
273
+ logger.info(
274
+ "Successfully set LLM to: provider=%s, model=%s",
275
+ model_provider,
276
+ model,
277
+ )
278
+ except Exception as e:
279
+ logger.error(
280
+ "Error setting LLM provider=%s, model=%s: %s",
281
+ model_provider,
282
+ model,
283
+ str(e),
284
+ )
285
+ logger.error(traceback.format_exc())
286
+
287
+ # Set the progress callback in the system
288
+ system = AdvancedSearchSystem(llm=use_llm)
289
+ system.set_progress_callback(progress_callback)
290
+
291
+ # Override search engine if specified
292
+ if search_engine:
293
+ try:
294
+ if iterations:
295
+ system.max_iterations = int(iterations)
296
+ if questions_per_iteration:
297
+ system.questions_per_iteration = int(questions_per_iteration)
298
+
299
+ # Create a new search object with these settings
300
+ system.search = get_search(
301
+ search_tool=search_engine, llm_instance=system.model
302
+ )
303
+
304
+ logger.info("Successfully set search engine to: %s", search_engine)
305
+ except Exception as e:
306
+ logger.error(
307
+ "Error setting search engine to %s: %s", search_engine, str(e)
308
+ )
309
+
310
+ # Run the search
311
+ progress_callback("Starting research process", 5, {"phase": "init"})
312
+
313
+ try:
314
+ results = system.analyze_topic(query)
315
+ if mode == "quick":
316
+ progress_callback(
317
+ "Search complete, preparing to generate summary...",
318
+ 85,
319
+ {"phase": "output_generation"},
320
+ )
321
+ else:
322
+ progress_callback(
323
+ "Search complete, generating output",
324
+ 80,
325
+ {"phase": "output_generation"},
326
+ )
327
+ except Exception as search_error:
328
+ # Better handling of specific search errors
329
+ error_message = str(search_error)
330
+ error_type = "unknown"
331
+
332
+ # Extract error details for common issues
333
+ if "status code: 503" in error_message:
334
+ error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
335
+ error_type = "ollama_unavailable"
336
+ elif "status code: 404" in error_message:
337
+ error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
338
+ error_type = "model_not_found"
339
+ elif "status code:" in error_message:
340
+ # Extract the status code for other HTTP errors
341
+ status_code = error_message.split("status code:")[1].strip()
342
+ error_message = f"API request failed with status code {status_code}. Please check your configuration."
343
+ error_type = "api_error"
344
+ elif "connection" in error_message.lower():
345
+ error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
346
+ error_type = "connection_error"
347
+
348
+ # Raise with improved error message
349
+ raise Exception(f"{error_message} (Error type: {error_type})")
350
+
351
+ # Generate output based on mode
352
+ if mode == "quick":
353
+ # Quick Summary
354
+ if results.get("findings") or results.get("formatted_findings"):
355
+ raw_formatted_findings = results["formatted_findings"]
356
+
357
+ # Check if formatted_findings contains an error message
358
+ if isinstance(
359
+ raw_formatted_findings, str
360
+ ) and raw_formatted_findings.startswith("Error:"):
361
+ import traceback
362
+
363
+ logger.warning(
364
+ f"Detected error in formatted findings: {raw_formatted_findings[:100]}... stack trace: {traceback.format_exc()}"
365
+ )
366
+
367
+ # Determine error type for better user feedback
368
+ error_type = "unknown"
369
+ error_message = raw_formatted_findings.lower()
370
+
371
+ if (
372
+ "token limit" in error_message
373
+ or "context length" in error_message
374
+ ):
375
+ error_type = "token_limit"
376
+ # Log specific error type
377
+ logger.warning("Detected token limit error in synthesis")
378
+
379
+ # Update progress with specific error type
380
+ progress_callback(
381
+ "Synthesis hit token limits. Attempting fallback...",
382
+ 87,
383
+ {"phase": "synthesis_error", "error_type": error_type},
384
+ )
385
+ elif "timeout" in error_message or "timed out" in error_message:
386
+ error_type = "timeout"
387
+ logger.warning("Detected timeout error in synthesis")
388
+ progress_callback(
389
+ "Synthesis timed out. Attempting fallback...",
390
+ 87,
391
+ {"phase": "synthesis_error", "error_type": error_type},
392
+ )
393
+ elif "rate limit" in error_message:
394
+ error_type = "rate_limit"
395
+ logger.warning("Detected rate limit error in synthesis")
396
+ progress_callback(
397
+ "LLM rate limit reached. Attempting fallback...",
398
+ 87,
399
+ {"phase": "synthesis_error", "error_type": error_type},
400
+ )
401
+ elif "connection" in error_message or "network" in error_message:
402
+ error_type = "connection"
403
+ logger.warning("Detected connection error in synthesis")
404
+ progress_callback(
405
+ "Connection issue with LLM. Attempting fallback...",
406
+ 87,
407
+ {"phase": "synthesis_error", "error_type": error_type},
408
+ )
409
+ elif (
410
+ "llm error" in error_message
411
+ or "final answer synthesis fail" in error_message
412
+ ):
413
+ error_type = "llm_error"
414
+ logger.warning("Detected general LLM error in synthesis")
415
+ progress_callback(
416
+ "LLM error during synthesis. Attempting fallback...",
417
+ 87,
418
+ {"phase": "synthesis_error", "error_type": error_type},
419
+ )
420
+ else:
421
+ # Generic error
422
+ logger.warning("Detected unknown error in synthesis")
423
+ progress_callback(
424
+ "Error during synthesis. Attempting fallback...",
425
+ 87,
426
+ {"phase": "synthesis_error", "error_type": "unknown"},
427
+ )
428
+
429
+ # Extract synthesized content from findings if available
430
+ synthesized_content = ""
431
+ for finding in results.get("findings", []):
432
+ if finding.get("phase") == "Final synthesis":
433
+ synthesized_content = finding.get("content", "")
434
+ break
435
+
436
+ # Use synthesized content as fallback
437
+ if synthesized_content and not synthesized_content.startswith(
438
+ "Error:"
439
+ ):
440
+
441
+ logger.info("Using existing synthesized content as fallback")
442
+ raw_formatted_findings = synthesized_content
443
+
444
+ # Or use current_knowledge as another fallback
445
+ elif results.get("current_knowledge"):
446
+ logger.info("Using current_knowledge as fallback")
447
+ raw_formatted_findings = results["current_knowledge"]
448
+
449
+ # Or combine all finding contents as last resort
450
+ elif results.get("findings"):
451
+ logger.info("Combining all findings as fallback")
452
+ # First try to use any findings that are not errors
453
+ valid_findings = [
454
+ f"## {finding.get('phase', 'Finding')}\n\n{finding.get('content', '')}"
455
+ for finding in results.get("findings", [])
456
+ if finding.get("content")
457
+ and not finding.get("content", "").startswith("Error:")
458
+ ]
459
+
460
+ if valid_findings:
461
+ raw_formatted_findings = (
462
+ "# Research Results (Fallback Mode)\n\n"
463
+ )
464
+ raw_formatted_findings += "\n\n".join(valid_findings)
465
+ raw_formatted_findings += (
466
+ f"\n\n## Error Information\n{raw_formatted_findings}"
467
+ )
468
+ else:
469
+ # Last resort: use everything including errors
470
+ raw_formatted_findings = (
471
+ "# Research Results (Emergency Fallback)\n\n"
472
+ )
473
+ raw_formatted_findings += "The system encountered errors during final synthesis.\n\n"
474
+ raw_formatted_findings += "\n\n".join(
475
+ f"## {finding.get('phase', 'Finding')}\n\n{finding.get('content', '')}"
476
+ for finding in results.get("findings", [])
477
+ if finding.get("content")
478
+ )
479
+
480
+ progress_callback(
481
+ f"Using fallback synthesis due to {error_type} error",
482
+ 88,
483
+ {"phase": "synthesis_fallback", "error_type": error_type},
484
+ )
485
+
486
+ logger.info(
487
+ "Found formatted_findings of length: %s",
488
+ len(str(raw_formatted_findings)),
489
+ )
490
+
491
+ try:
492
+ # Get the synthesized content from the LLM directly
493
+ clean_markdown = raw_formatted_findings
494
+
495
+ # Extract all sources from findings to add them to the summary
496
+ all_links = []
497
+ for finding in results.get("findings", []):
498
+ search_results = finding.get("search_results", [])
499
+ if search_results:
500
+ try:
501
+ links = extract_links_from_search_results(
502
+ search_results
503
+ )
504
+ all_links.extend(links)
505
+ except Exception as link_err:
506
+ logger.error(
507
+ f"Error processing search results/links: {link_err}"
508
+ )
509
+
510
+ logger.info(
511
+ "Successfully converted to clean markdown of length: %s",
512
+ len(clean_markdown),
513
+ )
514
+
515
+ # First send a progress update for generating the summary
516
+ progress_callback(
517
+ "Generating clean summary from research data...",
518
+ 90,
519
+ {"phase": "output_generation"},
520
+ )
521
+
522
+ # Save as markdown file
523
+ if not os.path.exists(OUTPUT_DIR):
524
+ os.makedirs(OUTPUT_DIR)
525
+
526
+ safe_query = "".join(
527
+ x for x in query if x.isalnum() or x in [" ", "-", "_"]
528
+ )[:50]
529
+ safe_query = safe_query.replace(" ", "_").lower()
530
+ report_path = os.path.join(
531
+ OUTPUT_DIR, f"quick_summary_{safe_query}.md"
532
+ )
533
+
534
+ # Send progress update for writing to file
535
+ progress_callback(
536
+ "Writing research report to file...",
537
+ 95,
538
+ {"phase": "report_complete"},
539
+ )
540
+
541
+ logger.info("Writing report to: %s", report_path)
542
+ with open(report_path, "w", encoding="utf-8") as f:
543
+ f.write("# Quick Research Summary\n\n")
544
+ f.write(f"Query: {query}\n\n")
545
+ f.write(clean_markdown)
546
+ f.write("\n\n## Research Metrics\n")
547
+ f.write(f"- Search Iterations: {results['iterations']}\n")
548
+ f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
549
+
550
+ # Update database
551
+ metadata = {
552
+ "iterations": results["iterations"],
553
+ "generated_at": datetime.utcnow().isoformat(),
554
+ }
555
+
556
+ # Calculate duration in seconds - using UTC consistently
557
+ now = datetime.utcnow()
558
+ completed_at = now.isoformat()
559
+
560
+ logger.info("Updating database for research_id: %s", research_id)
561
+ # Get the start time from the database
562
+ conn = get_db_connection()
563
+ cursor = conn.cursor()
564
+ cursor.execute(
565
+ "SELECT created_at FROM research_history WHERE id = ?",
566
+ (research_id,),
567
+ )
568
+ result = cursor.fetchone()
569
+
570
+ # Use the helper function for consistent duration calculation
571
+ duration_seconds = calculate_duration(result[0])
572
+
573
+ # Update the record
574
+ cursor.execute(
575
+ "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?",
576
+ (
577
+ "completed",
578
+ completed_at,
579
+ duration_seconds,
580
+ report_path,
581
+ json.dumps(metadata),
582
+ research_id,
583
+ ),
584
+ )
585
+ conn.commit()
586
+ conn.close()
587
+ logger.info(
588
+ f"Database updated successfully for research_id: {research_id}"
589
+ )
590
+
591
+ # Send the final completion message
592
+ progress_callback(
593
+ "Research completed successfully",
594
+ 100,
595
+ {"phase": "complete", "report_path": report_path},
596
+ )
597
+
598
+ # Clean up resources
599
+ logger.info(
600
+ "Cleaning up resources for research_id: %s", research_id
601
+ )
602
+ cleanup_research_resources(
603
+ research_id, active_research, termination_flags
604
+ )
605
+ logger.info("Resources cleaned up for research_id: %s", research_id)
606
+
607
+ except Exception as inner_e:
608
+ logger.error(
609
+ "Error during quick summary generation: %s", str(inner_e)
610
+ )
611
+ logger.error(traceback.format_exc())
612
+ raise Exception(f"Error generating quick summary: {str(inner_e)}")
613
+ else:
614
+ raise Exception(
615
+ "No research findings were generated. Please try again."
616
+ )
617
+ else:
618
+ # Full Report
619
+ progress_callback(
620
+ "Generating detailed report...", 85, {"phase": "report_generation"}
621
+ )
622
+
623
+ # Extract the search system from the results if available
624
+ search_system = results.get("search_system", None)
625
+
626
+ # Pass the existing search system to maintain citation indices
627
+ report_generator = IntegratedReportGenerator(search_system=search_system)
628
+ final_report = report_generator.generate_report(results, query)
629
+
630
+ progress_callback(
631
+ "Report generation complete", 95, {"phase": "report_complete"}
632
+ )
633
+
634
+ # Save as markdown file
635
+ if not os.path.exists(OUTPUT_DIR):
636
+ os.makedirs(OUTPUT_DIR)
637
+
638
+ safe_query = "".join(
639
+ x for x in query if x.isalnum() or x in [" ", "-", "_"]
640
+ )[:50]
641
+ safe_query = safe_query.replace(" ", "_").lower()
642
+ report_path = os.path.join(OUTPUT_DIR, f"detailed_report_{safe_query}.md")
643
+
644
+ with open(report_path, "w", encoding="utf-8") as f:
645
+ f.write(final_report["content"])
646
+
647
+ # Update database
648
+ metadata = final_report["metadata"]
649
+ metadata["iterations"] = results["iterations"]
650
+
651
+ # Calculate duration in seconds - using UTC consistently
652
+ now = datetime.utcnow()
653
+ completed_at = now.isoformat()
654
+
655
+ # Get the start time from the database
656
+ conn = get_db_connection()
657
+ cursor = conn.cursor()
658
+ cursor.execute(
659
+ "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
660
+ )
661
+ result = cursor.fetchone()
662
+
663
+ # Use the helper function for consistent duration calculation
664
+ duration_seconds = calculate_duration(result[0])
665
+
666
+ cursor.execute(
667
+ "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?",
668
+ (
669
+ "completed",
670
+ completed_at,
671
+ duration_seconds,
672
+ report_path,
673
+ json.dumps(metadata),
674
+ research_id,
675
+ ),
676
+ )
677
+ conn.commit()
678
+ conn.close()
679
+
680
+ progress_callback(
681
+ "Research completed successfully",
682
+ 100,
683
+ {"phase": "complete", "report_path": report_path},
684
+ )
685
+
686
+ # Clean up resources
687
+ cleanup_research_resources(research_id, active_research, termination_flags)
688
+
689
+ except Exception as e:
690
+ # Handle error
691
+ error_message = f"Research failed: {str(e)}"
692
+ logger.error(error_message)
693
+ try:
694
+ # Check for common Ollama error patterns in the exception and provide more user-friendly errors
695
+ user_friendly_error = str(e)
696
+ error_context = {}
697
+
698
+ if "Error type: ollama_unavailable" in user_friendly_error:
699
+ user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
700
+ error_context = {
701
+ "solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."
702
+ }
703
+ elif "Error type: model_not_found" in user_friendly_error:
704
+ user_friendly_error = (
705
+ "Required Ollama model not found. Please pull the model first."
706
+ )
707
+ error_context = {
708
+ "solution": "Run 'ollama pull mistral' to download the required model."
709
+ }
710
+ elif "Error type: connection_error" in user_friendly_error:
711
+ user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
712
+ error_context = {
713
+ "solution": "Ensure Ollama or your API service is running and accessible."
714
+ }
715
+ elif "Error type: api_error" in user_friendly_error:
716
+ # Keep the original error message as it's already improved
717
+ error_context = {"solution": "Check API configuration and credentials."}
718
+
719
+ # Update metadata with more context about the error
720
+ metadata = {"phase": "error", "error": user_friendly_error}
721
+ if error_context:
722
+ metadata.update(error_context)
723
+
724
+ # If we still have an active research record, update its log
725
+ if research_id in active_research:
726
+ progress_callback(user_friendly_error, None, metadata)
727
+
728
+ conn = get_db_connection()
729
+ cursor = conn.cursor()
730
+
731
+ # If termination was requested, mark as suspended instead of failed
732
+ status = (
733
+ "suspended"
734
+ if (research_id in termination_flags and termination_flags[research_id])
735
+ else "failed"
736
+ )
737
+ message = (
738
+ "Research was terminated by user"
739
+ if status == "suspended"
740
+ else user_friendly_error
741
+ )
742
+
743
+ # Calculate duration up to termination point - using UTC consistently
744
+ now = datetime.utcnow()
745
+ completed_at = now.isoformat()
746
+
747
+ # Get the start time from the database
748
+ duration_seconds = None
749
+ cursor.execute(
750
+ "SELECT created_at FROM research_history WHERE id = ?", (research_id,)
751
+ )
752
+ result = cursor.fetchone()
753
+
754
+ # Use the helper function for consistent duration calculation
755
+ if result and result[0]:
756
+ duration_seconds = calculate_duration(result[0])
757
+
758
+ cursor.execute(
759
+ "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?",
760
+ (
761
+ status,
762
+ completed_at,
763
+ duration_seconds,
764
+ json.dumps(metadata),
765
+ research_id,
766
+ ),
767
+ )
768
+ conn.commit()
769
+ conn.close()
770
+
771
+ try:
772
+ emit_to_subscribers(
773
+ "research_progress",
774
+ research_id,
775
+ {"status": status, "error": message},
776
+ )
777
+ except Exception as socket_error:
778
+ logger.error(f"Failed to emit error via socket: {str(socket_error)}")
779
+
780
+ except Exception as inner_e:
781
+ logger.error(f"Error in error handler: {str(inner_e)}")
782
+ logger.error(traceback.format_exc())
783
+
784
+ # Clean up resources
785
+ cleanup_research_resources(research_id, active_research, termination_flags)
786
+
787
+
788
def cleanup_research_resources(research_id, active_research, termination_flags):
    """
    Clean up resources for a completed research.

    Looks up the research's final status in the database, removes the
    in-memory bookkeeping entries, and sends a final status message to
    any remaining socket subscribers.

    Args:
        research_id: The ID of the research
        active_research: Dictionary of active research processes
        termination_flags: Dictionary of termination flags
    """
    logger.info("Cleaning up resources for research %s", research_id)

    # Get the current status from the database to determine the final status message.
    # Default to "completed" so subscribers still get a final message if the lookup fails.
    current_status = "completed"
    conn = None
    try:
        conn = get_db_connection()
        cursor = conn.cursor()
        cursor.execute(
            "SELECT status FROM research_history WHERE id = ?", (research_id,)
        )
        result = cursor.fetchone()
        if result and result[0]:
            current_status = result[0]
    except Exception as e:
        logger.error("Error retrieving research status during cleanup: %s", e)
    finally:
        # Always release the connection, even when the query raised
        # (the original only closed it on the success path, leaking on error).
        if conn is not None:
            conn.close()

    # Remove in-memory bookkeeping for this research; pop() tolerates
    # the key already being gone.
    active_research.pop(research_id, None)
    termination_flags.pop(research_id, None)

    # Send a final message to subscribers
    try:
        # Import here to avoid circular imports
        from ..routes.research_routes import get_globals

        globals_dict = get_globals()
        socket_subscriptions = globals_dict.get("socket_subscriptions", {})

        # Send a final message to any remaining subscribers with explicit status
        if research_id in socket_subscriptions and socket_subscriptions[research_id]:
            # Use the proper status message based on database status
            if current_status in ("suspended", "failed"):
                final_message = {
                    "status": current_status,
                    "message": f"Research was {current_status}",
                    "progress": 0,  # For suspended research, show 0% not 100%
                }
            else:
                final_message = {
                    "status": "completed",
                    "message": "Research process has ended and resources have been cleaned up",
                    "progress": 100,
                }

            logger.info(
                "Sending final %s socket message for research %s",
                current_status,
                research_id,
            )

            emit_to_subscribers("research_progress", research_id, final_message)

    except Exception as e:
        logger.error("Error sending final cleanup message: %s", e)
+
857
+
858
def handle_termination(research_id, active_research, termination_flags):
    """
    Handle the termination of a research process.

    Marks the research as "suspended" in the database (recording the
    completion timestamp and elapsed duration), then cleans up in-memory
    resources via cleanup_research_resources().

    Args:
        research_id: The ID of the research
        active_research: Dictionary of active research processes
        termination_flags: Dictionary of termination flags
    """
    # Explicitly set the status to suspended in the database
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # Calculate duration up to termination point - using UTC consistently
        completed_at = datetime.utcnow().isoformat()

        # Get the start time from the database
        cursor.execute(
            "SELECT created_at FROM research_history WHERE id = ?",
            (research_id,),
        )
        result = cursor.fetchone()

        # Calculate the duration; None when the record has no start time
        duration_seconds = (
            calculate_duration(result[0]) if result and result[0] else None
        )

        # Update the database with suspended status
        cursor.execute(
            "UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?",
            ("suspended", completed_at, duration_seconds, research_id),
        )
        conn.commit()
    finally:
        # Release the connection even if a query fails
        # (the original leaked it on any exception).
        conn.close()

    # Clean up resources
    cleanup_research_resources(research_id, active_research, termination_flags)
+
896
+
897
def cancel_research(research_id):
    """
    Cancel/terminate a research process

    Sets the termination flag so a running process can stop itself; if the
    research is actively running, delegates to handle_termination() to
    update the database and clean up. Otherwise updates the database
    record directly.

    Args:
        research_id: The ID of the research to cancel

    Returns:
        bool: True if the research was found and cancelled, False otherwise
    """
    # Import globals from research routes
    from ..routes.research_routes import get_globals

    globals_dict = get_globals()
    active_research = globals_dict["active_research"]
    termination_flags = globals_dict["termination_flags"]

    # Set termination flag
    termination_flags[research_id] = True

    # Check if the research is active
    if research_id in active_research:
        # Call handle_termination to update database
        handle_termination(research_id, active_research, termination_flags)
        return True

    # Update database directly if not found in active_research.
    # NOTE: the redundant local import of get_db_connection was removed;
    # the module-level name is already used by sibling functions.
    conn = get_db_connection()
    try:
        cursor = conn.cursor()

        # First check if the research exists
        cursor.execute(
            "SELECT status FROM research_history WHERE id = ?", (research_id,)
        )
        if cursor.fetchone() is None:
            return False

        # If it exists but isn't in active_research, still update status
        cursor.execute(
            "UPDATE research_history SET status = ? WHERE id = ?",
            ("suspended", research_id),
        )
        conn.commit()
        return True
    finally:
        # Connection is closed on every path: missing record, success, or error
        # (the original leaked it if a query raised).
        conn.close()