local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
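The listing shows 0.2.2 splitting the former flat layout into config/, utilities/ (renamed from utilties), web/routes, web/services, web/models, and a new advanced_search_system package, with web/app.py reduced to a thin entry point around the new app_factory. As a rough orientation aid, the sketch below shows what imports against the reorganized layout might look like. The module paths are taken from the file list above; none of these lines appear verbatim in the diff, and the comments about each module's role are inferences from the names, not documented API.

    # Hypothetical import sketch for local-deep-research 0.2.2.
    # Module paths come from the file list above; roles in the comments are inferred, not documented.
    from local_deep_research.config.config_files import settings       # replaces local_deep_research.config from 0.1.26
    from local_deep_research.utilities.db_utils import get_db_setting  # note the utilties -> utilities rename
    from local_deep_research.web.app_factory import create_app         # Flask app construction moved out of web/app.py
    from local_deep_research.web.models.database import DB_PATH        # consolidated database location used by the web app

web/app.py itself, diffed below, shrinks to importing create_app, running setup_data_dir(), applying schema upgrades to an existing database, and checking whether the legacy databases need migration.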
local_deep_research/web/app.py
@@ -1,1746 +1,123 @@
- import os
- import json
- import time
- import sqlite3
- import threading
- from datetime import datetime
- from flask import Flask, render_template, request, jsonify, send_from_directory, Response, make_response, current_app, Blueprint, redirect, url_for, flash
- from flask_socketio import SocketIO, emit
- from local_deep_research.search_system import AdvancedSearchSystem
- from local_deep_research.report_generator import IntegratedReportGenerator
- # Move this import up to ensure it's available globally
- from dateutil import parser
- import traceback
- import pkg_resources
- # Import the new configuration manager
- from local_deep_research.config import get_config_dir
  import logging
- logger = logging.getLogger(__name__)
-
- CONFIG_DIR = get_config_dir() / "config"
- MAIN_CONFIG_FILE = CONFIG_DIR / "settings.toml"
- LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
- LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
- import toml
-
- # Set flag for tracking OpenAI availability - we'll check it only when needed
- OPENAI_AVAILABLE = False
-
- # Initialize Flask app
- try:
-     import os
-     import logging
-     from local_deep_research.utilties.setup_utils import setup_user_directories
-
-     # Configure logging
-     logging.basicConfig(level=logging.INFO)
-     logger = logging.getLogger(__name__)
-
-     # Explicitly run setup
-     logger.info("Initializing configuration...")
-     setup_user_directories()
-
-     # Get directories based on package installation
-     PACKAGE_DIR = pkg_resources.resource_filename('local_deep_research', 'web')
-     STATIC_DIR = os.path.join(PACKAGE_DIR, 'static')
-     TEMPLATE_DIR = os.path.join(PACKAGE_DIR, 'templates')
-
-     # Setup logging
-     logging.basicConfig(level=logging.INFO)
-
-     # Create directories and default configs if needed
-     setup_user_directories()
-
-     # Initialize Flask app with package directories
-     app = Flask(__name__,
-                 static_folder=STATIC_DIR,
-                 template_folder=TEMPLATE_DIR)
-     print(f"Using package static path: {STATIC_DIR}")
-     print(f"Using package template path: {TEMPLATE_DIR}")
- except Exception as e:
-     # Fallback for development
-     print(f"Package directories not found, using fallback paths: {str(e)}")
-     app = Flask(__name__,
-                 static_folder=os.path.abspath('static'),
-                 template_folder=os.path.abspath('templates'))
- app.config['SECRET_KEY'] = 'deep-research-secret-key'
-
- # Create a Blueprint for the research application
- research_bp = Blueprint('research', __name__, url_prefix='/research')
-
- # Add improved Socket.IO configuration with better error handling
- socketio = SocketIO(
-     app,
-     cors_allowed_origins="*",
-     async_mode='threading',
-     path='/research/socket.io',
-     logger=True,
-     engineio_logger=True,
-     ping_timeout=20,
-     ping_interval=5
+ import os
+ import sys
+
+ from ..config.config_files import settings
+ from ..setup_data_dir import setup_data_dir
+ from ..utilities.db_utils import get_db_setting
+ from .app_factory import create_app
+ from .models.database import (
+     DB_PATH,
+     LEGACY_DEEP_RESEARCH_DB,
+     LEGACY_RESEARCH_HISTORY_DB,
  )
 
- # Active research processes and socket subscriptions
- active_research = {}
- socket_subscriptions = {}
-
- # Add termination flags dictionary
- termination_flags = {}
-
- # Database setup
- DB_PATH = 'research_history.db'
-
- # Output directory for research results
- OUTPUT_DIR = 'research_outputs'
-
- # Add Content Security Policy headers to allow Socket.IO to function
- @app.after_request
- def add_security_headers(response):
-     # Define a permissive CSP for development that allows Socket.IO to function
-     csp = (
-         "default-src 'self'; "
-         "connect-src 'self' ws: wss: http: https:; "
-         "script-src 'self' 'unsafe-inline' 'unsafe-eval' cdnjs.cloudflare.com cdn.jsdelivr.net unpkg.com; "
-         "style-src 'self' 'unsafe-inline' cdnjs.cloudflare.com; "
-         "font-src 'self' cdnjs.cloudflare.com; "
-         "img-src 'self' data:; "
-         "worker-src blob:; "
-         "frame-src 'self';"
-     )
-
-     response.headers['Content-Security-Policy'] = csp
-     response.headers['X-Content-Security-Policy'] = csp
-
-     # Add CORS headers for API requests
-     if request.path.startswith('/api/'):
-         response.headers['Access-Control-Allow-Origin'] = '*'
-         response.headers['Access-Control-Allow-Methods'] = 'GET, POST, DELETE, OPTIONS'
-         response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
-
-     return response
-
- # Add a middleware layer to handle abrupt disconnections
- @app.before_request
- def handle_websocket_requests():
-     if request.path.startswith('/research/socket.io'):
-         try:
-             if not request.environ.get('werkzeug.socket'):
-                 return
-         except Exception as e:
-             print(f"WebSocket preprocessing error: {e}")
-             # Return empty response to prevent further processing
-             return '', 200
-
134
- def init_db():
135
- conn = sqlite3.connect(DB_PATH)
136
- cursor = conn.cursor()
137
-
138
- # Create the table if it doesn't exist
139
- cursor.execute('''
140
- CREATE TABLE IF NOT EXISTS research_history (
141
- id INTEGER PRIMARY KEY AUTOINCREMENT,
142
- query TEXT NOT NULL,
143
- mode TEXT NOT NULL,
144
- status TEXT NOT NULL,
145
- created_at TEXT NOT NULL,
146
- completed_at TEXT,
147
- duration_seconds INTEGER,
148
- report_path TEXT,
149
- metadata TEXT,
150
- progress_log TEXT,
151
- progress INTEGER
152
- )
153
- ''')
154
-
155
- # Create a dedicated table for research logs
156
- cursor.execute('''
157
- CREATE TABLE IF NOT EXISTS research_logs (
158
- id INTEGER PRIMARY KEY AUTOINCREMENT,
159
- research_id INTEGER NOT NULL,
160
- timestamp TEXT NOT NULL,
161
- message TEXT NOT NULL,
162
- log_type TEXT NOT NULL,
163
- progress INTEGER,
164
- metadata TEXT,
165
- FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
166
- )
167
- ''')
168
-
169
- # Check if the duration_seconds column exists, add it if missing
170
- cursor.execute('PRAGMA table_info(research_history)')
171
- columns = [column[1] for column in cursor.fetchall()]
172
-
173
- if 'duration_seconds' not in columns:
174
- print("Adding missing 'duration_seconds' column to research_history table")
175
- cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
176
-
177
- # Check if the progress column exists, add it if missing
178
- if 'progress' not in columns:
179
- print("Adding missing 'progress' column to research_history table")
180
- cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
181
-
182
- # Enable foreign key support
183
- cursor.execute('PRAGMA foreign_keys = ON')
184
-
185
- conn.commit()
186
- conn.close()
187
-
188
- # Helper function to calculate duration between created_at and completed_at timestamps
189
- def calculate_duration(created_at_str):
190
- """
191
- Calculate duration in seconds between created_at timestamp and now.
192
- Handles various timestamp formats and returns None if calculation fails.
193
- """
194
- if not created_at_str:
195
- return None
196
-
197
- now = datetime.utcnow()
198
- duration_seconds = None
199
-
200
- try:
201
- # Proper parsing of ISO format
202
- if 'T' in created_at_str: # ISO format with T separator
203
- start_time = datetime.fromisoformat(created_at_str)
204
- else: # Older format without T
205
- # Try different formats
206
- try:
207
- start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S.%f')
208
- except ValueError:
209
- try:
210
- start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S')
211
- except ValueError:
212
- # Last resort fallback
213
- start_time = datetime.fromisoformat(created_at_str.replace(' ', 'T'))
214
-
215
- # Ensure we're comparing UTC times
216
- duration_seconds = int((now - start_time).total_seconds())
217
- except Exception as e:
218
- print(f"Error calculating duration: {str(e)}")
219
- # Fallback method if parsing fails
220
- try:
221
- start_time_fallback = parser.parse(created_at_str)
222
- duration_seconds = int((now - start_time_fallback).total_seconds())
223
- except:
224
- print(f"Fallback duration calculation also failed for timestamp: {created_at_str}")
225
-
226
- return duration_seconds
227
-
228
- # Add these helper functions after the calculate_duration function
229
-
230
-
231
- def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
232
- """
233
- Store a log entry in the database
234
-
235
- Args:
236
- research_id: ID of the research
237
- message: Log message text
238
- log_type: Type of log (info, error, milestone)
239
- progress: Progress percentage (0-100)
240
- metadata: Additional metadata as dictionary (will be stored as JSON)
241
- """
242
- try:
243
- timestamp = datetime.utcnow().isoformat()
244
- metadata_json = json.dumps(metadata) if metadata else None
245
-
246
- conn = sqlite3.connect(DB_PATH)
247
- cursor = conn.cursor()
248
- cursor.execute(
249
- 'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
250
- 'VALUES (?, ?, ?, ?, ?, ?)',
251
- (research_id, timestamp, message, log_type, progress, metadata_json)
252
- )
253
- conn.commit()
254
- conn.close()
255
- return True
256
- except Exception as e:
257
- print(f"Error adding log to database: {str(e)}")
258
- print(traceback.format_exc())
259
- return False
260
-
261
- def get_logs_for_research(research_id):
262
- """
263
- Retrieve all logs for a specific research ID
264
-
265
- Args:
266
- research_id: ID of the research
267
-
268
- Returns:
269
- List of log entries as dictionaries
270
- """
271
- try:
272
- conn = sqlite3.connect(DB_PATH)
273
- conn.row_factory = sqlite3.Row
274
- cursor = conn.cursor()
275
- cursor.execute(
276
- 'SELECT * FROM research_logs WHERE research_id = ? ORDER BY timestamp ASC',
277
- (research_id,)
278
- )
279
- results = cursor.fetchall()
280
- conn.close()
281
-
282
- logs = []
283
- for result in results:
284
- log_entry = dict(result)
285
- # Parse metadata JSON if it exists
286
- if log_entry.get('metadata'):
287
- try:
288
- log_entry['metadata'] = json.loads(log_entry['metadata'])
289
- except:
290
- log_entry['metadata'] = {}
291
- else:
292
- log_entry['metadata'] = {}
293
-
294
- # Convert entry for frontend consumption
295
- formatted_entry = {
296
- 'time': log_entry['timestamp'],
297
- 'message': log_entry['message'],
298
- 'progress': log_entry['progress'],
299
- 'metadata': log_entry['metadata'],
300
- 'type': log_entry['log_type']
301
- }
302
- logs.append(formatted_entry)
303
-
304
- return logs
305
- except Exception as e:
306
- print(f"Error retrieving logs from database: {str(e)}")
307
- print(traceback.format_exc())
308
- return []
309
-
310
- # Initialize the database on startup
311
- def initialize():
312
- init_db()
313
-
314
- # Call initialize immediately when app is created
315
- initialize()
316
-
317
- # Route for index page - keep this at root level for easy access
318
- @app.route('/')
319
- def root_index():
320
- return redirect(url_for('research.index'))
321
-
322
- # Update all routes with the research prefix
323
- @research_bp.route('/')
324
- def index():
325
- return render_template('index.html')
326
-
327
- @research_bp.route('/static/<path:path>')
328
- def serve_static(path):
329
- try:
330
- print(f"Serving static file: {path}")
331
- print(f"Static folder path: {app.static_folder}")
332
- return send_from_directory(app.static_folder, path)
333
- except Exception as e:
334
- print(f"Error serving static file {path}: {str(e)}")
335
- return f"Error serving file: {str(e)}", 404
336
-
337
- @research_bp.route('/api/history', methods=['GET'])
338
- def get_history():
339
- """Get the research history"""
340
- try:
341
- conn = sqlite3.connect(DB_PATH)
342
- conn.row_factory = sqlite3.Row
343
- cursor = conn.cursor()
344
-
345
- # Get all history records ordered by latest first
346
- cursor.execute('SELECT * FROM research_history ORDER BY created_at DESC')
347
- results = cursor.fetchall()
348
- conn.close()
349
-
350
- # Convert to list of dicts
351
- history = []
352
- for result in results:
353
- item = dict(result)
354
-
355
- # Ensure all keys exist with default values
356
- if 'id' not in item:
357
- item['id'] = None
358
- if 'query' not in item:
359
- item['query'] = 'Untitled Research'
360
- if 'mode' not in item:
361
- item['mode'] = 'quick'
362
- if 'status' not in item:
363
- item['status'] = 'unknown'
364
- if 'created_at' not in item:
365
- item['created_at'] = None
366
- if 'completed_at' not in item:
367
- item['completed_at'] = None
368
- if 'duration_seconds' not in item:
369
- item['duration_seconds'] = None
370
- if 'report_path' not in item:
371
- item['report_path'] = None
372
- if 'metadata' not in item:
373
- item['metadata'] = '{}'
374
- if 'progress_log' not in item:
375
- item['progress_log'] = '[]'
376
-
377
- # Ensure timestamps are in ISO format
378
- if item['created_at'] and 'T' not in item['created_at']:
379
- try:
380
- # Convert to ISO format if it's not already
381
- dt = parser.parse(item['created_at'])
382
- item['created_at'] = dt.isoformat()
383
- except:
384
- pass
385
-
386
- if item['completed_at'] and 'T' not in item['completed_at']:
387
- try:
388
- # Convert to ISO format if it's not already
389
- dt = parser.parse(item['completed_at'])
390
- item['completed_at'] = dt.isoformat()
391
- except:
392
- pass
393
-
394
- # Recalculate duration based on timestamps if it's null but both timestamps exist
395
- if item['duration_seconds'] is None and item['created_at'] and item['completed_at']:
396
- try:
397
- start_time = parser.parse(item['created_at'])
398
- end_time = parser.parse(item['completed_at'])
399
- item['duration_seconds'] = int((end_time - start_time).total_seconds())
400
- except Exception as e:
401
- print(f"Error recalculating duration: {str(e)}")
402
-
403
- history.append(item)
404
-
405
- # Add CORS headers
406
- response = make_response(jsonify(history))
407
- response.headers.add('Access-Control-Allow-Origin', '*')
408
- response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
409
- response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
410
- return response
411
- except Exception as e:
412
- print(f"Error getting history: {str(e)}")
413
- print(traceback.format_exc())
414
- # Return empty array with CORS headers
415
- response = make_response(jsonify([]))
416
- response.headers.add('Access-Control-Allow-Origin', '*')
417
- response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
418
- response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
419
- return response
420
-
421
- @research_bp.route('/api/start_research', methods=['POST'])
422
- def start_research():
423
- data = request.json
424
- query = data.get('query')
425
- mode = data.get('mode', 'quick')
426
-
427
- if not query:
428
- return jsonify({'status': 'error', 'message': 'Query is required'}), 400
429
-
430
- # Check if there's any active research that's actually still running
431
- if active_research:
432
- # Verify each active research is still valid
433
- stale_research_ids = []
434
- for research_id, research_data in list(active_research.items()):
435
- # Check database status
436
- conn = sqlite3.connect(DB_PATH)
437
- cursor = conn.cursor()
438
- cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
439
- result = cursor.fetchone()
440
- conn.close()
441
-
442
- # If the research doesn't exist in DB or is not in_progress, it's stale
443
- if not result or result[0] != 'in_progress':
444
- stale_research_ids.append(research_id)
445
- # Also check if thread is still alive
446
- elif not research_data.get('thread') or not research_data.get('thread').is_alive():
447
- stale_research_ids.append(research_id)
448
-
449
- # Clean up any stale research processes
450
- for stale_id in stale_research_ids:
451
- print(f"Cleaning up stale research process: {stale_id}")
452
- if stale_id in active_research:
453
- del active_research[stale_id]
454
- if stale_id in termination_flags:
455
- del termination_flags[stale_id]
456
-
457
- # After cleanup, check if there's still active research
458
- if active_research:
459
- return jsonify({
460
- 'status': 'error',
461
- 'message': 'Another research is already in progress. Please wait for it to complete.'
462
- }), 409
463
-
464
- # Create a record in the database with explicit UTC timestamp
465
- created_at = datetime.utcnow().isoformat()
466
- conn = sqlite3.connect(DB_PATH)
467
- cursor = conn.cursor()
468
- cursor.execute(
469
- 'INSERT INTO research_history (query, mode, status, created_at, progress_log) VALUES (?, ?, ?, ?, ?)',
470
- (query, mode, 'in_progress', created_at, json.dumps([{"time": created_at, "message": "Research started", "progress": 0}]))
471
- )
472
- research_id = cursor.lastrowid
473
- conn.commit()
474
- conn.close()
475
-
476
- # Start research process in a background thread
477
- thread = threading.Thread(
478
- target=run_research_process,
479
- args=(research_id, query, mode)
480
- )
481
- thread.daemon = True
482
- thread.start()
483
-
484
- active_research[research_id] = {
485
- 'thread': thread,
486
- 'progress': 0,
487
- 'status': 'in_progress',
488
- 'log': [{"time": created_at, "message": "Research started", "progress": 0}]
489
- }
490
-
491
- return jsonify({
492
- 'status': 'success',
493
- 'research_id': research_id
494
- })
495
-
496
- @research_bp.route('/api/research/<int:research_id>')
497
- def get_research_status(research_id):
498
- conn = sqlite3.connect(DB_PATH)
499
- conn.row_factory = sqlite3.Row
500
- cursor = conn.cursor()
501
- cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
502
- result = dict(cursor.fetchone() or {})
503
- conn.close()
504
-
505
- if not result:
506
- return jsonify({'status': 'error', 'message': 'Research not found'}), 404
507
-
508
- # Add progress information
509
- if research_id in active_research:
510
- result['progress'] = active_research[research_id]['progress']
511
- result['log'] = active_research[research_id]['log']
512
- elif result.get('status') == 'completed':
513
- result['progress'] = 100
514
- try:
515
- result['log'] = json.loads(result.get('progress_log', '[]'))
516
- except:
517
- result['log'] = []
518
- else:
519
- result['progress'] = 0
520
- try:
521
- result['log'] = json.loads(result.get('progress_log', '[]'))
522
- except:
523
- result['log'] = []
524
-
525
- return jsonify(result)
+ # Initialize logger
+ logger = logging.getLogger(__name__)
 
527
- @research_bp.route('/api/research/<int:research_id>/details')
528
- def get_research_details(research_id):
529
- """Get detailed progress log for a specific research"""
530
- conn = sqlite3.connect(DB_PATH)
531
- conn.row_factory = sqlite3.Row
532
- cursor = conn.cursor()
533
- cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
534
- result = dict(cursor.fetchone() or {})
535
- conn.close()
536
-
537
- if not result:
538
- return jsonify({'status': 'error', 'message': 'Research not found'}), 404
539
-
540
- # Get logs from the dedicated log database
541
- logs = get_logs_for_research(research_id)
542
-
543
- # If this is an active research, merge with any in-memory logs
544
- if research_id in active_research:
545
- # Use the logs from memory temporarily until they're saved to the database
546
- memory_logs = active_research[research_id]['log']
547
-
548
- # Filter out logs that are already in the database by timestamp
549
- db_timestamps = {log['time'] for log in logs}
550
- unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
551
-
552
- # Add unique memory logs to our return list
553
- logs.extend(unique_memory_logs)
554
-
555
- # Sort logs by timestamp
556
- logs.sort(key=lambda x: x['time'])
557
-
558
- return jsonify({
559
- 'status': 'success',
560
- 'research_id': research_id,
561
- 'query': result.get('query'),
562
- 'mode': result.get('mode'),
563
- 'status': result.get('status'),
564
- 'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
565
- 'created_at': result.get('created_at'),
566
- 'completed_at': result.get('completed_at'),
567
- 'log': logs
568
- })
+ # Ensure data directory exists
+ setup_data_dir()
 
570
- @research_bp.route('/api/report/<int:research_id>')
571
- def get_report(research_id):
572
- conn = sqlite3.connect(DB_PATH)
573
- conn.row_factory = sqlite3.Row
574
- cursor = conn.cursor()
575
- cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
576
- result = dict(cursor.fetchone() or {})
577
- conn.close()
578
-
579
- if not result or not result.get('report_path'):
580
- return jsonify({'status': 'error', 'message': 'Report not found'}), 404
581
-
+ # Run schema upgrades if database exists
+ if os.path.exists(DB_PATH):
      try:
583
- with open(result['report_path'], 'r', encoding='utf-8') as f:
584
- content = f.read()
585
- return jsonify({
586
- 'status': 'success',
587
- 'content': content,
588
- 'metadata': json.loads(result.get('metadata', '{}'))
589
- })
590
- except Exception as e:
591
- return jsonify({'status': 'error', 'message': str(e)}), 500
592
-
593
- @research_bp.route('/research/details/<int:research_id>')
594
- def research_details_page(research_id):
595
- """Render the research details page"""
596
- return render_template('index.html')
597
-
598
- @socketio.on('connect')
599
- def handle_connect():
600
- print(f"Client connected: {request.sid}")
+         logger.info("Running schema upgrades on existing database")
+         from .database.schema_upgrade import run_schema_upgrades
 
602
- @socketio.on('disconnect')
603
- def handle_disconnect():
604
- try:
605
- print(f"Client disconnected: {request.sid}")
606
- # Clean up subscriptions for this client
607
- for research_id, subscribers in list(socket_subscriptions.items()):
608
- if request.sid in subscribers:
609
- subscribers.remove(request.sid)
610
- if not subscribers:
611
- socket_subscriptions.pop(research_id, None)
612
- print(f"Removed empty subscription for research {research_id}")
+         run_schema_upgrades()
      except Exception as e:
614
- print(f"Error handling disconnect: {e}")
615
-
616
- @socketio.on('subscribe_to_research')
617
- def handle_subscribe(data):
618
- research_id = data.get('research_id')
619
- if research_id:
620
- # First check if this research is still active
621
- conn = sqlite3.connect(DB_PATH)
622
- cursor = conn.cursor()
623
- cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
624
- result = cursor.fetchone()
625
- conn.close()
626
-
627
- # Only allow subscription to valid research
628
- if result:
629
- status = result[0]
630
-
631
- # Initialize subscription set if needed
632
- if research_id not in socket_subscriptions:
633
- socket_subscriptions[research_id] = set()
634
-
635
- # Add this client to the subscribers
636
- socket_subscriptions[research_id].add(request.sid)
637
- print(f"Client {request.sid} subscribed to research {research_id}")
638
-
639
- # Send current status immediately if available
640
- if research_id in active_research:
641
- progress = active_research[research_id]['progress']
642
- latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
643
-
644
- if latest_log:
645
- emit(f'research_progress_{research_id}', {
646
- 'progress': progress,
647
- 'message': latest_log.get('message', 'Processing...'),
648
- 'status': 'in_progress',
649
- 'log_entry': latest_log
650
- })
651
- elif status in ['completed', 'failed', 'suspended']:
652
- # Send final status for completed research
653
- emit(f'research_progress_{research_id}', {
654
- 'progress': 100 if status == 'completed' else 0,
655
- 'message': 'Research completed successfully' if status == 'completed' else
656
- 'Research failed' if status == 'failed' else 'Research was suspended',
657
- 'status': status,
658
- 'log_entry': {
659
- 'time': datetime.utcnow().isoformat(),
660
- 'message': f'Research is {status}',
661
- 'progress': 100 if status == 'completed' else 0,
662
- 'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
663
- }
664
- })
665
- else:
666
- # Research not found
667
- emit('error', {'message': f'Research ID {research_id} not found'})
668
-
669
- @socketio.on_error
670
- def handle_socket_error(e):
671
- print(f"Socket.IO error: {str(e)}")
672
- # Don't propagate exceptions to avoid crashing the server
673
- return False
674
-
675
- @socketio.on_error_default
676
- def handle_default_error(e):
677
- print(f"Unhandled Socket.IO error: {str(e)}")
678
- # Don't propagate exceptions to avoid crashing the server
679
- return False
680
-
681
- # Function to clean up resources for a completed research
682
- def cleanup_research_resources(research_id):
683
- """Clean up resources for a completed research"""
684
- print(f"Cleaning up resources for research {research_id}")
685
-
686
- # Get the current status from the database to determine the final status message
687
- current_status = "completed" # Default
688
- try:
689
- conn = sqlite3.connect(DB_PATH)
690
- cursor = conn.cursor()
691
- cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
692
- result = cursor.fetchone()
693
- if result and result[0]:
694
- current_status = result[0]
695
- conn.close()
696
- except Exception as e:
697
- print(f"Error retrieving research status during cleanup: {e}")
698
-
699
- # Remove from active research
700
- if research_id in active_research:
701
- del active_research[research_id]
702
-
703
- # Remove from termination flags
704
- if research_id in termination_flags:
705
- del termination_flags[research_id]
706
-
707
- # Send a final message to any remaining subscribers with explicit status
708
- if research_id in socket_subscriptions and socket_subscriptions[research_id]:
709
- # Use the proper status message based on database status
710
- if current_status == 'suspended' or current_status == 'failed':
711
- final_message = {
712
- 'status': current_status,
713
- 'message': f'Research was {current_status}',
714
- 'progress': 0, # For suspended research, show 0% not 100%
715
- }
716
- else:
717
- final_message = {
718
- 'status': 'completed',
719
- 'message': 'Research process has ended and resources have been cleaned up',
720
- 'progress': 100,
721
- }
722
-
723
- try:
724
- print(f"Sending final {current_status} socket message for research {research_id}")
725
- # Use emit to all, not just subscribers
726
- socketio.emit(f'research_progress_{research_id}', final_message)
727
-
728
- # Also emit to specific subscribers
729
- for sid in socket_subscriptions[research_id]:
730
- try:
731
- socketio.emit(
732
- f'research_progress_{research_id}',
733
- final_message,
734
- room=sid
735
- )
736
- except Exception as sub_err:
737
- print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
738
- except Exception as e:
739
- print(f"Error sending final cleanup message: {e}")
740
-
741
- # Don't immediately remove subscriptions - let clients disconnect naturally
+         logger.error(f"Error running schema upgrades: {e}")
+         logger.warning("Continuing without schema upgrades")
 
743
- def run_research_process(research_id, query, mode):
744
- """Run the research process in the background for a given research ID"""
745
- try:
746
- # Check if this research has been terminated before we even start
747
- if research_id in termination_flags and termination_flags[research_id]:
748
- print(f"Research {research_id} was terminated before starting")
749
- cleanup_research_resources(research_id)
750
- return
751
-
752
- print(f"Starting research process for ID {research_id}, query: {query}")
753
-
754
- # Set up the AI Context Manager
755
- output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
756
- os.makedirs(output_dir, exist_ok=True)
757
-
758
- # Set up progress callback
759
- def progress_callback(message, progress_percent, metadata):
760
- # FREQUENT TERMINATION CHECK: Check for termination at each callback
761
- if research_id in termination_flags and termination_flags[research_id]:
762
- # Explicitly set the status to suspended in the database
763
- conn = sqlite3.connect(DB_PATH)
764
- cursor = conn.cursor()
765
- # Calculate duration up to termination point - using UTC consistently
766
- now = datetime.utcnow()
767
- completed_at = now.isoformat()
768
-
769
- # Get the start time from the database
770
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
771
- result = cursor.fetchone()
772
-
773
- # Calculate the duration
774
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
775
-
776
- # Update the database with suspended status
777
- cursor.execute(
778
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
779
- ('suspended', completed_at, duration_seconds, research_id)
780
- )
781
- conn.commit()
782
- conn.close()
783
-
784
- # Clean up resources
785
- cleanup_research_resources(research_id)
786
-
787
- # Raise exception to exit the process
788
- raise Exception("Research was terminated by user")
789
-
790
- timestamp = datetime.utcnow().isoformat()
791
-
792
- # Adjust progress based on research mode
793
- adjusted_progress = progress_percent
794
- if mode == 'detailed' and metadata.get('phase') == 'output_generation':
795
- # For detailed mode, we need to adjust the progress range
796
- # because detailed reports take longer after the search phase
797
- adjusted_progress = min(80, progress_percent)
798
- elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
799
- # Scale the progress from 80% to 95% for the report generation phase
800
- # Map progress_percent values (0-100%) to the (80-95%) range
801
- if progress_percent is not None:
802
- normalized = progress_percent / 100
803
- adjusted_progress = 80 + (normalized * 15)
804
- elif mode == 'quick' and metadata.get('phase') == 'output_generation':
805
- # For quick mode, ensure we're at least at 85% during output generation
806
- adjusted_progress = max(85, progress_percent)
807
- # Map any further progress within output_generation to 85-95% range
808
- if progress_percent is not None and progress_percent > 0:
809
- normalized = progress_percent / 100
810
- adjusted_progress = 85 + (normalized * 10)
811
-
812
- # Don't let progress go backwards
813
- if research_id in active_research and adjusted_progress is not None:
814
- current_progress = active_research[research_id].get('progress', 0)
815
- adjusted_progress = max(current_progress, adjusted_progress)
816
-
817
- log_entry = {
818
- "time": timestamp,
819
- "message": message,
820
- "progress": adjusted_progress,
821
- "metadata": metadata
822
- }
823
-
824
- # Check if termination was requested
825
- if research_id in termination_flags and termination_flags[research_id]:
826
- # Explicitly set the status to suspended in the database
827
- conn = sqlite3.connect(DB_PATH)
828
- cursor = conn.cursor()
829
- # Calculate duration up to termination point - using UTC consistently
830
- now = datetime.utcnow()
831
- completed_at = now.isoformat()
832
-
833
- # Get the start time from the database
834
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
835
- result = cursor.fetchone()
836
-
837
- # Calculate the duration
838
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
839
-
840
- # Update the database with suspended status
841
- cursor.execute(
842
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
843
- ('suspended', completed_at, duration_seconds, research_id)
844
- )
845
- conn.commit()
846
- conn.close()
847
-
848
- # Clean up resources
849
- cleanup_research_resources(research_id)
850
-
851
- # Raise exception to exit the process
852
- raise Exception("Research was terminated by user")
853
-
854
- # Update active research record
855
- if research_id in active_research:
856
- active_research[research_id]['log'].append(log_entry)
857
- if adjusted_progress is not None:
858
- active_research[research_id]['progress'] = adjusted_progress
859
-
860
- # Determine log type for database storage
861
- log_type = 'info'
862
- if metadata and metadata.get('phase'):
863
- phase = metadata.get('phase')
864
- if phase in ['complete', 'iteration_complete']:
865
- log_type = 'milestone'
866
- elif phase == 'error' or 'error' in message.lower():
867
- log_type = 'error'
868
-
869
- # Always save logs to the new research_logs table
870
- add_log_to_db(
871
- research_id,
872
- message,
873
- log_type=log_type,
874
- progress=adjusted_progress,
875
- metadata=metadata
876
- )
877
-
878
- # Update progress in the research_history table (for backward compatibility)
879
- conn = sqlite3.connect(DB_PATH)
880
- cursor = conn.cursor()
881
-
882
- # Update the progress and log separately to avoid race conditions with reading/writing the log
883
- if adjusted_progress is not None:
884
- cursor.execute(
885
- 'UPDATE research_history SET progress = ? WHERE id = ?',
886
- (adjusted_progress, research_id)
887
- )
888
-
889
- # Add the log entry to the progress_log
890
- cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
891
- log_result = cursor.fetchone()
892
-
893
- if log_result:
894
- try:
895
- current_log = json.loads(log_result[0])
896
- except:
897
- current_log = []
898
-
899
- current_log.append(log_entry)
900
- cursor.execute(
901
- 'UPDATE research_history SET progress_log = ? WHERE id = ?',
902
- (json.dumps(current_log), research_id)
903
- )
904
-
905
- conn.commit()
906
- conn.close()
907
-
908
- # Emit a socket event
909
- try:
910
- # Basic event data
911
- event_data = {
912
- 'message': message,
913
- 'progress': adjusted_progress
914
- }
915
-
916
- # Add log entry in full format for detailed logging on client
917
- if metadata:
918
- event_data['log_entry'] = log_entry
919
-
920
- # Send to all subscribers and broadcast channel
921
- socketio.emit(f'research_progress_{research_id}', event_data)
922
-
923
- if research_id in socket_subscriptions:
924
- for sid in socket_subscriptions[research_id]:
925
- try:
926
- socketio.emit(
927
- f'research_progress_{research_id}',
928
- event_data,
929
- room=sid
930
- )
931
- except Exception as err:
932
- print(f"Error emitting to subscriber {sid}: {str(err)}")
933
- except Exception as e:
934
- print(f"Socket emit error (non-critical): {str(e)}")
935
-
936
- # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
937
- def check_termination():
938
- if research_id in termination_flags and termination_flags[research_id]:
939
- # Explicitly set the status to suspended in the database
940
- conn = sqlite3.connect(DB_PATH)
941
- cursor = conn.cursor()
942
- now = datetime.utcnow()
943
- completed_at = now.isoformat()
944
-
945
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
946
- result = cursor.fetchone()
947
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
948
-
949
- cursor.execute(
950
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
951
- ('suspended', completed_at, duration_seconds, research_id)
952
- )
953
- conn.commit()
954
- conn.close()
955
-
956
- # Clean up resources
957
- cleanup_research_resources(research_id)
958
-
959
- # Raise exception to exit the process
960
- raise Exception("Research was terminated by user during long-running operation")
961
- return False # Not terminated
962
-
963
- # Set the progress callback in the system
964
- system = AdvancedSearchSystem()
965
- system.set_progress_callback(progress_callback)
966
-
967
- # Run the search
968
- progress_callback("Starting research process", 5, {"phase": "init"})
969
-
970
- try:
971
- results = system.analyze_topic(query)
972
- if mode == 'quick':
973
- progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
974
- else:
975
- progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
976
- except Exception as search_error:
977
- # Better handling of specific search errors
978
- error_message = str(search_error)
979
- error_type = "unknown"
980
-
981
- # Extract error details for common issues
982
- if "status code: 503" in error_message:
983
- error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
984
- error_type = "ollama_unavailable"
985
- elif "status code: 404" in error_message:
986
- error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
987
- error_type = "model_not_found"
988
- elif "status code:" in error_message:
989
- # Extract the status code for other HTTP errors
990
- status_code = error_message.split("status code:")[1].strip()
991
- error_message = f"API request failed with status code {status_code}. Please check your configuration."
992
- error_type = "api_error"
993
- elif "connection" in error_message.lower():
994
- error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
995
- error_type = "connection_error"
996
-
997
- # Raise with improved error message
998
- raise Exception(f"{error_message} (Error type: {error_type})")
999
-
1000
- # Generate output based on mode
1001
- if mode == 'quick':
1002
- # Quick Summary
1003
- if results.get('findings'):
1004
-
1005
- raw_formatted_findings = results['formatted_findings']
1006
- logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
1007
-
1008
- try:
1009
- clean_markdown = raw_formatted_findings
1010
- # ADDED CODE: Convert debug output to clean markdown
1011
- #clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
1012
- print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
1013
-
1014
- # First send a progress update for generating the summary
1015
- progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
1016
-
1017
- # Save as markdown file
1018
- output_dir = "research_outputs"
1019
- if not os.path.exists(output_dir):
1020
- os.makedirs(output_dir)
1021
-
1022
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
1023
- safe_query = safe_query.replace(" ", "_").lower()
1024
- report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
1025
-
1026
- # Send progress update for writing to file
1027
- progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
1028
-
1029
- print(f"Writing report to: {report_path}")
1030
- with open(report_path, "w", encoding="utf-8") as f:
1031
- f.write("# Quick Research Summary\n\n")
1032
- f.write(f"Query: {query}\n\n")
1033
- f.write(clean_markdown) # Use clean markdown instead of raw findings
1034
- f.write("\n\n## Research Metrics\n")
1035
- f.write(f"- Search Iterations: {results['iterations']}\n")
1036
- f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
1037
-
1038
- # Update database
1039
- metadata = {
1040
- 'iterations': results['iterations'],
1041
- 'generated_at': datetime.utcnow().isoformat()
1042
- }
1043
-
1044
- # Calculate duration in seconds - using UTC consistently
1045
- now = datetime.utcnow()
1046
- completed_at = now.isoformat()
1047
-
1048
- print(f"Updating database for research_id: {research_id}")
1049
- # Get the start time from the database
1050
- conn = sqlite3.connect(DB_PATH)
1051
- cursor = conn.cursor()
1052
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
1053
- result = cursor.fetchone()
1054
-
1055
- # Use the helper function for consistent duration calculation
1056
- duration_seconds = calculate_duration(result[0])
1057
-
1058
- # Update the record
1059
- cursor.execute(
1060
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
1061
- ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
1062
- )
1063
- conn.commit()
1064
- conn.close()
1065
- print(f"Database updated successfully for research_id: {research_id}")
1066
-
1067
- # Send the final completion message
1068
- progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
1069
-
1070
- # Clean up resources
1071
- print(f"Cleaning up resources for research_id: {research_id}")
1072
- cleanup_research_resources(research_id)
1073
- print(f"Resources cleaned up for research_id: {research_id}")
1074
- except Exception as inner_e:
1075
- print(f"Error during quick summary generation: {str(inner_e)}")
1076
- print(traceback.format_exc())
1077
- raise Exception(f"Error generating quick summary: {str(inner_e)}")
1078
- else:
1079
- raise Exception("No research findings were generated. Please try again.")
1080
- else:
1081
- # Full Report
1082
- progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
1083
- report_generator = IntegratedReportGenerator()
1084
- final_report = report_generator.generate_report(results, query)
1085
- progress_callback("Report generation complete", 95, {"phase": "report_complete"})
1086
-
1087
- # Save as markdown file
1088
- output_dir = "research_outputs"
1089
- if not os.path.exists(output_dir):
1090
- os.makedirs(output_dir)
1091
-
1092
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
1093
- safe_query = safe_query.replace(" ", "_").lower()
1094
- report_path = os.path.join(output_dir, f"detailed_report_{safe_query}.md")
1095
-
1096
- with open(report_path, "w", encoding="utf-8") as f:
1097
- f.write(final_report['content'])
1098
-
1099
- # Update database
1100
- metadata = final_report['metadata']
1101
- metadata['iterations'] = results['iterations']
1102
-
1103
- # Calculate duration in seconds - using UTC consistently
1104
- now = datetime.utcnow()
1105
- completed_at = now.isoformat()
1106
-
1107
- # Get the start time from the database
1108
- conn = sqlite3.connect(DB_PATH)
1109
- cursor = conn.cursor()
1110
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
1111
- result = cursor.fetchone()
1112
-
1113
- # Use the helper function for consistent duration calculation
1114
-            duration_seconds = calculate_duration(result[0])
-
-        cursor.execute(
-            'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
-            ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
-        )
-        conn.commit()
-        conn.close()
-
-        progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
-
-        # Clean up - moved to a separate function for reuse
-        cleanup_research_resources(research_id)
-
-    except Exception as e:
-        # Handle error
-        error_message = f"Research failed: {str(e)}"
-        print(f"Research error: {error_message}")
-        try:
-            # Check for common Ollama error patterns in the exception and provide more user-friendly errors
-            user_friendly_error = str(e)
-            error_context = {}
-
-            if "Error type: ollama_unavailable" in user_friendly_error:
-                user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
-                error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
-            elif "Error type: model_not_found" in user_friendly_error:
-                user_friendly_error = "Required Ollama model not found. Please pull the model first."
-                error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
-            elif "Error type: connection_error" in user_friendly_error:
-                user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
-                error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
-            elif "Error type: api_error" in user_friendly_error:
-                # Keep the original error message as it's already improved
-                error_context = {"solution": "Check API configuration and credentials."}
-
-            # Update metadata with more context about the error
-            metadata = {
-                "phase": "error",
-                "error": user_friendly_error
-            }
-            if error_context:
-                metadata.update(error_context)
-
-            progress_callback(user_friendly_error, None, metadata)
-
-            conn = sqlite3.connect(DB_PATH)
-            cursor = conn.cursor()
-
-            # If termination was requested, mark as suspended instead of failed
-            status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
-            message = "Research was terminated by user" if status == 'suspended' else user_friendly_error
-
-            # Calculate duration up to termination point - using UTC consistently
-            now = datetime.utcnow()
-            completed_at = now.isoformat()
-
-            # Get the start time from the database
-            duration_seconds = None
-            cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
-            result = cursor.fetchone()
-
-            # Use the helper function for consistent duration calculation
-            if result and result[0]:
-                duration_seconds = calculate_duration(result[0])
-
-            cursor.execute(
-                'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
-                (status, completed_at, duration_seconds, json.dumps(metadata), research_id)
-            )
-            conn.commit()
-            conn.close()
-
-            try:
-                socketio.emit(f'research_progress_{research_id}', {
-                    'status': status,
-                    'error': message
-                })
-
-                # Also notify specific subscribers
-                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
-                    for sid in socket_subscriptions[research_id]:
-                        try:
-                            socketio.emit(
-                                f'research_progress_{research_id}',
-                                {'status': status, 'error': message},
-                                room=sid
-                            )
-                        except Exception as sub_err:
-                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
-
-            except Exception as socket_error:
-                print(f"Failed to emit error via socket: {str(socket_error)}")
-        except Exception as inner_e:
-            print(f"Error in error handler: {str(inner_e)}")
-
-        # Clean up resources - moved to a separate function for reuse
-        cleanup_research_resources(research_id)
 
-@research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
-def terminate_research(research_id):
-    """Terminate an in-progress research process"""
-
-    # Check if the research exists and is in progress
-    conn = sqlite3.connect(DB_PATH)
-    cursor = conn.cursor()
-    cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
-    result = cursor.fetchone()
-
-    if not result:
-        conn.close()
-        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-
-    status = result[0]
-
-    # If it's not in progress, return an error
-    if status != 'in_progress':
-        conn.close()
-        return jsonify({'status': 'error', 'message': 'Research is not in progress'}), 400
-
-    # Check if it's in the active_research dict
-    if research_id not in active_research:
-        # Update the status in the database
-        cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
-        conn.commit()
-        conn.close()
-        return jsonify({'status': 'success', 'message': 'Research terminated'})
-
-    # Set the termination flag
-    termination_flags[research_id] = True
-
-    # Log the termination request - using UTC timestamp
-    timestamp = datetime.utcnow().isoformat()
-    termination_message = "Research termination requested by user"
-    current_progress = active_research[research_id]['progress']
-
-    # Create log entry
-    log_entry = {
-        "time": timestamp,
-        "message": termination_message,
-        "progress": current_progress,
-        "metadata": {"phase": "termination"}
-    }
-
-    # Add to in-memory log
-    active_research[research_id]['log'].append(log_entry)
-
-    # Add to database log
-    add_log_to_db(
-        research_id,
-        termination_message,
-        log_type='milestone',
-        progress=current_progress,
-        metadata={"phase": "termination"}
-    )
-
-    # Update the log in the database (old way for backward compatibility)
-    cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
-    log_result = cursor.fetchone()
-    if log_result:
-        try:
-            current_log = json.loads(log_result[0])
-        except:
-            current_log = []
-        current_log.append(log_entry)
-        cursor.execute(
-            'UPDATE research_history SET progress_log = ? WHERE id = ?',
-            (json.dumps(current_log), research_id)
+# Check if we need to run database migration
+def check_migration_needed():
+    """Check if database migration is needed, based on presence of legacy files and absence of new DB"""
+    if not os.path.exists(DB_PATH):
+        # The new database doesn't exist, check if legacy databases exist
+        legacy_files_exist = os.path.exists(LEGACY_DEEP_RESEARCH_DB) or os.path.exists(
+            LEGACY_RESEARCH_HISTORY_DB
         )
-
-    # IMMEDIATELY update the status to 'suspended' to avoid race conditions
-    cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
-    conn.commit()
-    conn.close()
-
-    # Emit a socket event for the termination request
-    try:
-        event_data = {
-            'status': 'suspended', # Changed from 'terminating' to 'suspended'
-            'message': 'Research was suspended by user request'
-        }
-
-        socketio.emit(f'research_progress_{research_id}', event_data)
-
-        if research_id in socket_subscriptions and socket_subscriptions[research_id]:
-            for sid in socket_subscriptions[research_id]:
-                try:
-                    socketio.emit(
-                        f'research_progress_{research_id}',
-                        event_data,
-                        room=sid
-                    )
-                except Exception as err:
-                    print(f"Error emitting to subscriber {sid}: {str(err)}")
-
-    except Exception as socket_error:
-        print(f"Socket emit error (non-critical): {str(socket_error)}")
-
-    return jsonify({'status': 'success', 'message': 'Research termination requested'})
-
-@research_bp.route('/api/research/<int:research_id>/delete', methods=['DELETE'])
-def delete_research(research_id):
-    """Delete a research record"""
-    conn = sqlite3.connect(DB_PATH)
-    cursor = conn.cursor()
-
-    # First check if the research exists and is not in progress
-    cursor.execute('SELECT status, report_path FROM research_history WHERE id = ?', (research_id,))
-    result = cursor.fetchone()
-
-    if not result:
-        conn.close()
-        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-
-    status, report_path = result
-
-    # Don't allow deleting research in progress
-    if status == 'in_progress' and research_id in active_research:
-        conn.close()
-        return jsonify({
-            'status': 'error',
-            'message': 'Cannot delete research that is in progress'
-        }), 400
-
-    # Delete report file if it exists
-    if report_path and os.path.exists(report_path):
-        try:
-            os.remove(report_path)
-        except Exception as e:
-            print(f"Error removing report file: {str(e)}")
-
-    # Delete the database record
-    cursor.execute('DELETE FROM research_history WHERE id = ?', (research_id,))
-    conn.commit()
-    conn.close()
-
-    return jsonify({'status': 'success'})
-@research_bp.route('/settings', methods=['GET'])
-def settings_page():
-    """Main settings dashboard with links to specialized config pages"""
-    return render_template('settings_dashboard.html')
-
-@research_bp.route('/settings/main', methods=['GET'])
-def main_config_page():
-    """Edit main configuration with search parameters"""
-    return render_template('main_config.html', main_file_path=MAIN_CONFIG_FILE)
-
-@research_bp.route('/settings/llm', methods=['GET'])
-def llm_config_page():
-    """Edit LLM configuration using raw file editor"""
-    return render_template('llm_config.html', llm_file_path=LLM_CONFIG_FILE)
-
-@research_bp.route('/settings/collections', methods=['GET'])
-def collections_config_page():
-    """Edit local collections configuration using raw file editor"""
-    return render_template('collections_config.html', collections_file_path=LOCAL_COLLECTIONS_FILE)
-
-@research_bp.route('/settings/api_keys', methods=['GET'])
-def api_keys_config_page():
-    """Edit API keys configuration"""
-    # Get the secrets file path
-    secrets_file = CONFIG_DIR / ".secrets.toml"
-
-    return render_template('api_keys_config.html', secrets_file_path=secrets_file)
-# Add to the imports section
-from local_deep_research.config import SEARCH_ENGINES_FILE
-
-# Add a new route for search engines configuration page
-@research_bp.route('/settings/search_engines', methods=['GET'])
-def search_engines_config_page():
-    """Edit search engines configuration using raw file editor"""
-    # Read the current config file
-    raw_config = ""
-    try:
-        with open(SEARCH_ENGINES_FILE, 'r') as f:
-            raw_config = f.read()
-    except Exception as e:
-        flash(f'Error reading search engines configuration: {str(e)}', 'error')
-        raw_config = "# Error reading configuration file"
-
-    # Get list of engine names for display
-    engine_names = []
-    try:
-        from local_deep_research.web_search_engines.search_engines_config import SEARCH_ENGINES
-        engine_names = list(SEARCH_ENGINES.keys())
-        engine_names.sort() # Alphabetical order
-    except Exception as e:
-        logger.error(f"Error getting engine names: {e}")
-
-    return render_template('search_engines_config.html',
-                           search_engines_file_path=SEARCH_ENGINES_FILE,
-                           raw_config=raw_config,
-                           engine_names=engine_names)
-
-# Add a route to save search engines configuration
-@research_bp.route('/api/save_search_engines_config', methods=['POST'])
-def save_search_engines_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate TOML syntax
-        try:
-            toml.loads(raw_config)
-        except toml.TomlDecodeError as e:
-            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(SEARCH_ENGINES_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{SEARCH_ENGINES_FILE}.bak"
-        if os.path.exists(SEARCH_ENGINES_FILE):
-            import shutil
-            shutil.copy2(SEARCH_ENGINES_FILE, backup_path)
-
-        # Write new config
-        with open(SEARCH_ENGINES_FILE, 'w') as f:
-            f.write(raw_config)
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-
-
-# API endpoint to save raw LLM config
-@research_bp.route('/api/save_llm_config', methods=['POST'])
-def save_llm_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate Python syntax
-        try:
-            compile(raw_config, '<string>', 'exec')
-        except SyntaxError as e:
-            return jsonify({'success': False, 'error': f'Syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(LLM_CONFIG_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{LLM_CONFIG_FILE}.bak"
-        if os.path.exists(LLM_CONFIG_FILE):
-            import shutil
-            shutil.copy2(LLM_CONFIG_FILE, backup_path)
-
-        # Write new config
-        with open(LLM_CONFIG_FILE, 'w') as f:
-            f.write(raw_config)
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-
-# API endpoint to save raw collections config
-@research_bp.route('/api/save_collections_config', methods=['POST'])
-def save_collections_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate TOML syntax
-        try:
-            toml.loads(raw_config)
-        except toml.TomlDecodeError as e:
-            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(LOCAL_COLLECTIONS_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{LOCAL_COLLECTIONS_FILE}.bak"
-        if os.path.exists(LOCAL_COLLECTIONS_FILE):
-            import shutil
-            shutil.copy2(LOCAL_COLLECTIONS_FILE, backup_path)
-
-        # Write new config
-        with open(LOCAL_COLLECTIONS_FILE, 'w') as f:
-            f.write(raw_config)
-
-        # Also trigger a reload in the collections system
-        try:
-            load_local_collections(reload=True)
-        except Exception as reload_error:
-            return jsonify({'success': True, 'warning': f'Config saved, but error reloading: {str(reload_error)}'})
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-
-# API endpoint to save raw main config
-@research_bp.route('/api/save_main_config', methods=['POST'])
-def save_raw_main_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate TOML syntax
-        try:
-            toml.loads(raw_config)
-        except toml.TomlDecodeError as e:
-            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(MAIN_CONFIG_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{MAIN_CONFIG_FILE}.bak"
-        if os.path.exists(MAIN_CONFIG_FILE):
-            import shutil
-            shutil.copy2(MAIN_CONFIG_FILE, backup_path)
-
-        # Write new config
-        with open(MAIN_CONFIG_FILE, 'w') as f:
-            f.write(raw_config)
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-@research_bp.route('/raw_config')
-def get_raw_config():
-    """Return the raw configuration file content"""
-    try:
-        # Determine which config file to load based on a query parameter
-        config_type = request.args.get('type', 'main')
-
-        if config_type == 'main':
-            config_path = os.path.join(app.config['CONFIG_DIR'], 'config.toml')
-            with open(config_path, 'r') as f:
-                return f.read()
-        elif config_type == 'llm':
-            config_path = os.path.join(app.config['CONFIG_DIR'], 'llm_config.py')
-            with open(config_path, 'r') as f:
-                return f.read()
-        elif config_type == 'collections':
-            config_path = os.path.join(app.config['CONFIG_DIR'], 'collections.toml')
-            with open(config_path, 'r') as f:
-                return f.read()
-        else:
-            return "Unknown configuration type", 400
-    except Exception as e:
-        return str(e), 500
-import os
-import subprocess
-import platform
-
-@research_bp.route('/open_file_location', methods=['POST'])
-def open_file_location():
-    file_path = request.form.get('file_path')
-
-    if not file_path:
-        flash('No file path provided', 'error')
-        return redirect(url_for('research.settings_page'))
-
-    # Get the directory containing the file
-    dir_path = os.path.dirname(os.path.abspath(file_path))
-
-    # Open the directory in the file explorer
-    try:
-        if platform.system() == "Windows":
-            subprocess.Popen(f'explorer "{dir_path}"')
-        elif platform.system() == "Darwin": # macOS
-            subprocess.Popen(["open", dir_path])
-        else: # Linux
-            subprocess.Popen(["xdg-open", dir_path])
-
-        flash(f'Opening folder: {dir_path}', 'success')
-    except Exception as e:
-        flash(f'Error opening folder: {str(e)}', 'error')
-
-    # Redirect back to the settings page
-    if 'llm' in file_path:
-        return redirect(url_for('research.llm_config_page'))
-    elif 'collections' in file_path:
-        return redirect(url_for('research.collections_config_page'))
-    else:
-        return redirect(url_for('research.main_config_page'))
-
-@research_bp.route('/api/research/<int:research_id>/logs')
-def get_research_logs(research_id):
-    """Get logs for a specific research ID"""
-    # First check if the research exists
-    conn = sqlite3.connect(DB_PATH)
-    conn.row_factory = sqlite3.Row
-    cursor = conn.cursor()
-    cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
-    result = cursor.fetchone()
-    conn.close()
-
-    if not result:
-        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-
-    # Retrieve logs from the database
-    logs = get_logs_for_research(research_id)
-
-    # Add any current logs from memory if this is an active research
-    if research_id in active_research and active_research[research_id].get('log'):
-        # Use the logs from memory temporarily until they're saved to the database
-        memory_logs = active_research[research_id]['log']
-
-        # Filter out logs that are already in the database
-        # We'll compare timestamps to avoid duplicates
-        db_timestamps = {log['time'] for log in logs}
-        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
-
-        # Add unique memory logs to our return list
-        logs.extend(unique_memory_logs)
-
-    # Sort logs by timestamp
-    logs.sort(key=lambda x: x['time'])
-
-    return jsonify({
-        'status': 'success',
-        'logs': logs
-    })
 
+        if legacy_files_exist:
+            logger.info(
+                "Legacy database files found, but ldr.db doesn't exist. Migration needed."
+            )
+            return True
 
+    return False
 
-# Register the blueprint
-app.register_blueprint(research_bp)
 
-# Also add the static route at the app level for compatibility
-@app.route('/static/<path:path>')
-def app_serve_static(path):
-    return send_from_directory(app.static_folder, path)
+# Create the Flask app and SocketIO instance
+app, socketio = create_app()
 
-# Add favicon route to prevent 404 errors
-@app.route('/favicon.ico')
-def favicon():
-    return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
-
-# Add this function to app.py
-def convert_debug_to_markdown(raw_text, query):
-    """
-    Convert the debug-formatted text to clean markdown.
-
-    Args:
-        raw_text: The raw formatted findings with debug symbols
-        query: Original research query
-
-    Returns:
-        Clean markdown formatted text
-    """
-    try:
-        print(f"Starting markdown conversion for query: {query}")
-        print(f"Raw text type: {type(raw_text)}")
-
-        # Handle None or empty input
-        if not raw_text:
-            print("WARNING: raw_text is empty or None")
-            return f"No detailed findings available for '{query}'."
-
-        # If there's a "DETAILED FINDINGS:" section, extract everything after it
-        if "DETAILED FINDINGS:" in raw_text:
-            print("Found DETAILED FINDINGS section")
-            detailed_index = raw_text.index("DETAILED FINDINGS:")
-            content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
-        else:
-            print("No DETAILED FINDINGS section found, using full text")
-            content = raw_text
-
-        # Remove divider lines with === symbols
-        lines_before = len(content.split("\n"))
-        content = "\n".join([line for line in content.split("\n")
-                             if not line.strip().startswith("===") and not line.strip() == "="*80])
-        lines_after = len(content.split("\n"))
-        print(f"Removed {lines_before - lines_after} divider lines")
-
-
-
-        # Remove SEARCH QUESTIONS BY ITERATION section
-        if "SEARCH QUESTIONS BY ITERATION:" in content:
-            print("Found SEARCH QUESTIONS BY ITERATION section")
-            search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
-            next_major_section = -1
-            for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
-                if marker in content[search_index:]:
-                    marker_pos = content.index(marker, search_index)
-                    if next_major_section == -1 or marker_pos < next_major_section:
-                        next_major_section = marker_pos
-
-            if next_major_section != -1:
-                print(f"Removing section from index {search_index} to {next_major_section}")
-                content = content[:search_index] + content[next_major_section:]
-            else:
-                # If no later section, just remove everything from SEARCH QUESTIONS onwards
-                print(f"Removing everything after index {search_index}")
-                content = content[:search_index].strip()
-
-        print(f"Final markdown length: {len(content.strip())}")
-        return content.strip()
-    except Exception as e:
-        print(f"Error in convert_debug_to_markdown: {str(e)}")
-        print(traceback.format_exc())
-        # Return a basic message with the original query as fallback
-        return f"# Research on {query}\n\nThere was an error formatting the research results."
 
 def main():
     """
     Entry point for the web application when run as a command.
     This function is needed for the package's entry point to work properly.
     """
-    # Import settings here to avoid circular imports
-    from local_deep_research.config import settings
+    # Check if migration is needed
+    if check_migration_needed():
+        logger.info(
+            "Database migration required. Run migrate_db.py before starting the application."
+        )
+        print("=" * 80)
+        print("DATABASE MIGRATION REQUIRED")
+        print(
+            "Legacy database files were found, but the new unified database doesn't exist."
+        )
+        print(
+            "Please run 'python -m src.local_deep_research.web.database.migrate_to_ldr_db' to migrate your data."
+        )
+        print(
+            "You can continue without migration, but your previous data won't be available."
+        )
+        print("=" * 80)
+
+        # If --auto-migrate flag is passed, run migration automatically
+        if "--auto-migrate" in sys.argv:
+            logger.info("Auto-migration flag detected, running migration...")
+            try:
+                from .database.migrate_to_ldr_db import migrate_to_ldr_db
+
+                success = migrate_to_ldr_db()
+                if success:
+                    logger.info("Database migration completed successfully.")
+                else:
+                    logger.warning("Database migration failed.")
+            except Exception as e:
+                logger.error(f"Error running database migration: {e}")
+                print(f"Error: {e}")
+                print("Please run migration manually.")
 
     # Get web server settings with defaults
-    port = settings.web.port
-    host = settings.web.host
-    debug = settings.web.debug
+    port = get_db_setting("web.port", settings.web.port)
+    host = get_db_setting("web.host", settings.web.host)
+    debug = get_db_setting("web.debug", settings.web.debug)
 
     # Check for OpenAI availability but don't import it unless necessary
     try:
-        import os
         api_key = os.environ.get("OPENAI_API_KEY")
         if api_key:
             try:
                 # Only try to import if we have an API key
                 import openai
+
                 openai.api_key = api_key
-                OPENAI_AVAILABLE = True
-                print("OpenAI integration is available")
+                logger.info("OpenAI integration is available")
             except ImportError:
-                print("OpenAI package not installed, integration disabled")
+                logger.info("OpenAI package not installed, integration disabled")
         else:
-            print("OPENAI_API_KEY not found in environment variables, OpenAI integration disabled")
+            logger.info(
+                "OPENAI_API_KEY not found in environment variables, OpenAI integration disabled"
+            )
     except Exception as e:
-        print(f"Error checking OpenAI availability: {e}")
-
+        logger.error(f"Error checking OpenAI availability: {e}")
+
+    logger.info(f"Starting web server on {host}:{port} (debug: {debug})")
     socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
-
-if __name__ == '__main__':
-    main()
+
+
+if __name__ == "__main__":
+    main()