local-deep-research 0.1.0__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,6 +90,9 @@ termination_flags = {}
 # Database setup
 DB_PATH = 'research_history.db'
 
+# Output directory for research results
+OUTPUT_DIR = 'research_outputs'
+
 # Add Content Security Policy headers to allow Socket.IO to function
 @app.after_request
 def add_security_headers(response):
@@ -128,7 +131,6 @@ def handle_websocket_requests():
     # Return empty response to prevent further processing
     return '', 200
 
-# Initialize the database
 def init_db():
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
@@ -145,7 +147,22 @@ def init_db():
             duration_seconds INTEGER,
             report_path TEXT,
             metadata TEXT,
-            progress_log TEXT
+            progress_log TEXT,
+            progress INTEGER
+        )
+    ''')
+
+    # Create a dedicated table for research logs
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS research_logs (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            research_id INTEGER NOT NULL,
+            timestamp TEXT NOT NULL,
+            message TEXT NOT NULL,
+            log_type TEXT NOT NULL,
+            progress INTEGER,
+            metadata TEXT,
+            FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
         )
     ''')
 
@@ -157,9 +174,17 @@ def init_db():
         print("Adding missing 'duration_seconds' column to research_history table")
         cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
 
+    # Check if the progress column exists, add it if missing
+    if 'progress' not in columns:
+        print("Adding missing 'progress' column to research_history table")
+        cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
+
+    # Enable foreign key support
+    cursor.execute('PRAGMA foreign_keys = ON')
+
     conn.commit()
     conn.close()
-
+
 # Helper function to calculate duration between created_at and completed_at timestamps
 def calculate_duration(created_at_str):
     """
@@ -200,6 +225,88 @@ def calculate_duration(created_at_str):
 
     return duration_seconds
 
+# Add these helper functions after the calculate_duration function
+
+
+def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
+    """
+    Store a log entry in the database
+
+    Args:
+        research_id: ID of the research
+        message: Log message text
+        log_type: Type of log (info, error, milestone)
+        progress: Progress percentage (0-100)
+        metadata: Additional metadata as dictionary (will be stored as JSON)
+    """
+    try:
+        timestamp = datetime.utcnow().isoformat()
+        metadata_json = json.dumps(metadata) if metadata else None
+
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute(
+            'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
+            'VALUES (?, ?, ?, ?, ?, ?)',
+            (research_id, timestamp, message, log_type, progress, metadata_json)
+        )
+        conn.commit()
+        conn.close()
+        return True
+    except Exception as e:
+        print(f"Error adding log to database: {str(e)}")
+        print(traceback.format_exc())
+        return False
+
+def get_logs_for_research(research_id):
+    """
+    Retrieve all logs for a specific research ID
+
+    Args:
+        research_id: ID of the research
+
+    Returns:
+        List of log entries as dictionaries
+    """
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+        cursor.execute(
+            'SELECT * FROM research_logs WHERE research_id = ? ORDER BY timestamp ASC',
+            (research_id,)
+        )
+        results = cursor.fetchall()
+        conn.close()
+
+        logs = []
+        for result in results:
+            log_entry = dict(result)
+            # Parse metadata JSON if it exists
+            if log_entry.get('metadata'):
+                try:
+                    log_entry['metadata'] = json.loads(log_entry['metadata'])
+                except:
+                    log_entry['metadata'] = {}
+            else:
+                log_entry['metadata'] = {}
+
+            # Convert entry for frontend consumption
+            formatted_entry = {
+                'time': log_entry['timestamp'],
+                'message': log_entry['message'],
+                'progress': log_entry['progress'],
+                'metadata': log_entry['metadata'],
+                'type': log_entry['log_type']
+            }
+            logs.append(formatted_entry)
+
+        return logs
+    except Exception as e:
+        print(f"Error retrieving logs from database: {str(e)}")
+        print(traceback.format_exc())
+        return []
+
 # Initialize the database on startup
 def initialize():
     init_db()
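Note: a quick usage sketch of the two helpers added above (the research ID is illustrative and assumes init_db() has already created the tables):

    # Write a structured log entry, then read the formatted entries back.
    add_log_to_db(
        1,
        "Finished iteration 2",
        log_type='milestone',
        progress=40,
        metadata={'phase': 'iteration_complete'}
    )

    for entry in get_logs_for_research(1):
        # Entries come back frontend-ready:
        # {'time', 'message', 'progress', 'metadata', 'type'}
        print(entry['time'], entry['type'], entry['message'])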
@@ -320,12 +427,39 @@ def start_research():
     if not query:
         return jsonify({'status': 'error', 'message': 'Query is required'}), 400
 
-    # Check if there's any active research
+    # Check if there's any active research that's actually still running
     if active_research:
-        return jsonify({
-            'status': 'error',
-            'message': 'Another research is already in progress. Please wait for it to complete.'
-        }), 409
+        # Verify each active research is still valid
+        stale_research_ids = []
+        for research_id, research_data in list(active_research.items()):
+            # Check database status
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+            cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+            result = cursor.fetchone()
+            conn.close()
+
+            # If the research doesn't exist in DB or is not in_progress, it's stale
+            if not result or result[0] != 'in_progress':
+                stale_research_ids.append(research_id)
+            # Also check if thread is still alive
+            elif not research_data.get('thread') or not research_data.get('thread').is_alive():
+                stale_research_ids.append(research_id)
+
+        # Clean up any stale research processes
+        for stale_id in stale_research_ids:
+            print(f"Cleaning up stale research process: {stale_id}")
+            if stale_id in active_research:
+                del active_research[stale_id]
+            if stale_id in termination_flags:
+                del termination_flags[stale_id]
+
+        # After cleanup, check if there's still active research
+        if active_research:
+            return jsonify({
+                'status': 'error',
+                'message': 'Another research is already in progress. Please wait for it to complete.'
+            }), 409
 
     # Create a record in the database with explicit UTC timestamp
     created_at = datetime.utcnow().isoformat()
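Note: the staleness check above expects each active_research entry to carry its worker thread under a 'thread' key. The code that populates the registry falls outside this diff, so the following registration sketch is an assumption inferred from research_data.get('thread').is_alive():

    import threading

    # Hypothetical registration at research start-up.
    thread = threading.Thread(
        target=run_research_process,
        args=(research_id, query, mode),
        daemon=True,
    )
    active_research[research_id] = {'thread': thread, 'progress': 0, 'log': []}
    thread.start()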
@@ -403,15 +537,23 @@ def get_research_details(research_id):
     if not result:
         return jsonify({'status': 'error', 'message': 'Research not found'}), 404
 
-    try:
-        # Get the progress log
-        progress_log = json.loads(result.get('progress_log', '[]'))
-    except:
-        progress_log = []
-
-    # If this is an active research, get the latest log
+    # Get logs from the dedicated log database
+    logs = get_logs_for_research(research_id)
+
+    # If this is an active research, merge with any in-memory logs
     if research_id in active_research:
-        progress_log = active_research[research_id]['log']
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+
+        # Filter out logs that are already in the database by timestamp
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
 
     return jsonify({
         'status': 'success',
@@ -422,7 +564,7 @@ def get_research_details(research_id):
         'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
         'created_at': result.get('created_at'),
         'completed_at': result.get('completed_at'),
-        'log': progress_log
+        'log': logs
     })
 
 @research_bp.route('/api/report/<int:research_id>')
@@ -467,6 +609,7 @@ def handle_disconnect():
                 subscribers.remove(request.sid)
                 if not subscribers:
                     socket_subscriptions.pop(research_id, None)
+                    print(f"Removed empty subscription for research {research_id}")
     except Exception as e:
         print(f"Error handling disconnect: {e}")
 
@@ -474,23 +617,54 @@ def handle_disconnect():
 def handle_subscribe(data):
     research_id = data.get('research_id')
     if research_id:
-        if research_id not in socket_subscriptions:
-            socket_subscriptions[research_id] = set()
-        socket_subscriptions[research_id].add(request.sid)
-        print(f"Client {request.sid} subscribed to research {research_id}")
+        # First check if this research is still active
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        conn.close()
 
-        # Send current status immediately if available
-        if research_id in active_research:
-            progress = active_research[research_id]['progress']
-            latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+        # Only allow subscription to valid research
+        if result:
+            status = result[0]
+
+            # Initialize subscription set if needed
+            if research_id not in socket_subscriptions:
+                socket_subscriptions[research_id] = set()
+
+            # Add this client to the subscribers
+            socket_subscriptions[research_id].add(request.sid)
+            print(f"Client {request.sid} subscribed to research {research_id}")
 
-            if latest_log:
+            # Send current status immediately if available
+            if research_id in active_research:
+                progress = active_research[research_id]['progress']
+                latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+
+                if latest_log:
+                    emit(f'research_progress_{research_id}', {
+                        'progress': progress,
+                        'message': latest_log.get('message', 'Processing...'),
+                        'status': 'in_progress',
+                        'log_entry': latest_log
+                    })
+            elif status in ['completed', 'failed', 'suspended']:
+                # Send final status for completed research
                 emit(f'research_progress_{research_id}', {
-                    'progress': progress,
-                    'message': latest_log.get('message', 'Processing...'),
-                    'status': 'in_progress',
-                    'log_entry': latest_log
+                    'progress': 100 if status == 'completed' else 0,
+                    'message': 'Research completed successfully' if status == 'completed' else
+                               'Research failed' if status == 'failed' else 'Research was suspended',
+                    'status': status,
+                    'log_entry': {
+                        'time': datetime.utcnow().isoformat(),
+                        'message': f'Research is {status}',
+                        'progress': 100 if status == 'completed' else 0,
+                        'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
+                    }
                 })
+        else:
+            # Research not found
+            emit('error', {'message': f'Research ID {research_id} not found'})
 
 @socketio.on_error
 def handle_socket_error(e):
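Note: a minimal client counterpart for the subscription flow, sketched with the python-socketio package. The research_progress_<id> channel name comes from the emits above; the 'subscribe_to_research' event name is an assumption, since the @socketio.on decorator for handle_subscribe falls outside this diff:

    import socketio

    research_id = 42  # illustrative
    sio = socketio.Client()

    @sio.on(f'research_progress_{research_id}')
    def on_progress(data):
        print(data.get('progress'), data.get('message'), data.get('status'))

    sio.connect('http://localhost:5000')
    sio.emit('subscribe_to_research', {'research_id': research_id})  # event name assumed
    sio.wait()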
@@ -504,88 +678,324 @@ def handle_default_error(e):
     # Don't propagate exceptions to avoid crashing the server
     return False
 
+# Function to clean up resources for a completed research
+def cleanup_research_resources(research_id):
+    """Clean up resources for a completed research"""
+    print(f"Cleaning up resources for research {research_id}")
+
+    # Get the current status from the database to determine the final status message
+    current_status = "completed"  # Default
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        if result and result[0]:
+            current_status = result[0]
+        conn.close()
+    except Exception as e:
+        print(f"Error retrieving research status during cleanup: {e}")
+
+    # Remove from active research
+    if research_id in active_research:
+        del active_research[research_id]
+
+    # Remove from termination flags
+    if research_id in termination_flags:
+        del termination_flags[research_id]
+
+    # Send a final message to any remaining subscribers with explicit status
+    if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+        # Use the proper status message based on database status
+        if current_status == 'suspended' or current_status == 'failed':
+            final_message = {
+                'status': current_status,
+                'message': f'Research was {current_status}',
+                'progress': 0,  # For suspended research, show 0% not 100%
+            }
+        else:
+            final_message = {
+                'status': 'completed',
+                'message': 'Research process has ended and resources have been cleaned up',
+                'progress': 100,
+            }
+
+        try:
+            print(f"Sending final {current_status} socket message for research {research_id}")
+            # Use emit to all, not just subscribers
+            socketio.emit(f'research_progress_{research_id}', final_message)
+
+            # Also emit to specific subscribers
+            for sid in socket_subscriptions[research_id]:
+                try:
+                    socketio.emit(
+                        f'research_progress_{research_id}',
+                        final_message,
+                        room=sid
+                    )
+                except Exception as sub_err:
+                    print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+        except Exception as e:
+            print(f"Error sending final cleanup message: {e}")
+
+    # Don't immediately remove subscriptions - let clients disconnect naturally
+
 def run_research_process(research_id, query, mode):
+    """Run the research process in the background for a given research ID"""
     try:
-        system = AdvancedSearchSystem()
+        # Check if this research has been terminated before we even start
+        if research_id in termination_flags and termination_flags[research_id]:
+            print(f"Research {research_id} was terminated before starting")
+            cleanup_research_resources(research_id)
+            return
+
+        print(f"Starting research process for ID {research_id}, query: {query}")
 
+        # Set up the AI Context Manager
+        output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
+        os.makedirs(output_dir, exist_ok=True)
+
         # Set up progress callback
         def progress_callback(message, progress_percent, metadata):
+            # FREQUENT TERMINATION CHECK: Check for termination at each callback
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user")
+
             timestamp = datetime.utcnow().isoformat()
+
+            # Adjust progress based on research mode
+            adjusted_progress = progress_percent
+            if mode == 'detailed' and metadata.get('phase') == 'output_generation':
+                # For detailed mode, we need to adjust the progress range
+                # because detailed reports take longer after the search phase
+                adjusted_progress = min(80, progress_percent)
+            elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
+                # Scale the progress from 80% to 95% for the report generation phase
+                # Map progress_percent values (0-100%) to the (80-95%) range
+                if progress_percent is not None:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 80 + (normalized * 15)
+            elif mode == 'quick' and metadata.get('phase') == 'output_generation':
+                # For quick mode, ensure we're at least at 85% during output generation
+                adjusted_progress = max(85, progress_percent)
+                # Map any further progress within output_generation to 85-95% range
+                if progress_percent is not None and progress_percent > 0:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 85 + (normalized * 10)
+
+            # Don't let progress go backwards
+            if research_id in active_research and adjusted_progress is not None:
+                current_progress = active_research[research_id].get('progress', 0)
+                adjusted_progress = max(current_progress, adjusted_progress)
+
             log_entry = {
                 "time": timestamp,
                 "message": message,
-                "progress": progress_percent,
+                "progress": adjusted_progress,
                 "metadata": metadata
             }
 
             # Check if termination was requested
             if research_id in termination_flags and termination_flags[research_id]:
-                # Clean up and exit
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
                 raise Exception("Research was terminated by user")
 
             # Update active research record
             if research_id in active_research:
                 active_research[research_id]['log'].append(log_entry)
-                if progress_percent is not None:
-                    active_research[research_id]['progress'] = progress_percent
+                if adjusted_progress is not None:
+                    active_research[research_id]['progress'] = adjusted_progress
 
-            # Save to database (but not too frequently)
-            if progress_percent is None or progress_percent % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete']:
-                conn = sqlite3.connect(DB_PATH)
-                cursor = conn.cursor()
+            # Determine log type for database storage
+            log_type = 'info'
+            if metadata and metadata.get('phase'):
+                phase = metadata.get('phase')
+                if phase in ['complete', 'iteration_complete']:
+                    log_type = 'milestone'
+                elif phase == 'error' or 'error' in message.lower():
+                    log_type = 'error'
+
+            # Always save logs to the new research_logs table
+            add_log_to_db(
+                research_id,
+                message,
+                log_type=log_type,
+                progress=adjusted_progress,
+                metadata=metadata
+            )
+
+            # Update progress in the research_history table (for backward compatibility)
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+
+            # Update the progress and log separately to avoid race conditions with reading/writing the log
+            if adjusted_progress is not None:
                 cursor.execute(
-                    'SELECT progress_log FROM research_history WHERE id = ?',
-                    (research_id,)
+                    'UPDATE research_history SET progress = ? WHERE id = ?',
+                    (adjusted_progress, research_id)
                 )
-                result = cursor.fetchone()
-                if result:
-                    try:
-                        current_log = json.loads(result[0])
-                    except:
-                        current_log = []
-                    current_log.append(log_entry)
-                    cursor.execute(
-                        'UPDATE research_history SET progress_log = ? WHERE id = ?',
-                        (json.dumps(current_log), research_id)
-                    )
-                    conn.commit()
-                conn.close()
 
-            # Emit socket event with try/except block to handle connection issues
+            # Add the log entry to the progress_log
+            cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
+            log_result = cursor.fetchone()
+
+            if log_result:
+                try:
+                    current_log = json.loads(log_result[0])
+                except:
+                    current_log = []
+
+                current_log.append(log_entry)
+                cursor.execute(
+                    'UPDATE research_history SET progress_log = ? WHERE id = ?',
+                    (json.dumps(current_log), research_id)
+                )
+
+            conn.commit()
+            conn.close()
+
+            # Emit a socket event
             try:
+                # Basic event data
                 event_data = {
-                    'progress': progress_percent,
                     'message': message,
-                    'status': 'in_progress',
-                    'log_entry': log_entry
+                    'progress': adjusted_progress
                 }
 
-                # Emit to the specific research channel
+                # Add log entry in full format for detailed logging on client
+                if metadata:
+                    event_data['log_entry'] = log_entry
+
+                # Send to all subscribers and broadcast channel
                 socketio.emit(f'research_progress_{research_id}', event_data)
 
-                # Also emit to specific subscribers if available
-                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+                if research_id in socket_subscriptions:
                    for sid in socket_subscriptions[research_id]:
                        try:
                            socketio.emit(
                                f'research_progress_{research_id}',
-                                event_data,
+                                event_data,
                                room=sid
                            )
-                        except Exception as sub_err:
-                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+                        except Exception as err:
+                            print(f"Error emitting to subscriber {sid}: {str(err)}")
+            except Exception as e:
+                print(f"Socket emit error (non-critical): {str(e)}")
 
-            except Exception as socket_error:
-                # Log socket error but continue with the research process
-                print(f"Socket emit error (non-critical): {str(socket_error)}")
-
+        # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
+        def check_termination():
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user during long-running operation")
+            return False  # Not terminated
+
         # Set the progress callback in the system
+        system = AdvancedSearchSystem()
         system.set_progress_callback(progress_callback)
 
         # Run the search
         progress_callback("Starting research process", 5, {"phase": "init"})
-        results = system.analyze_topic(query)
-        progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+
+        try:
+            results = system.analyze_topic(query)
+            if mode == 'quick':
+                progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
+            else:
+                progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+        except Exception as search_error:
+            # Better handling of specific search errors
+            error_message = str(search_error)
+            error_type = "unknown"
+
+            # Extract error details for common issues
+            if "status code: 503" in error_message:
+                error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
+                error_type = "ollama_unavailable"
+            elif "status code: 404" in error_message:
+                error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
+                error_type = "model_not_found"
+            elif "status code:" in error_message:
+                # Extract the status code for other HTTP errors
+                status_code = error_message.split("status code:")[1].strip()
+                error_message = f"API request failed with status code {status_code}. Please check your configuration."
+                error_type = "api_error"
+            elif "connection" in error_message.lower():
+                error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
+                error_type = "connection_error"
+
+            # Raise with improved error message
+            raise Exception(f"{error_message} (Error type: {error_type})")
 
         # Generate output based on mode
         if mode == 'quick':
593
1003
  if results.get('findings'):
594
1004
  #initial_analysis = [finding['content'] for finding in results['findings']]
595
1005
  summary = ""
596
- raw_formatted_findings = results['formatted_findings']
597
-
598
- # ADDED CODE: Convert debug output to clean markdown
599
- clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
600
1006
 
601
- # Save as markdown file
602
- output_dir = "research_outputs"
603
- if not os.path.exists(output_dir):
604
- os.makedirs(output_dir)
605
-
606
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
607
- safe_query = safe_query.replace(" ", "_").lower()
608
- report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
1007
+ # Safer access to formatted_findings with logging
1008
+ print(f"Results keys: {list(results.keys())}")
609
1009
 
610
- with open(report_path, "w", encoding="utf-8") as f:
611
- f.write("# Quick Research Summary\n\n")
612
- f.write(f"Query: {query}\n\n")
613
- f.write(clean_markdown) # Use clean markdown instead of raw findings
614
- f.write("\n\n## Research Metrics\n")
615
- f.write(f"- Search Iterations: {results['iterations']}\n")
616
- f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
1010
+ # Check if formatted_findings exists in results
1011
+ if 'formatted_findings' not in results:
1012
+ print("WARNING: 'formatted_findings' not found in results, using fallback")
1013
+ # Create fallback formatted findings from available data
1014
+ raw_formatted_findings = "# Research Findings\n\n"
1015
+ for i, finding in enumerate(results.get('findings', [])):
1016
+ raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
1017
+ else:
1018
+ raw_formatted_findings = results['formatted_findings']
1019
+ print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
617
1020
 
618
- # Update database
619
- metadata = {
620
- 'iterations': results['iterations'],
621
- 'generated_at': datetime.utcnow().isoformat()
622
- }
623
-
624
- # Calculate duration in seconds - using UTC consistently
625
- now = datetime.utcnow()
626
- completed_at = now.isoformat()
627
-
628
- # Get the start time from the database
629
- conn = sqlite3.connect(DB_PATH)
630
- cursor = conn.cursor()
631
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
632
- result = cursor.fetchone()
633
-
634
- # Use the helper function for consistent duration calculation
635
- duration_seconds = calculate_duration(result[0])
636
-
637
- # Update the record
638
- cursor.execute(
639
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
640
- ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
641
- )
642
- conn.commit()
643
- conn.close()
644
-
645
- progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
1021
+ try:
1022
+ # ADDED CODE: Convert debug output to clean markdown
1023
+ clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
1024
+ print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
1025
+
1026
+ # First send a progress update for generating the summary
1027
+ progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
1028
+
1029
+ # Save as markdown file
1030
+ output_dir = "research_outputs"
1031
+ if not os.path.exists(output_dir):
1032
+ os.makedirs(output_dir)
1033
+
1034
+ safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
1035
+ safe_query = safe_query.replace(" ", "_").lower()
1036
+ report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
1037
+
1038
+ # Send progress update for writing to file
1039
+ progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
1040
+
1041
+ print(f"Writing report to: {report_path}")
1042
+ with open(report_path, "w", encoding="utf-8") as f:
1043
+ f.write("# Quick Research Summary\n\n")
1044
+ f.write(f"Query: {query}\n\n")
1045
+ f.write(clean_markdown) # Use clean markdown instead of raw findings
1046
+ f.write("\n\n## Research Metrics\n")
1047
+ f.write(f"- Search Iterations: {results['iterations']}\n")
1048
+ f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
1049
+
1050
+ # Update database
1051
+ metadata = {
1052
+ 'iterations': results['iterations'],
1053
+ 'generated_at': datetime.utcnow().isoformat()
1054
+ }
1055
+
1056
+ # Calculate duration in seconds - using UTC consistently
1057
+ now = datetime.utcnow()
1058
+ completed_at = now.isoformat()
1059
+
1060
+ print(f"Updating database for research_id: {research_id}")
1061
+ # Get the start time from the database
1062
+ conn = sqlite3.connect(DB_PATH)
1063
+ cursor = conn.cursor()
1064
+ cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
1065
+ result = cursor.fetchone()
1066
+
1067
+ # Use the helper function for consistent duration calculation
1068
+ duration_seconds = calculate_duration(result[0])
1069
+
1070
+ # Update the record
1071
+ cursor.execute(
1072
+ 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
1073
+ ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
1074
+ )
1075
+ conn.commit()
1076
+ conn.close()
1077
+ print(f"Database updated successfully for research_id: {research_id}")
1078
+
1079
+ # Send the final completion message
1080
+ progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
1081
+
1082
+ # Clean up resources
1083
+ print(f"Cleaning up resources for research_id: {research_id}")
1084
+ cleanup_research_resources(research_id)
1085
+ print(f"Resources cleaned up for research_id: {research_id}")
1086
+ except Exception as inner_e:
1087
+ print(f"Error during quick summary generation: {str(inner_e)}")
1088
+ print(traceback.format_exc())
1089
+ raise Exception(f"Error generating quick summary: {str(inner_e)}")
1090
+ else:
1091
+ raise Exception("No research findings were generated. Please try again.")
646
1092
  else:
647
1093
  # Full Report
648
1094
  progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
@@ -688,23 +1134,47 @@ def run_research_process(research_id, query, mode):
 
             progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
 
-            # Clean up
-            if research_id in active_research:
-                del active_research[research_id]
+            # Clean up - moved to a separate function for reuse
+            cleanup_research_resources(research_id)
 
     except Exception as e:
         # Handle error
         error_message = f"Research failed: {str(e)}"
         print(f"Research error: {error_message}")
         try:
-            progress_callback(error_message, None, {"phase": "error", "error": str(e)})
+            # Check for common Ollama error patterns in the exception and provide more user-friendly errors
+            user_friendly_error = str(e)
+            error_context = {}
+
+            if "Error type: ollama_unavailable" in user_friendly_error:
+                user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
+                error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
+            elif "Error type: model_not_found" in user_friendly_error:
+                user_friendly_error = "Required Ollama model not found. Please pull the model first."
+                error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
+            elif "Error type: connection_error" in user_friendly_error:
+                user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
+                error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
+            elif "Error type: api_error" in user_friendly_error:
+                # Keep the original error message as it's already improved
+                error_context = {"solution": "Check API configuration and credentials."}
+
+            # Update metadata with more context about the error
+            metadata = {
+                "phase": "error",
+                "error": user_friendly_error
+            }
+            if error_context:
+                metadata.update(error_context)
+
+            progress_callback(user_friendly_error, None, metadata)
 
             conn = sqlite3.connect(DB_PATH)
             cursor = conn.cursor()
 
             # If termination was requested, mark as suspended instead of failed
             status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
-            message = "Research was terminated by user" if status == 'suspended' else str(e)
+            message = "Research was terminated by user" if status == 'suspended' else user_friendly_error
 
             # Calculate duration up to termination point - using UTC consistently
             now = datetime.utcnow()
@@ -721,7 +1191,7 @@ def run_research_process(research_id, query, mode):
 
             cursor.execute(
                 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
-                (status, completed_at, duration_seconds, json.dumps({'error': message}), research_id)
+                (status, completed_at, duration_seconds, json.dumps(metadata), research_id)
             )
             conn.commit()
             conn.close()
@@ -749,11 +1219,8 @@ def run_research_process(research_id, query, mode):
         except Exception as inner_e:
             print(f"Error in error handler: {str(inner_e)}")
 
-        # Clean up resources
-        if research_id in active_research:
-            del active_research[research_id]
-        if research_id in termination_flags:
-            del termination_flags[research_id]
+        # Clean up resources - moved to a separate function for reuse
+        cleanup_research_resources(research_id)
 
 @research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
 def terminate_research(research_id):
@@ -789,16 +1256,30 @@ def terminate_research(research_id):
 
     # Log the termination request - using UTC timestamp
     timestamp = datetime.utcnow().isoformat()
+    termination_message = "Research termination requested by user"
+    current_progress = active_research[research_id]['progress']
+
+    # Create log entry
     log_entry = {
         "time": timestamp,
-        "message": "Research termination requested by user",
-        "progress": active_research[research_id]['progress'],
+        "message": termination_message,
+        "progress": current_progress,
         "metadata": {"phase": "termination"}
     }
 
+    # Add to in-memory log
     active_research[research_id]['log'].append(log_entry)
 
-    # Update the log in the database
+    # Add to database log
+    add_log_to_db(
+        research_id,
+        termination_message,
+        log_type='milestone',
+        progress=current_progress,
+        metadata={"phase": "termination"}
+    )
+
+    # Update the log in the database (old way for backward compatibility)
     cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
    log_result = cursor.fetchone()
    if log_result:
@@ -812,14 +1293,16 @@ def terminate_research(research_id):
             (json.dumps(current_log), research_id)
         )
 
+    # IMMEDIATELY update the status to 'suspended' to avoid race conditions
+    cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
     conn.commit()
     conn.close()
 
     # Emit a socket event for the termination request
     try:
         event_data = {
-            'status': 'terminating',
-            'message': 'Research termination requested by user'
+            'status': 'suspended',  # Changed from 'terminating' to 'suspended'
+            'message': 'Research was suspended by user request'
         }
 
         socketio.emit(f'research_progress_{research_id}', event_data)
@@ -877,8 +1360,6 @@ def delete_research(research_id):
     conn.close()
 
     return jsonify({'status': 'success'})
-
-# Main settings page that links to specialized config pages
 @research_bp.route('/settings', methods=['GET'])
 def settings_page():
     """Main settings dashboard with links to specialized config pages"""
@@ -1120,6 +1601,47 @@ def open_file_location():
         return redirect(url_for('research.collections_config_page'))
     else:
         return redirect(url_for('research.main_config_page'))
+
+@research_bp.route('/api/research/<int:research_id>/logs')
+def get_research_logs(research_id):
+    """Get logs for a specific research ID"""
+    # First check if the research exists
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
+    result = cursor.fetchone()
+    conn.close()
+
+    if not result:
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    # Retrieve logs from the database
+    logs = get_logs_for_research(research_id)
+
+    # Add any current logs from memory if this is an active research
+    if research_id in active_research and active_research[research_id].get('log'):
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+
+        # Filter out logs that are already in the database
+        # We'll compare timestamps to avoid duplicates
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
+
+    return jsonify({
+        'status': 'success',
+        'logs': logs
+    })
+
+
+
 # Register the blueprint
 app.register_blueprint(research_bp)
 
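Note: the new logs endpoint can be polled over plain HTTP; a sketch using requests (host, port and research ID are illustrative, and any url_prefix given to research_bp at registration would prepend to the path shown on the route):

    import requests

    resp = requests.get('http://localhost:5000/api/research/42/logs')
    resp.raise_for_status()
    for log in resp.json()['logs']:
        print(f"[{log['type']}] {log['time']} {log['message']}")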
@@ -1133,7 +1655,6 @@ def app_serve_static(path):
 def favicon():
     return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
 
-
 # Add this function to app.py
 def convert_debug_to_markdown(raw_text, query):
     """
@@ -1146,43 +1667,75 @@ def convert_debug_to_markdown(raw_text, query):
     Returns:
         Clean markdown formatted text
     """
-    # If there's a "DETAILED FINDINGS:" section, extract everything after it
-    if "DETAILED FINDINGS:" in raw_text:
-        detailed_index = raw_text.index("DETAILED FINDINGS:")
-        content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
-    else:
-        content = raw_text
-
-    # Remove divider lines with === symbols
-    content = "\n".join([line for line in content.split("\n")
-                        if not line.strip().startswith("===") and not line.strip() == "="*80])
-
-    # If COMPLETE RESEARCH OUTPUT exists, remove that section
-    if "COMPLETE RESEARCH OUTPUT" in content:
-        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
-
-    # Remove SEARCH QUESTIONS BY ITERATION section
-    if "SEARCH QUESTIONS BY ITERATION:" in content:
-        search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
-        next_major_section = -1
-        for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
-            if marker in content[search_index:]:
-                marker_pos = content.index(marker, search_index)
-                if next_major_section == -1 or marker_pos < next_major_section:
-                    next_major_section = marker_pos
+    try:
+        print(f"Starting markdown conversion for query: {query}")
+        print(f"Raw text type: {type(raw_text)}")
 
-        if next_major_section != -1:
-            content = content[:search_index] + content[next_major_section:]
+        # Handle None or empty input
+        if not raw_text:
+            print("WARNING: raw_text is empty or None")
+            return f"No detailed findings available for '{query}'."
+
+        # If there's a "DETAILED FINDINGS:" section, extract everything after it
+        if "DETAILED FINDINGS:" in raw_text:
+            print("Found DETAILED FINDINGS section")
+            detailed_index = raw_text.index("DETAILED FINDINGS:")
+            content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
         else:
-            # If no later section, just remove everything from SEARCH QUESTIONS onwards
-            content = content[:search_index].strip()
-
-    return content.strip()
+            print("No DETAILED FINDINGS section found, using full text")
+            content = raw_text
+
+        # Remove divider lines with === symbols
+        lines_before = len(content.split("\n"))
+        content = "\n".join([line for line in content.split("\n")
+                            if not line.strip().startswith("===") and not line.strip() == "="*80])
+        lines_after = len(content.split("\n"))
+        print(f"Removed {lines_before - lines_after} divider lines")
+
+        # If COMPLETE RESEARCH OUTPUT exists, remove that section
+        if "COMPLETE RESEARCH OUTPUT" in content:
+            print("Found and removing COMPLETE RESEARCH OUTPUT section")
+            content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
+        # Remove SEARCH QUESTIONS BY ITERATION section
+        if "SEARCH QUESTIONS BY ITERATION:" in content:
+            print("Found SEARCH QUESTIONS BY ITERATION section")
+            search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
+            next_major_section = -1
+            for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
+                if marker in content[search_index:]:
+                    marker_pos = content.index(marker, search_index)
+                    if next_major_section == -1 or marker_pos < next_major_section:
+                        next_major_section = marker_pos
+
+            if next_major_section != -1:
+                print(f"Removing section from index {search_index} to {next_major_section}")
+                content = content[:search_index] + content[next_major_section:]
+            else:
+                # If no later section, just remove everything from SEARCH QUESTIONS onwards
+                print(f"Removing everything after index {search_index}")
+                content = content[:search_index].strip()
+
+        print(f"Final markdown length: {len(content.strip())}")
+        return content.strip()
+    except Exception as e:
+        print(f"Error in convert_debug_to_markdown: {str(e)}")
+        print(traceback.format_exc())
+        # Return a basic message with the original query as fallback
+        return f"# Research on {query}\n\nThere was an error formatting the research results."
+
 def main():
     """
     Entry point for the web application when run as a command.
     This function is needed for the package's entry point to work properly.
     """
+    # Import settings here to avoid circular imports
+    from local_deep_research.config import settings
+
+    # Get web server settings with defaults
+    port = settings.web.port
+    host = settings.web.host
+    debug = settings.web.debug
 
     # Check for OpenAI availability but don't import it unless necessary
     try:
@@ -1202,8 +1755,7 @@ def main():
     except Exception as e:
         print(f"Error checking OpenAI availability: {e}")
 
-
-    socketio.run(app, debug=True, host='0.0.0.0', port=5000, allow_unsafe_werkzeug=True)
-
+    socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
+
 if __name__ == '__main__':
     main()
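Note: main() now reads host, port and debug from local_deep_research.config.settings instead of the hard-coded 0.0.0.0:5000. The attribute-style settings.web.* access suggests a Dynaconf-like settings object; a stub mirroring just the shape this function expects (the real object lives in local_deep_research.config):

    from types import SimpleNamespace

    # Hypothetical stand-in with the same attribute layout as the real settings.
    settings = SimpleNamespace(
        web=SimpleNamespace(host='0.0.0.0', port=5000, debug=True)
    )
    assert (settings.web.host, settings.web.port) == ('0.0.0.0', 5000)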