PyPI - local-deep-research - Versions diffs - 0.1.1__py3-none-any.whl → 0.1.12__py3-none-any.whl - Mend

local-deep-research 0.1.1py3-none-any.whl → 0.1.12py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

local_deep_research/web/app.py CHANGED Viewed

@@ -90,6 +90,9 @@ termination_flags = {}
 # Database setup
 DB_PATH = 'research_history.db'
+# Output directory for research results
+OUTPUT_DIR = 'research_outputs'
 # Add Content Security Policy headers to allow Socket.IO to function
 @app.after_request
 def add_security_headers(response):
@@ -128,7 +131,6 @@ def handle_websocket_requests():
             # Return empty response to prevent further processing
             return '', 200
-# Initialize the database
 def init_db():
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
@@ -145,7 +147,22 @@ def init_db():
         duration_seconds INTEGER,
         report_path TEXT,
         metadata TEXT,
-        progress_log TEXT
+        progress_log TEXT,
+        progress INTEGER
+    )
+    ''')
+    # Create a dedicated table for research logs
+    cursor.execute('''
+    CREATE TABLE IF NOT EXISTS research_logs (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        research_id INTEGER NOT NULL,
+        timestamp TEXT NOT NULL,
+        message TEXT NOT NULL,
+        log_type TEXT NOT NULL,
+        progress INTEGER,
+        metadata TEXT,
+        FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
     )
     ''')
@@ -157,9 +174,17 @@ def init_db():
         print("Adding missing 'duration_seconds' column to research_history table")
         cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
+    # Check if the progress column exists, add it if missing
+    if 'progress' not in columns:
+        print("Adding missing 'progress' column to research_history table")
+        cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
+    # Enable foreign key support
+    cursor.execute('PRAGMA foreign_keys = ON')
     conn.commit()
     conn.close()
 # Helper function to calculate duration between created_at and completed_at timestamps
 def calculate_duration(created_at_str):
     """
@@ -200,6 +225,88 @@ def calculate_duration(created_at_str):
     return duration_seconds
+# Add these helper functions after the calculate_duration function
+def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
+    """
+    Store a log entry in the database
+    Args:
+        research_id: ID of the research
+        message: Log message text
+        log_type: Type of log (info, error, milestone)
+        progress: Progress percentage (0-100)
+        metadata: Additional metadata as dictionary (will be stored as JSON)
+    """
+    try:
+        timestamp = datetime.utcnow().isoformat()
+        metadata_json = json.dumps(metadata) if metadata else None
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute(
+            'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
+            'VALUES (?, ?, ?, ?, ?, ?)',
+            (research_id, timestamp, message, log_type, progress, metadata_json)
+        )
+        conn.commit()
+        conn.close()
+        return True
+    except Exception as e:
+        print(f"Error adding log to database: {str(e)}")
+        print(traceback.format_exc())
+        return False
+def get_logs_for_research(research_id):
+    """
+    Retrieve all logs for a specific research ID
+    Args:
+        research_id: ID of the research
+    Returns:
+        List of log entries as dictionaries
+    """
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+        cursor.execute(
+            'SELECT * FROM research_logs WHERE research_id = ? ORDER BY timestamp ASC',
+            (research_id,)
+        )
+        results = cursor.fetchall()
+        conn.close()
+        logs = []
+        for result in results:
+            log_entry = dict(result)
+            # Parse metadata JSON if it exists
+            if log_entry.get('metadata'):
+                try:
+                    log_entry['metadata'] = json.loads(log_entry['metadata'])
+                except:
+                    log_entry['metadata'] = {}
+            else:
+                log_entry['metadata'] = {}
+            # Convert entry for frontend consumption
+            formatted_entry = {
+                'time': log_entry['timestamp'],
+                'message': log_entry['message'],
+                'progress': log_entry['progress'],
+                'metadata': log_entry['metadata'],
+                'type': log_entry['log_type']
+            }
+            logs.append(formatted_entry)
+        return logs
+    except Exception as e:
+        print(f"Error retrieving logs from database: {str(e)}")
+        print(traceback.format_exc())
+        return []
 # Initialize the database on startup
 def initialize():
     init_db()
@@ -430,15 +537,23 @@ def get_research_details(research_id):
     if not result:
         return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-    try:
-        # Get the progress log
-        progress_log = json.loads(result.get('progress_log', '[]'))
-    except:
-        progress_log = []
-    # If this is an active research, get the latest log
+    # Get logs from the dedicated log database
+    logs = get_logs_for_research(research_id)
+    # If this is an active research, merge with any in-memory logs
     if research_id in active_research:
-        progress_log = active_research[research_id]['log']
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+        # Filter out logs that are already in the database by timestamp
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
     return jsonify({
         'status': 'success',
@@ -449,7 +564,7 @@ def get_research_details(research_id):
         'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
         'created_at': result.get('created_at'),
         'completed_at': result.get('completed_at'),
-        'log': progress_log
+        'log': logs
     })
 @research_bp.route('/api/report/<int:research_id>')
@@ -568,6 +683,19 @@ def cleanup_research_resources(research_id):
     """Clean up resources for a completed research"""
     print(f"Cleaning up resources for research {research_id}")
+    # Get the current status from the database to determine the final status message
+    current_status = "completed"  # Default
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        if result and result[0]:
+            current_status = result[0]
+        conn.close()
+    except Exception as e:
+        print(f"Error retrieving research status during cleanup: {e}")
     # Remove from active research
     if research_id in active_research:
         del active_research[research_id]
@@ -576,16 +704,24 @@ def cleanup_research_resources(research_id):
     if research_id in termination_flags:
         del termination_flags[research_id]
-    # Send a final message to any remaining subscribers with explicit completed status
+    # Send a final message to any remaining subscribers with explicit status
     if research_id in socket_subscriptions and socket_subscriptions[research_id]:
-        final_message = {
-            'status': 'completed',
-            'message': 'Research process has ended and resources have been cleaned up',
-            'progress': 100,
-        }
+        # Use the proper status message based on database status
+        if current_status == 'suspended' or current_status == 'failed':
+            final_message = {
+                'status': current_status,
+                'message': f'Research was {current_status}',
+                'progress': 0,  # For suspended research, show 0% not 100%
+            }
+        else:
+            final_message = {
+                'status': 'completed',
+                'message': 'Research process has ended and resources have been cleaned up',
+                'progress': 100,
+            }
         try:
-            print(f"Sending final completion socket message for research {research_id}")
+            print(f"Sending final {current_status} socket message for research {research_id}")
             # Use emit to all, not just subscribers
             socketio.emit(f'research_progress_{research_id}', final_message)
@@ -605,11 +741,52 @@ def cleanup_research_resources(research_id):
     # Don't immediately remove subscriptions - let clients disconnect naturally
 def run_research_process(research_id, query, mode):
+    """Run the research process in the background for a given research ID"""
     try:
-        system = AdvancedSearchSystem()
+        # Check if this research has been terminated before we even start
+        if research_id in termination_flags and termination_flags[research_id]:
+            print(f"Research {research_id} was terminated before starting")
+            cleanup_research_resources(research_id)
+            return
+        print(f"Starting research process for ID {research_id}, query: {query}")
+        # Set up the AI Context Manager
+        output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
+        os.makedirs(output_dir, exist_ok=True)
         # Set up progress callback
         def progress_callback(message, progress_percent, metadata):
+            # FREQUENT TERMINATION CHECK: Check for termination at each callback
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+                # Clean up resources
+                cleanup_research_resources(research_id)
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user")
             timestamp = datetime.utcnow().isoformat()
             # Adjust progress based on research mode
@@ -646,7 +823,32 @@ def run_research_process(research_id, query, mode):
             # Check if termination was requested
             if research_id in termination_flags and termination_flags[research_id]:
-                # Clean up and exit
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+                # Clean up resources
+                cleanup_research_resources(research_id)
+                # Raise exception to exit the process
                 raise Exception("Research was terminated by user")
             # Update active research record
@@ -655,59 +857,111 @@ def run_research_process(research_id, query, mode):
                 if adjusted_progress is not None:
                     active_research[research_id]['progress'] = adjusted_progress
-                # Save to database (but not too frequently)
-                if adjusted_progress is None or adjusted_progress % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete', 'output_generation', 'report_generation', 'report_complete']:
-                    conn = sqlite3.connect(DB_PATH)
-                    cursor = conn.cursor()
+                # Determine log type for database storage
+                log_type = 'info'
+                if metadata and metadata.get('phase'):
+                    phase = metadata.get('phase')
+                    if phase in ['complete', 'iteration_complete']:
+                        log_type = 'milestone'
+                    elif phase == 'error' or 'error' in message.lower():
+                        log_type = 'error'
+                # Always save logs to the new research_logs table
+                add_log_to_db(
+                    research_id,
+                    message,
+                    log_type=log_type,
+                    progress=adjusted_progress,
+                    metadata=metadata
+                )
+                # Update progress in the research_history table (for backward compatibility)
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Update the progress and log separately to avoid race conditions with reading/writing the log
+                if adjusted_progress is not None:
                     cursor.execute(
-                        'SELECT progress_log FROM research_history WHERE id = ?',
-                        (research_id,)
+                        'UPDATE research_history SET progress = ? WHERE id = ?',
+                        (adjusted_progress, research_id)
                     )
-                    result = cursor.fetchone()
-                    if result:
-                        try:
-                            current_log = json.loads(result[0])
-                        except:
-                            current_log = []
-                        current_log.append(log_entry)
-                        cursor.execute(
-                            'UPDATE research_history SET progress_log = ? WHERE id = ?',
-                            (json.dumps(current_log), research_id)
-                        )
-                        conn.commit()
-                    conn.close()
-                # Emit socket event with try/except block to handle connection issues
+                # Add the log entry to the progress_log
+                cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
+                log_result = cursor.fetchone()
+                if log_result:
+                    try:
+                        current_log = json.loads(log_result[0])
+                    except:
+                        current_log = []
+                    current_log.append(log_entry)
+                    cursor.execute(
+                        'UPDATE research_history SET progress_log = ? WHERE id = ?',
+                        (json.dumps(current_log), research_id)
+                    )
+                conn.commit()
+                conn.close()
+                # Emit a socket event
                 try:
+                    # Basic event data
                     event_data = {
-                        'progress': adjusted_progress,
                         'message': message,
-                        'status': 'in_progress',
-                        'log_entry': log_entry
+                        'progress': adjusted_progress
                     }
-                    # Emit to the specific research channel
+                    # Add log entry in full format for detailed logging on client
+                    if metadata:
+                        event_data['log_entry'] = log_entry
+                    # Send to all subscribers and broadcast channel
                     socketio.emit(f'research_progress_{research_id}', event_data)
-                    # Also emit to specific subscribers if available
-                    if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+                    if research_id in socket_subscriptions:
                         for sid in socket_subscriptions[research_id]:
                             try:
                                 socketio.emit(
                                     f'research_progress_{research_id}',
-                                    event_data,
+                                    event_data,
                                     room=sid
                                 )
-                            except Exception as sub_err:
-                                print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+                            except Exception as err:
+                                print(f"Error emitting to subscriber {sid}: {str(err)}")
+                except Exception as e:
+                    print(f"Socket emit error (non-critical): {str(e)}")
-                except Exception as socket_error:
-                    # Log socket error but continue with the research process
-                    print(f"Socket emit error (non-critical): {str(socket_error)}")
-            return not (research_id in termination_flags and termination_flags[research_id])
+        # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
+        def check_termination():
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+                # Clean up resources
+                cleanup_research_resources(research_id)
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user during long-running operation")
+            return False  # Not terminated
         # Set the progress callback in the system
+        system = AdvancedSearchSystem()
         system.set_progress_callback(progress_callback)
         # Run the search
@@ -965,7 +1219,7 @@ def run_research_process(research_id, query, mode):
         except Exception as inner_e:
             print(f"Error in error handler: {str(inner_e)}")
-        # Clean up resources - moved to a separate function
+        # Clean up resources - moved to a separate function for reuse
         cleanup_research_resources(research_id)
 @research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
@@ -1002,16 +1256,30 @@ def terminate_research(research_id):
     # Log the termination request - using UTC timestamp
     timestamp = datetime.utcnow().isoformat()
+    termination_message = "Research termination requested by user"
+    current_progress = active_research[research_id]['progress']
+    # Create log entry
     log_entry = {
         "time": timestamp,
-        "message": "Research termination requested by user",
-        "progress": active_research[research_id]['progress'],
+        "message": termination_message,
+        "progress": current_progress,
         "metadata": {"phase": "termination"}
     }
+    # Add to in-memory log
     active_research[research_id]['log'].append(log_entry)
-    # Update the log in the database
+    # Add to database log
+    add_log_to_db(
+        research_id,
+        termination_message,
+        log_type='milestone',
+        progress=current_progress,
+        metadata={"phase": "termination"}
+    )
+    # Update the log in the database (old way for backward compatibility)
     cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
     log_result = cursor.fetchone()
     if log_result:
@@ -1025,14 +1293,16 @@ def terminate_research(research_id):
             (json.dumps(current_log), research_id)
         )
+    # IMMEDIATELY update the status to 'suspended' to avoid race conditions
+    cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
     conn.commit()
     conn.close()
     # Emit a socket event for the termination request
     try:
         event_data = {
-            'status': 'terminating',
-            'message': 'Research termination requested by user'
+            'status': 'suspended',  # Changed from 'terminating' to 'suspended'
+            'message': 'Research was suspended by user request'
         }
         socketio.emit(f'research_progress_{research_id}', event_data)
@@ -1090,8 +1360,6 @@ def delete_research(research_id):
     conn.close()
     return jsonify({'status': 'success'})
-# Main settings page that links to specialized config pages
 @research_bp.route('/settings', methods=['GET'])
 def settings_page():
     """Main settings dashboard with links to specialized config pages"""
@@ -1333,6 +1601,47 @@ def open_file_location():
         return redirect(url_for('research.collections_config_page'))
     else:
         return redirect(url_for('research.main_config_page'))
+@research_bp.route('/api/research/<int:research_id>/logs')
+def get_research_logs(research_id):
+    """Get logs for a specific research ID"""
+    # First check if the research exists
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
+    result = cursor.fetchone()
+    conn.close()
+    if not result:
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+    # Retrieve logs from the database
+    logs = get_logs_for_research(research_id)
+    # Add any current logs from memory if this is an active research
+    if research_id in active_research and active_research[research_id].get('log'):
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+        # Filter out logs that are already in the database
+        # We'll compare timestamps to avoid duplicates
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
+    return jsonify({
+        'status': 'success',
+        'logs': logs
+    })
 # Register the blueprint
 app.register_blueprint(research_bp)

local-deep-research 0.1.1__py3-none-any.whl → 0.1.12__py3-none-any.whl

local-deep-research 0.1.1py3-none-any.whl → 0.1.12py3-none-any.whl