local-deep-research 0.1.0__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,6 +90,9 @@ termination_flags = {}
 # Database setup
 DB_PATH = 'research_history.db'
 
+# Output directory for research results
+OUTPUT_DIR = 'research_outputs'
+
 # Add Content Security Policy headers to allow Socket.IO to function
 @app.after_request
 def add_security_headers(response):
@@ -128,7 +131,6 @@ def handle_websocket_requests():
     # Return empty response to prevent further processing
     return '', 200
 
-# Initialize the database
 def init_db():
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
@@ -145,7 +147,22 @@ def init_db():
             duration_seconds INTEGER,
             report_path TEXT,
             metadata TEXT,
-            progress_log TEXT
+            progress_log TEXT,
+            progress INTEGER
+        )
+    ''')
+
+    # Create a dedicated table for research logs
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS research_logs (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            research_id INTEGER NOT NULL,
+            timestamp TEXT NOT NULL,
+            message TEXT NOT NULL,
+            log_type TEXT NOT NULL,
+            progress INTEGER,
+            metadata TEXT,
+            FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
         )
     ''')
 
@@ -157,9 +174,17 @@ def init_db():
         print("Adding missing 'duration_seconds' column to research_history table")
         cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
 
+    # Check if the progress column exists, add it if missing
+    if 'progress' not in columns:
+        print("Adding missing 'progress' column to research_history table")
+        cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
+
+    # Enable foreign key support
+    cursor.execute('PRAGMA foreign_keys = ON')
+
     conn.commit()
     conn.close()
-
+
 # Helper function to calculate duration between created_at and completed_at timestamps
 def calculate_duration(created_at_str):
     """
@@ -200,6 +225,88 @@ def calculate_duration(created_at_str):
 
     return duration_seconds
 
+# Add these helper functions after the calculate_duration function
+
+
+def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
+    """
+    Store a log entry in the database
+
+    Args:
+        research_id: ID of the research
+        message: Log message text
+        log_type: Type of log (info, error, milestone)
+        progress: Progress percentage (0-100)
+        metadata: Additional metadata as dictionary (will be stored as JSON)
+    """
+    try:
+        timestamp = datetime.utcnow().isoformat()
+        metadata_json = json.dumps(metadata) if metadata else None
+
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute(
+            'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
+            'VALUES (?, ?, ?, ?, ?, ?)',
+            (research_id, timestamp, message, log_type, progress, metadata_json)
+        )
+        conn.commit()
+        conn.close()
+        return True
+    except Exception as e:
+        print(f"Error adding log to database: {str(e)}")
+        print(traceback.format_exc())
+        return False
+
+def get_logs_for_research(research_id):
+    """
+    Retrieve all logs for a specific research ID
+
+    Args:
+        research_id: ID of the research
+
+    Returns:
+        List of log entries as dictionaries
+    """
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+        cursor.execute(
+            'SELECT * FROM research_logs WHERE research_id = ? ORDER BY timestamp ASC',
+            (research_id,)
+        )
+        results = cursor.fetchall()
+        conn.close()
+
+        logs = []
+        for result in results:
+            log_entry = dict(result)
+            # Parse metadata JSON if it exists
+            if log_entry.get('metadata'):
+                try:
+                    log_entry['metadata'] = json.loads(log_entry['metadata'])
+                except:
+                    log_entry['metadata'] = {}
+            else:
+                log_entry['metadata'] = {}
+
+            # Convert entry for frontend consumption
+            formatted_entry = {
+                'time': log_entry['timestamp'],
+                'message': log_entry['message'],
+                'progress': log_entry['progress'],
+                'metadata': log_entry['metadata'],
+                'type': log_entry['log_type']
+            }
+            logs.append(formatted_entry)
+
+        return logs
+    except Exception as e:
+        print(f"Error retrieving logs from database: {str(e)}")
+        print(traceback.format_exc())
+        return []
+
 # Initialize the database on startup
 def initialize():
     init_db()
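Note: a quick usage sketch of the two helpers added above (the research ID is illustrative and assumes init_db() has already created the tables):

    # Write a structured log entry, then read the formatted entries back.
    add_log_to_db(
        1,
        "Finished iteration 2",
        log_type='milestone',
        progress=40,
        metadata={'phase': 'iteration_complete'}
    )

    for entry in get_logs_for_research(1):
        # Entries come back frontend-ready:
        # {'time', 'message', 'progress', 'metadata', 'type'}
        print(entry['time'], entry['type'], entry['message'])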
@@ -320,12 +427,39 @@ def start_research():
     if not query:
         return jsonify({'status': 'error', 'message': 'Query is required'}), 400
 
-    # Check if there's any active research
+    # Check if there's any active research that's actually still running
     if active_research:
-        return jsonify({
-            'status': 'error',
-            'message': 'Another research is already in progress. Please wait for it to complete.'
-        }), 409
+        # Verify each active research is still valid
+        stale_research_ids = []
+        for research_id, research_data in list(active_research.items()):
+            # Check database status
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+            cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+            result = cursor.fetchone()
+            conn.close()
+
+            # If the research doesn't exist in DB or is not in_progress, it's stale
+            if not result or result[0] != 'in_progress':
+                stale_research_ids.append(research_id)
+            # Also check if thread is still alive
+            elif not research_data.get('thread') or not research_data.get('thread').is_alive():
+                stale_research_ids.append(research_id)
+
+        # Clean up any stale research processes
+        for stale_id in stale_research_ids:
+            print(f"Cleaning up stale research process: {stale_id}")
+            if stale_id in active_research:
+                del active_research[stale_id]
+            if stale_id in termination_flags:
+                del termination_flags[stale_id]
+
+        # After cleanup, check if there's still active research
+        if active_research:
+            return jsonify({
+                'status': 'error',
+                'message': 'Another research is already in progress. Please wait for it to complete.'
+            }), 409
 
     # Create a record in the database with explicit UTC timestamp
     created_at = datetime.utcnow().isoformat()
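Note: the staleness check above expects each active_research entry to carry its worker thread under a 'thread' key. The code that populates the registry falls outside this diff, so the following registration sketch is an assumption inferred from research_data.get('thread').is_alive():

    import threading

    # Hypothetical registration at research start-up.
    thread = threading.Thread(
        target=run_research_process,
        args=(research_id, query, mode),
        daemon=True,
    )
    active_research[research_id] = {'thread': thread, 'progress': 0, 'log': []}
    thread.start()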
@@ -403,15 +537,23 @@ def get_research_details(research_id):
     if not result:
         return jsonify({'status': 'error', 'message': 'Research not found'}), 404
 
-    try:
-        # Get the progress log
-        progress_log = json.loads(result.get('progress_log', '[]'))
-    except:
-        progress_log = []
-
-    # If this is an active research, get the latest log
+    # Get logs from the dedicated log database
+    logs = get_logs_for_research(research_id)
+
+    # If this is an active research, merge with any in-memory logs
     if research_id in active_research:
-        progress_log = active_research[research_id]['log']
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+
+        # Filter out logs that are already in the database by timestamp
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
 
     return jsonify({
         'status': 'success',
@@ -422,7 +564,7 @@ def get_research_details(research_id):
         'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
         'created_at': result.get('created_at'),
         'completed_at': result.get('completed_at'),
-        'log': progress_log
+        'log': logs
     })
 
 @research_bp.route('/api/report/<int:research_id>')
@@ -467,6 +609,7 @@ def handle_disconnect():
                 subscribers.remove(request.sid)
                 if not subscribers:
                     socket_subscriptions.pop(research_id, None)
+                    print(f"Removed empty subscription for research {research_id}")
     except Exception as e:
         print(f"Error handling disconnect: {e}")
 
@@ -474,23 +617,54 @@ def handle_disconnect():
 def handle_subscribe(data):
     research_id = data.get('research_id')
     if research_id:
-        if research_id not in socket_subscriptions:
-            socket_subscriptions[research_id] = set()
-        socket_subscriptions[research_id].add(request.sid)
-        print(f"Client {request.sid} subscribed to research {research_id}")
+        # First check if this research is still active
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        conn.close()
 
-        # Send current status immediately if available
-        if research_id in active_research:
-            progress = active_research[research_id]['progress']
-            latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+        # Only allow subscription to valid research
+        if result:
+            status = result[0]
+
+            # Initialize subscription set if needed
+            if research_id not in socket_subscriptions:
+                socket_subscriptions[research_id] = set()
+
+            # Add this client to the subscribers
+            socket_subscriptions[research_id].add(request.sid)
+            print(f"Client {request.sid} subscribed to research {research_id}")
 
-            if latest_log:
+            # Send current status immediately if available
+            if research_id in active_research:
+                progress = active_research[research_id]['progress']
+                latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+
+                if latest_log:
+                    emit(f'research_progress_{research_id}', {
+                        'progress': progress,
+                        'message': latest_log.get('message', 'Processing...'),
+                        'status': 'in_progress',
+                        'log_entry': latest_log
+                    })
+            elif status in ['completed', 'failed', 'suspended']:
+                # Send final status for completed research
                 emit(f'research_progress_{research_id}', {
-                    'progress': progress,
-                    'message': latest_log.get('message', 'Processing...'),
-                    'status': 'in_progress',
-                    'log_entry': latest_log
+                    'progress': 100 if status == 'completed' else 0,
+                    'message': 'Research completed successfully' if status == 'completed' else
+                               'Research failed' if status == 'failed' else 'Research was suspended',
+                    'status': status,
+                    'log_entry': {
+                        'time': datetime.utcnow().isoformat(),
+                        'message': f'Research is {status}',
+                        'progress': 100 if status == 'completed' else 0,
+                        'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
+                    }
                 })
+        else:
+            # Research not found
+            emit('error', {'message': f'Research ID {research_id} not found'})
 
 @socketio.on_error
 def handle_socket_error(e):
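Note: a minimal client counterpart for the subscription flow, sketched with the python-socketio package. The research_progress_<id> channel name comes from the emits above; the 'subscribe_to_research' event name is an assumption, since the @socketio.on decorator for handle_subscribe falls outside this diff:

    import socketio

    research_id = 42  # illustrative
    sio = socketio.Client()

    @sio.on(f'research_progress_{research_id}')
    def on_progress(data):
        print(data.get('progress'), data.get('message'), data.get('status'))

    sio.connect('http://localhost:5000')
    sio.emit('subscribe_to_research', {'research_id': research_id})  # event name assumed
    sio.wait()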
@@ -504,88 +678,324 @@ def handle_default_error(e):
     # Don't propagate exceptions to avoid crashing the server
     return False
 
+# Function to clean up resources for a completed research
+def cleanup_research_resources(research_id):
+    """Clean up resources for a completed research"""
+    print(f"Cleaning up resources for research {research_id}")
+
+    # Get the current status from the database to determine the final status message
+    current_status = "completed"  # Default
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        if result and result[0]:
+            current_status = result[0]
+        conn.close()
+    except Exception as e:
+        print(f"Error retrieving research status during cleanup: {e}")
+
+    # Remove from active research
+    if research_id in active_research:
+        del active_research[research_id]
+
+    # Remove from termination flags
+    if research_id in termination_flags:
+        del termination_flags[research_id]
+
+    # Send a final message to any remaining subscribers with explicit status
+    if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+        # Use the proper status message based on database status
+        if current_status == 'suspended' or current_status == 'failed':
+            final_message = {
+                'status': current_status,
+                'message': f'Research was {current_status}',
+                'progress': 0,  # For suspended research, show 0% not 100%
+            }
+        else:
+            final_message = {
+                'status': 'completed',
+                'message': 'Research process has ended and resources have been cleaned up',
+                'progress': 100,
+            }
+
+        try:
+            print(f"Sending final {current_status} socket message for research {research_id}")
+            # Use emit to all, not just subscribers
+            socketio.emit(f'research_progress_{research_id}', final_message)
+
+            # Also emit to specific subscribers
+            for sid in socket_subscriptions[research_id]:
+                try:
+                    socketio.emit(
+                        f'research_progress_{research_id}',
+                        final_message,
+                        room=sid
+                    )
+                except Exception as sub_err:
+                    print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+        except Exception as e:
+            print(f"Error sending final cleanup message: {e}")
+
+    # Don't immediately remove subscriptions - let clients disconnect naturally
+
 def run_research_process(research_id, query, mode):
+    """Run the research process in the background for a given research ID"""
     try:
-        system = AdvancedSearchSystem()
+        # Check if this research has been terminated before we even start
+        if research_id in termination_flags and termination_flags[research_id]:
+            print(f"Research {research_id} was terminated before starting")
+            cleanup_research_resources(research_id)
+            return
+
+        print(f"Starting research process for ID {research_id}, query: {query}")
 
+        # Set up the AI Context Manager
+        output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
+        os.makedirs(output_dir, exist_ok=True)
+
         # Set up progress callback
         def progress_callback(message, progress_percent, metadata):
+            # FREQUENT TERMINATION CHECK: Check for termination at each callback
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user")
+
             timestamp = datetime.utcnow().isoformat()
+
+            # Adjust progress based on research mode
+            adjusted_progress = progress_percent
+            if mode == 'detailed' and metadata.get('phase') == 'output_generation':
+                # For detailed mode, we need to adjust the progress range
+                # because detailed reports take longer after the search phase
+                adjusted_progress = min(80, progress_percent)
+            elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
+                # Scale the progress from 80% to 95% for the report generation phase
+                # Map progress_percent values (0-100%) to the (80-95%) range
+                if progress_percent is not None:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 80 + (normalized * 15)
+            elif mode == 'quick' and metadata.get('phase') == 'output_generation':
+                # For quick mode, ensure we're at least at 85% during output generation
+                adjusted_progress = max(85, progress_percent)
+                # Map any further progress within output_generation to 85-95% range
+                if progress_percent is not None and progress_percent > 0:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 85 + (normalized * 10)
+
+            # Don't let progress go backwards
+            if research_id in active_research and adjusted_progress is not None:
+                current_progress = active_research[research_id].get('progress', 0)
+                adjusted_progress = max(current_progress, adjusted_progress)
+
             log_entry = {
                 "time": timestamp,
                 "message": message,
-                "progress": progress_percent,
+                "progress": adjusted_progress,
                 "metadata": metadata
             }
 
             # Check if termination was requested
             if research_id in termination_flags and termination_flags[research_id]:
-                # Clean up and exit
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
                 raise Exception("Research was terminated by user")
 
             # Update active research record
             if research_id in active_research:
                 active_research[research_id]['log'].append(log_entry)
-                if progress_percent is not None:
-                    active_research[research_id]['progress'] = progress_percent
+                if adjusted_progress is not None:
+                    active_research[research_id]['progress'] = adjusted_progress
 
-            # Save to database (but not too frequently)
-            if progress_percent is None or progress_percent % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete']:
-                conn = sqlite3.connect(DB_PATH)
-                cursor = conn.cursor()
+            # Determine log type for database storage
+            log_type = 'info'
+            if metadata and metadata.get('phase'):
+                phase = metadata.get('phase')
+                if phase in ['complete', 'iteration_complete']:
+                    log_type = 'milestone'
+                elif phase == 'error' or 'error' in message.lower():
+                    log_type = 'error'
+
+            # Always save logs to the new research_logs table
+            add_log_to_db(
+                research_id,
+                message,
+                log_type=log_type,
+                progress=adjusted_progress,
+                metadata=metadata
+            )
+
+            # Update progress in the research_history table (for backward compatibility)
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+
+            # Update the progress and log separately to avoid race conditions with reading/writing the log
+            if adjusted_progress is not None:
                 cursor.execute(
-                    'SELECT progress_log FROM research_history WHERE id = ?',
-                    (research_id,)
+                    'UPDATE research_history SET progress = ? WHERE id = ?',
+                    (adjusted_progress, research_id)
                 )
-                result = cursor.fetchone()
-                if result:
-                    try:
-                        current_log = json.loads(result[0])
-                    except:
-                        current_log = []
-                    current_log.append(log_entry)
-                    cursor.execute(
-                        'UPDATE research_history SET progress_log = ? WHERE id = ?',
-                        (json.dumps(current_log), research_id)
-                    )
-                    conn.commit()
-                conn.close()
 
-            # Emit socket event with try/except block to handle connection issues
+            # Add the log entry to the progress_log
+            cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
+            log_result = cursor.fetchone()
+
+            if log_result:
+                try:
+                    current_log = json.loads(log_result[0])
+                except:
+                    current_log = []
+
+                current_log.append(log_entry)
+                cursor.execute(
+                    'UPDATE research_history SET progress_log = ? WHERE id = ?',
+                    (json.dumps(current_log), research_id)
+                )
+
+            conn.commit()
+            conn.close()
+
+            # Emit a socket event
             try:
+                # Basic event data
                 event_data = {
-                    'progress': progress_percent,
                     'message': message,
-                    'status': 'in_progress',
-                    'log_entry': log_entry
+                    'progress': adjusted_progress
                 }
 
-                # Emit to the specific research channel
+                # Add log entry in full format for detailed logging on client
+                if metadata:
+                    event_data['log_entry'] = log_entry
+
+                # Send to all subscribers and broadcast channel
                 socketio.emit(f'research_progress_{research_id}', event_data)
 
-                # Also emit to specific subscribers if available
-                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+                if research_id in socket_subscriptions:
                    for sid in socket_subscriptions[research_id]:
                        try:
                            socketio.emit(
                                f'research_progress_{research_id}',
-                                event_data,
+                                event_data,
                                room=sid
                            )
-                        except Exception as sub_err:
-                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+                        except Exception as err:
+                            print(f"Error emitting to subscriber {sid}: {str(err)}")
+            except Exception as e:
+                print(f"Socket emit error (non-critical): {str(e)}")
 
-            except Exception as socket_error:
-                # Log socket error but continue with the research process
-                print(f"Socket emit error (non-critical): {str(socket_error)}")
-
+        # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
+        def check_termination():
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user during long-running operation")
+            return False  # Not terminated
+
         # Set the progress callback in the system
+        system = AdvancedSearchSystem()
         system.set_progress_callback(progress_callback)
 
         # Run the search
         progress_callback("Starting research process", 5, {"phase": "init"})
-        results = system.analyze_topic(query)
-        progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+
+        try:
+            results = system.analyze_topic(query)
+            if mode == 'quick':
+                progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
+            else:
+                progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+        except Exception as search_error:
+            # Better handling of specific search errors
+            error_message = str(search_error)
+            error_type = "unknown"
+
+            # Extract error details for common issues
+            if "status code: 503" in error_message:
+                error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
+                error_type = "ollama_unavailable"
+            elif "status code: 404" in error_message:
+                error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
+                error_type = "model_not_found"
+            elif "status code:" in error_message:
+                # Extract the status code for other HTTP errors
+                status_code = error_message.split("status code:")[1].strip()
+                error_message = f"API request failed with status code {status_code}. Please check your configuration."
+                error_type = "api_error"
+            elif "connection" in error_message.lower():
+                error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
+                error_type = "connection_error"
+
+            # Raise with improved error message
+            raise Exception(f"{error_message} (Error type: {error_type})")
 
         # Generate output based on mode
         if mode == 'quick':
593
1003
  if results.get('findings'):
594
1004
  #initial_analysis = [finding['content'] for finding in results['findings']]
595
1005
  summary = ""
596
- raw_formatted_findings = results['formatted_findings']
597
-
598
- # ADDED CODE: Convert debug output to clean markdown
599
- clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
600
1006
 
601
- # Save as markdown file
602
- output_dir = "research_outputs"
603
- if not os.path.exists(output_dir):
604
- os.makedirs(output_dir)
605
-
606
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
607
- safe_query = safe_query.replace(" ", "_").lower()
608
- report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
1007
+ # Safer access to formatted_findings with logging
1008
+ print(f"Results keys: {list(results.keys())}")
609
1009
 
610
- with open(report_path, "w", encoding="utf-8") as f:
611
- f.write("# Quick Research Summary\n\n")
612
- f.write(f"Query: {query}\n\n")
613
- f.write(clean_markdown) # Use clean markdown instead of raw findings
614
- f.write("\n\n## Research Metrics\n")
615
- f.write(f"- Search Iterations: {results['iterations']}\n")
616
- f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
1010
+ # Check if formatted_findings exists in results
1011
+ if 'formatted_findings' not in results:
1012
+ print("WARNING: 'formatted_findings' not found in results, using fallback")
1013
+ # Create fallback formatted findings from available data
1014
+ raw_formatted_findings = "# Research Findings\n\n"
1015
+ for i, finding in enumerate(results.get('findings', [])):
1016
+ raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
1017
+ else:
1018
+ raw_formatted_findings = results['formatted_findings']
1019
+ print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
617
1020
 
618
- # Update database
619
- metadata = {
620
- 'iterations': results['iterations'],
621
- 'generated_at': datetime.utcnow().isoformat()
622
- }
623
-
624
- # Calculate duration in seconds - using UTC consistently
625
- now = datetime.utcnow()
626
- completed_at = now.isoformat()
627
-
628
- # Get the start time from the database
629
- conn = sqlite3.connect(DB_PATH)
630
- cursor = conn.cursor()
631
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
632
- result = cursor.fetchone()
633
-
634
- # Use the helper function for consistent duration calculation
635
- duration_seconds = calculate_duration(result[0])
636
-
637
- # Update the record
638
- cursor.execute(
639
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
640
- ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
641
- )
642
- conn.commit()
643
- conn.close()
644
-
645
- progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
1021
+ try:
1022
+ # ADDED CODE: Convert debug output to clean markdown
1023
+ clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
1024
+ print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
1025
+
1026
+ # First send a progress update for generating the summary
1027
+ progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
1028
+
1029
+ # Save as markdown file
1030
+ output_dir = "research_outputs"
1031
+ if not os.path.exists(output_dir):
1032
+ os.makedirs(output_dir)
1033
+
1034
+ safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
1035
+ safe_query = safe_query.replace(" ", "_").lower()
1036
+ report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
1037
+
1038
+ # Send progress update for writing to file
1039
+ progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
1040
+
1041
+ print(f"Writing report to: {report_path}")
1042
+ with open(report_path, "w", encoding="utf-8") as f:
1043
+ f.write("# Quick Research Summary\n\n")
1044
+ f.write(f"Query: {query}\n\n")
1045
+ f.write(clean_markdown) # Use clean markdown instead of raw findings
1046
+ f.write("\n\n## Research Metrics\n")
1047
+ f.write(f"- Search Iterations: {results['iterations']}\n")
1048
+ f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
1049
+
1050
+ # Update database
1051
+ metadata = {
1052
+ 'iterations': results['iterations'],
1053
+ 'generated_at': datetime.utcnow().isoformat()
1054
+ }
1055
+
1056
+ # Calculate duration in seconds - using UTC consistently
1057
+ now = datetime.utcnow()
1058
+ completed_at = now.isoformat()
1059
+
1060
+ print(f"Updating database for research_id: {research_id}")
1061
+ # Get the start time from the database
1062
+ conn = sqlite3.connect(DB_PATH)
1063
+ cursor = conn.cursor()
1064
+ cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
1065
+ result = cursor.fetchone()
1066
+
1067
+ # Use the helper function for consistent duration calculation
1068
+ duration_seconds = calculate_duration(result[0])
1069
+
1070
+ # Update the record
1071
+ cursor.execute(
1072
+ 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
1073
+ ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
1074
+ )
1075
+ conn.commit()
1076
+ conn.close()
1077
+ print(f"Database updated successfully for research_id: {research_id}")
1078
+
1079
+ # Send the final completion message
1080
+ progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
1081
+
1082
+ # Clean up resources
1083
+ print(f"Cleaning up resources for research_id: {research_id}")
1084
+ cleanup_research_resources(research_id)
1085
+ print(f"Resources cleaned up for research_id: {research_id}")
1086
+ except Exception as inner_e:
1087
+ print(f"Error during quick summary generation: {str(inner_e)}")
1088
+ print(traceback.format_exc())
1089
+ raise Exception(f"Error generating quick summary: {str(inner_e)}")
1090
+ else:
1091
+ raise Exception("No research findings were generated. Please try again.")
646
1092
  else:
647
1093
  # Full Report
648
1094
  progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
@@ -688,23 +1134,47 @@ def run_research_process(research_id, query, mode):
 
             progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
 
-            # Clean up
-            if research_id in active_research:
-                del active_research[research_id]
+            # Clean up - moved to a separate function for reuse
+            cleanup_research_resources(research_id)
 
     except Exception as e:
         # Handle error
         error_message = f"Research failed: {str(e)}"
         print(f"Research error: {error_message}")
         try:
-            progress_callback(error_message, None, {"phase": "error", "error": str(e)})
+            # Check for common Ollama error patterns in the exception and provide more user-friendly errors
+            user_friendly_error = str(e)
+            error_context = {}
+
+            if "Error type: ollama_unavailable" in user_friendly_error:
+                user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
+                error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
+            elif "Error type: model_not_found" in user_friendly_error:
+                user_friendly_error = "Required Ollama model not found. Please pull the model first."
+                error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
+            elif "Error type: connection_error" in user_friendly_error:
+                user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
+                error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
+            elif "Error type: api_error" in user_friendly_error:
+                # Keep the original error message as it's already improved
+                error_context = {"solution": "Check API configuration and credentials."}
+
+            # Update metadata with more context about the error
+            metadata = {
+                "phase": "error",
+                "error": user_friendly_error
+            }
+            if error_context:
+                metadata.update(error_context)
+
+            progress_callback(user_friendly_error, None, metadata)
 
             conn = sqlite3.connect(DB_PATH)
             cursor = conn.cursor()
 
             # If termination was requested, mark as suspended instead of failed
             status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
-            message = "Research was terminated by user" if status == 'suspended' else str(e)
+            message = "Research was terminated by user" if status == 'suspended' else user_friendly_error
 
             # Calculate duration up to termination point - using UTC consistently
             now = datetime.utcnow()
@@ -721,7 +1191,7 @@ def run_research_process(research_id, query, mode):
 
             cursor.execute(
                 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
-                (status, completed_at, duration_seconds, json.dumps({'error': message}), research_id)
+                (status, completed_at, duration_seconds, json.dumps(metadata), research_id)
             )
             conn.commit()
             conn.close()
@@ -749,11 +1219,8 @@ def run_research_process(research_id, query, mode):
         except Exception as inner_e:
             print(f"Error in error handler: {str(inner_e)}")
 
-        # Clean up resources
-        if research_id in active_research:
-            del active_research[research_id]
-        if research_id in termination_flags:
-            del termination_flags[research_id]
+        # Clean up resources - moved to a separate function for reuse
+        cleanup_research_resources(research_id)
 
 @research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
 def terminate_research(research_id):
@@ -789,16 +1256,30 @@ def terminate_research(research_id):
 
     # Log the termination request - using UTC timestamp
     timestamp = datetime.utcnow().isoformat()
+    termination_message = "Research termination requested by user"
+    current_progress = active_research[research_id]['progress']
+
+    # Create log entry
     log_entry = {
         "time": timestamp,
-        "message": "Research termination requested by user",
-        "progress": active_research[research_id]['progress'],
+        "message": termination_message,
+        "progress": current_progress,
         "metadata": {"phase": "termination"}
     }
 
+    # Add to in-memory log
     active_research[research_id]['log'].append(log_entry)
 
-    # Update the log in the database
+    # Add to database log
+    add_log_to_db(
+        research_id,
+        termination_message,
+        log_type='milestone',
+        progress=current_progress,
+        metadata={"phase": "termination"}
+    )
+
+    # Update the log in the database (old way for backward compatibility)
     cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
    log_result = cursor.fetchone()
    if log_result:
@@ -812,14 +1293,16 @@ def terminate_research(research_id):
             (json.dumps(current_log), research_id)
         )
 
+    # IMMEDIATELY update the status to 'suspended' to avoid race conditions
+    cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
     conn.commit()
     conn.close()
 
     # Emit a socket event for the termination request
     try:
         event_data = {
-            'status': 'terminating',
-            'message': 'Research termination requested by user'
+            'status': 'suspended',  # Changed from 'terminating' to 'suspended'
+            'message': 'Research was suspended by user request'
         }
 
         socketio.emit(f'research_progress_{research_id}', event_data)
@@ -877,8 +1360,6 @@ def delete_research(research_id):
     conn.close()
 
     return jsonify({'status': 'success'})
-
-# Main settings page that links to specialized config pages
 @research_bp.route('/settings', methods=['GET'])
 def settings_page():
     """Main settings dashboard with links to specialized config pages"""
@@ -1120,6 +1601,47 @@ def open_file_location():
         return redirect(url_for('research.collections_config_page'))
     else:
         return redirect(url_for('research.main_config_page'))
+
+@research_bp.route('/api/research/<int:research_id>/logs')
+def get_research_logs(research_id):
+    """Get logs for a specific research ID"""
+    # First check if the research exists
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
+    result = cursor.fetchone()
+    conn.close()
+
+    if not result:
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    # Retrieve logs from the database
+    logs = get_logs_for_research(research_id)
+
+    # Add any current logs from memory if this is an active research
+    if research_id in active_research and active_research[research_id].get('log'):
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+
+        # Filter out logs that are already in the database
+        # We'll compare timestamps to avoid duplicates
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
+
+    return jsonify({
+        'status': 'success',
+        'logs': logs
+    })
+
+
+
 # Register the blueprint
 app.register_blueprint(research_bp)
 
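Note: the new logs endpoint can be polled over plain HTTP; a sketch using requests (host, port and research ID are illustrative, and any url_prefix given to research_bp at registration would prepend to the path shown on the route):

    import requests

    resp = requests.get('http://localhost:5000/api/research/42/logs')
    resp.raise_for_status()
    for log in resp.json()['logs']:
        print(f"[{log['type']}] {log['time']} {log['message']}")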
@@ -1133,7 +1655,6 @@ def app_serve_static(path):
 def favicon():
     return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
 
-
 # Add this function to app.py
 def convert_debug_to_markdown(raw_text, query):
     """
@@ -1146,43 +1667,75 @@ def convert_debug_to_markdown(raw_text, query):
     Returns:
         Clean markdown formatted text
     """
-    # If there's a "DETAILED FINDINGS:" section, extract everything after it
-    if "DETAILED FINDINGS:" in raw_text:
-        detailed_index = raw_text.index("DETAILED FINDINGS:")
-        content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
-    else:
-        content = raw_text
-
-    # Remove divider lines with === symbols
-    content = "\n".join([line for line in content.split("\n")
-                        if not line.strip().startswith("===") and not line.strip() == "="*80])
-
-    # If COMPLETE RESEARCH OUTPUT exists, remove that section
-    if "COMPLETE RESEARCH OUTPUT" in content:
-        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
-
-    # Remove SEARCH QUESTIONS BY ITERATION section
-    if "SEARCH QUESTIONS BY ITERATION:" in content:
-        search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
-        next_major_section = -1
-        for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
-            if marker in content[search_index:]:
-                marker_pos = content.index(marker, search_index)
-                if next_major_section == -1 or marker_pos < next_major_section:
-                    next_major_section = marker_pos
+    try:
+        print(f"Starting markdown conversion for query: {query}")
+        print(f"Raw text type: {type(raw_text)}")
 
-        if next_major_section != -1:
-            content = content[:search_index] + content[next_major_section:]
+        # Handle None or empty input
+        if not raw_text:
+            print("WARNING: raw_text is empty or None")
+            return f"No detailed findings available for '{query}'."
+
+        # If there's a "DETAILED FINDINGS:" section, extract everything after it
+        if "DETAILED FINDINGS:" in raw_text:
+            print("Found DETAILED FINDINGS section")
+            detailed_index = raw_text.index("DETAILED FINDINGS:")
+            content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
         else:
-            # If no later section, just remove everything from SEARCH QUESTIONS onwards
-            content = content[:search_index].strip()
-
-    return content.strip()
+            print("No DETAILED FINDINGS section found, using full text")
+            content = raw_text
+
+        # Remove divider lines with === symbols
+        lines_before = len(content.split("\n"))
+        content = "\n".join([line for line in content.split("\n")
+                            if not line.strip().startswith("===") and not line.strip() == "="*80])
+        lines_after = len(content.split("\n"))
+        print(f"Removed {lines_before - lines_after} divider lines")
+
+        # If COMPLETE RESEARCH OUTPUT exists, remove that section
+        if "COMPLETE RESEARCH OUTPUT" in content:
+            print("Found and removing COMPLETE RESEARCH OUTPUT section")
+            content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
+        # Remove SEARCH QUESTIONS BY ITERATION section
+        if "SEARCH QUESTIONS BY ITERATION:" in content:
+            print("Found SEARCH QUESTIONS BY ITERATION section")
+            search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
+            next_major_section = -1
+            for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
+                if marker in content[search_index:]:
+                    marker_pos = content.index(marker, search_index)
+                    if next_major_section == -1 or marker_pos < next_major_section:
+                        next_major_section = marker_pos
+
+            if next_major_section != -1:
+                print(f"Removing section from index {search_index} to {next_major_section}")
+                content = content[:search_index] + content[next_major_section:]
+            else:
+                # If no later section, just remove everything from SEARCH QUESTIONS onwards
+                print(f"Removing everything after index {search_index}")
+                content = content[:search_index].strip()
+
+        print(f"Final markdown length: {len(content.strip())}")
+        return content.strip()
+    except Exception as e:
+        print(f"Error in convert_debug_to_markdown: {str(e)}")
+        print(traceback.format_exc())
+        # Return a basic message with the original query as fallback
+        return f"# Research on {query}\n\nThere was an error formatting the research results."
+
 def main():
     """
     Entry point for the web application when run as a command.
     This function is needed for the package's entry point to work properly.
     """
+    # Import settings here to avoid circular imports
+    from local_deep_research.config import settings
+
+    # Get web server settings with defaults
+    port = settings.web.port
+    host = settings.web.host
+    debug = settings.web.debug
 
     # Check for OpenAI availability but don't import it unless necessary
     try:
@@ -1202,8 +1755,7 @@ def main():
     except Exception as e:
         print(f"Error checking OpenAI availability: {e}")
 
-
-    socketio.run(app, debug=True, host='0.0.0.0', port=5000, allow_unsafe_werkzeug=True)
-
+    socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
+
 if __name__ == '__main__':
     main()
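Note: main() now reads host, port and debug from local_deep_research.config.settings instead of the hard-coded 0.0.0.0:5000. The attribute-style settings.web.* access suggests a Dynaconf-like settings object; a stub mirroring just the shape this function expects (the real object lives in local_deep_research.config):

    from types import SimpleNamespace

    # Hypothetical stand-in with the same attribute layout as the real settings.
    settings = SimpleNamespace(
        web=SimpleNamespace(host='0.0.0.0', port=5000, debug=True)
    )
    assert (settings.web.host, settings.web.port) == ('0.0.0.0', 5000)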