local-deep-research 0.1.1__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,6 +90,9 @@ termination_flags = {}
90
90
  # Database setup
91
91
  DB_PATH = 'research_history.db'
92
92
 
93
+ # Output directory for research results
94
+ OUTPUT_DIR = 'research_outputs'
95
+
93
96
  # Add Content Security Policy headers to allow Socket.IO to function
94
97
  @app.after_request
95
98
  def add_security_headers(response):
@@ -128,7 +131,6 @@ def handle_websocket_requests():
128
131
  # Return empty response to prevent further processing
129
132
  return '', 200
130
133
 
131
- # Initialize the database
132
134
  def init_db():
133
135
  conn = sqlite3.connect(DB_PATH)
134
136
  cursor = conn.cursor()
@@ -145,7 +147,22 @@ def init_db():
145
147
  duration_seconds INTEGER,
146
148
  report_path TEXT,
147
149
  metadata TEXT,
148
- progress_log TEXT
150
+ progress_log TEXT,
151
+ progress INTEGER
152
+ )
153
+ ''')
154
+
155
+ # Create a dedicated table for research logs
156
+ cursor.execute('''
157
+ CREATE TABLE IF NOT EXISTS research_logs (
158
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
159
+ research_id INTEGER NOT NULL,
160
+ timestamp TEXT NOT NULL,
161
+ message TEXT NOT NULL,
162
+ log_type TEXT NOT NULL,
163
+ progress INTEGER,
164
+ metadata TEXT,
165
+ FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
149
166
  )
150
167
  ''')
151
168
 
@@ -157,9 +174,17 @@ def init_db():
157
174
  print("Adding missing 'duration_seconds' column to research_history table")
158
175
  cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
159
176
 
177
+ # Check if the progress column exists, add it if missing
178
+ if 'progress' not in columns:
179
+ print("Adding missing 'progress' column to research_history table")
180
+ cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
181
+
182
+ # Enable foreign key support
183
+ cursor.execute('PRAGMA foreign_keys = ON')
184
+
160
185
  conn.commit()
161
186
  conn.close()
162
-
187
+
163
188
  # Helper function to calculate duration between created_at and completed_at timestamps
164
189
  def calculate_duration(created_at_str):
165
190
  """
@@ -200,6 +225,88 @@ def calculate_duration(created_at_str):
200
225
 
201
226
  return duration_seconds
202
227
 
228
+ # Helper functions for writing and reading per-research log entries (research_logs table)
229
+
230
+
231
def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
    """
    Store a log entry in the research_logs table.

    Args:
        research_id: ID of the research
        message: Log message text
        log_type: Type of log (info, error, milestone)
        progress: Progress percentage (0-100)
        metadata: Additional metadata as dictionary (will be stored as JSON);
            a falsy value (None or an empty dict) is stored as NULL

    Returns:
        True if the row was inserted and committed, False if anything failed.
        Failures are printed with a traceback and never raised, so logging
        problems cannot interrupt the caller.
    """
    conn = None
    try:
        # Naive UTC ISO-8601 string; the same format is used for the
        # timestamps written elsewhere in this module.
        timestamp = datetime.utcnow().isoformat()
        metadata_json = json.dumps(metadata) if metadata else None

        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        cursor.execute(
            'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
            'VALUES (?, ?, ?, ?, ?, ?)',
            (research_id, timestamp, message, log_type, progress, metadata_json)
        )
        conn.commit()
        return True
    except Exception as e:
        print(f"Error adding log to database: {str(e)}")
        print(traceback.format_exc())
        return False
    finally:
        # Close on every path: previously a failing INSERT/commit left the
        # connection open until garbage collection.
        if conn is not None:
            conn.close()
260
+
261
def get_logs_for_research(research_id):
    """
    Retrieve all logs for a specific research ID

    Args:
        research_id: ID of the research

    Returns:
        List of log entries as dictionaries with the keys 'time', 'message',
        'progress', 'metadata' and 'type', ordered oldest first. Returns an
        empty list if there are no logs or if the lookup fails (the error is
        printed with a traceback, not raised).
    """
    conn = None
    try:
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()
        # The id tie-break keeps insertion order for rows that share a timestamp.
        cursor.execute(
            'SELECT * FROM research_logs WHERE research_id = ? '
            'ORDER BY timestamp ASC, id ASC',
            (research_id,)
        )
        results = cursor.fetchall()

        logs = []
        for result in results:
            log_entry = dict(result)

            # Parse metadata JSON if it exists; missing or unparseable
            # metadata is reported to the frontend as an empty dict.
            raw_metadata = log_entry.get('metadata')
            if raw_metadata:
                try:
                    parsed_metadata = json.loads(raw_metadata)
                except (TypeError, ValueError):
                    parsed_metadata = {}
            else:
                parsed_metadata = {}

            # Convert entry for frontend consumption
            logs.append({
                'time': log_entry['timestamp'],
                'message': log_entry['message'],
                'progress': log_entry['progress'],
                'metadata': parsed_metadata,
                'type': log_entry['log_type']
            })

        return logs
    except Exception as e:
        print(f"Error retrieving logs from database: {str(e)}")
        print(traceback.format_exc())
        return []
    finally:
        # Close on every path, including when the query itself fails.
        if conn is not None:
            conn.close()
309
+
203
310
  # Initialize the database on startup
204
311
  def initialize():
205
312
  init_db()
@@ -430,15 +537,23 @@ def get_research_details(research_id):
430
537
  if not result:
431
538
  return jsonify({'status': 'error', 'message': 'Research not found'}), 404
432
539
 
433
- try:
434
- # Get the progress log
435
- progress_log = json.loads(result.get('progress_log', '[]'))
436
- except:
437
- progress_log = []
438
-
439
- # If this is an active research, get the latest log
540
+ # Get logs from the dedicated log database
541
+ logs = get_logs_for_research(research_id)
542
+
543
+ # If this is an active research, merge with any in-memory logs
440
544
  if research_id in active_research:
441
- progress_log = active_research[research_id]['log']
545
+ # Use the logs from memory temporarily until they're saved to the database
546
+ memory_logs = active_research[research_id]['log']
547
+
548
+ # Filter out logs that are already in the database by timestamp
549
+ db_timestamps = {log['time'] for log in logs}
550
+ unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
551
+
552
+ # Add unique memory logs to our return list
553
+ logs.extend(unique_memory_logs)
554
+
555
+ # Sort logs by timestamp
556
+ logs.sort(key=lambda x: x['time'])
442
557
 
443
558
  return jsonify({
444
559
  'status': 'success',
@@ -449,7 +564,7 @@ def get_research_details(research_id):
449
564
  'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
450
565
  'created_at': result.get('created_at'),
451
566
  'completed_at': result.get('completed_at'),
452
- 'log': progress_log
567
+ 'log': logs
453
568
  })
454
569
 
455
570
  @research_bp.route('/api/report/<int:research_id>')
@@ -568,6 +683,19 @@ def cleanup_research_resources(research_id):
568
683
  """Clean up resources for a completed research"""
569
684
  print(f"Cleaning up resources for research {research_id}")
570
685
 
686
+ # Get the current status from the database to determine the final status message
687
+ current_status = "completed" # Default
688
+ try:
689
+ conn = sqlite3.connect(DB_PATH)
690
+ cursor = conn.cursor()
691
+ cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
692
+ result = cursor.fetchone()
693
+ if result and result[0]:
694
+ current_status = result[0]
695
+ conn.close()
696
+ except Exception as e:
697
+ print(f"Error retrieving research status during cleanup: {e}")
698
+
571
699
  # Remove from active research
572
700
  if research_id in active_research:
573
701
  del active_research[research_id]
@@ -576,16 +704,24 @@ def cleanup_research_resources(research_id):
576
704
  if research_id in termination_flags:
577
705
  del termination_flags[research_id]
578
706
 
579
- # Send a final message to any remaining subscribers with explicit completed status
707
+ # Send a final message to any remaining subscribers with explicit status
580
708
  if research_id in socket_subscriptions and socket_subscriptions[research_id]:
581
- final_message = {
582
- 'status': 'completed',
583
- 'message': 'Research process has ended and resources have been cleaned up',
584
- 'progress': 100,
585
- }
709
+ # Use the proper status message based on database status
710
+ if current_status == 'suspended' or current_status == 'failed':
711
+ final_message = {
712
+ 'status': current_status,
713
+ 'message': f'Research was {current_status}',
714
+ 'progress': 0, # For suspended research, show 0% not 100%
715
+ }
716
+ else:
717
+ final_message = {
718
+ 'status': 'completed',
719
+ 'message': 'Research process has ended and resources have been cleaned up',
720
+ 'progress': 100,
721
+ }
586
722
 
587
723
  try:
588
- print(f"Sending final completion socket message for research {research_id}")
724
+ print(f"Sending final {current_status} socket message for research {research_id}")
589
725
  # Use emit to all, not just subscribers
590
726
  socketio.emit(f'research_progress_{research_id}', final_message)
591
727
 
@@ -605,11 +741,52 @@ def cleanup_research_resources(research_id):
605
741
  # Don't immediately remove subscriptions - let clients disconnect naturally
606
742
 
607
743
  def run_research_process(research_id, query, mode):
744
+ """Run the research process in the background for a given research ID"""
608
745
  try:
609
- system = AdvancedSearchSystem()
746
+ # Check if this research has been terminated before we even start
747
+ if research_id in termination_flags and termination_flags[research_id]:
748
+ print(f"Research {research_id} was terminated before starting")
749
+ cleanup_research_resources(research_id)
750
+ return
751
+
752
+ print(f"Starting research process for ID {research_id}, query: {query}")
610
753
 
754
+ # Set up the AI Context Manager
755
+ output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
756
+ os.makedirs(output_dir, exist_ok=True)
757
+
611
758
  # Set up progress callback
612
759
  def progress_callback(message, progress_percent, metadata):
760
+ # FREQUENT TERMINATION CHECK: Check for termination at each callback
761
+ if research_id in termination_flags and termination_flags[research_id]:
762
+ # Explicitly set the status to suspended in the database
763
+ conn = sqlite3.connect(DB_PATH)
764
+ cursor = conn.cursor()
765
+ # Calculate duration up to termination point - using UTC consistently
766
+ now = datetime.utcnow()
767
+ completed_at = now.isoformat()
768
+
769
+ # Get the start time from the database
770
+ cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
771
+ result = cursor.fetchone()
772
+
773
+ # Calculate the duration
774
+ duration_seconds = calculate_duration(result[0]) if result and result[0] else None
775
+
776
+ # Update the database with suspended status
777
+ cursor.execute(
778
+ 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
779
+ ('suspended', completed_at, duration_seconds, research_id)
780
+ )
781
+ conn.commit()
782
+ conn.close()
783
+
784
+ # Clean up resources
785
+ cleanup_research_resources(research_id)
786
+
787
+ # Raise exception to exit the process
788
+ raise Exception("Research was terminated by user")
789
+
613
790
  timestamp = datetime.utcnow().isoformat()
614
791
 
615
792
  # Adjust progress based on research mode
@@ -646,7 +823,32 @@ def run_research_process(research_id, query, mode):
646
823
 
647
824
  # Check if termination was requested
648
825
  if research_id in termination_flags and termination_flags[research_id]:
649
- # Clean up and exit
826
+ # Explicitly set the status to suspended in the database
827
+ conn = sqlite3.connect(DB_PATH)
828
+ cursor = conn.cursor()
829
+ # Calculate duration up to termination point - using UTC consistently
830
+ now = datetime.utcnow()
831
+ completed_at = now.isoformat()
832
+
833
+ # Get the start time from the database
834
+ cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
835
+ result = cursor.fetchone()
836
+
837
+ # Calculate the duration
838
+ duration_seconds = calculate_duration(result[0]) if result and result[0] else None
839
+
840
+ # Update the database with suspended status
841
+ cursor.execute(
842
+ 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
843
+ ('suspended', completed_at, duration_seconds, research_id)
844
+ )
845
+ conn.commit()
846
+ conn.close()
847
+
848
+ # Clean up resources
849
+ cleanup_research_resources(research_id)
850
+
851
+ # Raise exception to exit the process
650
852
  raise Exception("Research was terminated by user")
651
853
 
652
854
  # Update active research record
@@ -655,59 +857,111 @@ def run_research_process(research_id, query, mode):
655
857
  if adjusted_progress is not None:
656
858
  active_research[research_id]['progress'] = adjusted_progress
657
859
 
658
- # Save to database (but not too frequently)
659
- if adjusted_progress is None or adjusted_progress % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete', 'output_generation', 'report_generation', 'report_complete']:
660
- conn = sqlite3.connect(DB_PATH)
661
- cursor = conn.cursor()
860
+ # Determine log type for database storage
861
+ log_type = 'info'
862
+ if metadata and metadata.get('phase'):
863
+ phase = metadata.get('phase')
864
+ if phase in ['complete', 'iteration_complete']:
865
+ log_type = 'milestone'
866
+ elif phase == 'error' or 'error' in message.lower():
867
+ log_type = 'error'
868
+
869
+ # Always save logs to the new research_logs table
870
+ add_log_to_db(
871
+ research_id,
872
+ message,
873
+ log_type=log_type,
874
+ progress=adjusted_progress,
875
+ metadata=metadata
876
+ )
877
+
878
+ # Update progress in the research_history table (for backward compatibility)
879
+ conn = sqlite3.connect(DB_PATH)
880
+ cursor = conn.cursor()
881
+
882
+ # Update the progress and log separately to avoid race conditions with reading/writing the log
883
+ if adjusted_progress is not None:
662
884
  cursor.execute(
663
- 'SELECT progress_log FROM research_history WHERE id = ?',
664
- (research_id,)
885
+ 'UPDATE research_history SET progress = ? WHERE id = ?',
886
+ (adjusted_progress, research_id)
665
887
  )
666
- result = cursor.fetchone()
667
- if result:
668
- try:
669
- current_log = json.loads(result[0])
670
- except:
671
- current_log = []
672
- current_log.append(log_entry)
673
- cursor.execute(
674
- 'UPDATE research_history SET progress_log = ? WHERE id = ?',
675
- (json.dumps(current_log), research_id)
676
- )
677
- conn.commit()
678
- conn.close()
679
888
 
680
- # Emit socket event with try/except block to handle connection issues
889
+ # Add the log entry to the progress_log
890
+ cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
891
+ log_result = cursor.fetchone()
892
+
893
+ if log_result:
894
+ try:
895
+ current_log = json.loads(log_result[0])
896
+ except:
897
+ current_log = []
898
+
899
+ current_log.append(log_entry)
900
+ cursor.execute(
901
+ 'UPDATE research_history SET progress_log = ? WHERE id = ?',
902
+ (json.dumps(current_log), research_id)
903
+ )
904
+
905
+ conn.commit()
906
+ conn.close()
907
+
908
+ # Emit a socket event
681
909
  try:
910
+ # Basic event data
682
911
  event_data = {
683
- 'progress': adjusted_progress,
684
912
  'message': message,
685
- 'status': 'in_progress',
686
- 'log_entry': log_entry
913
+ 'progress': adjusted_progress
687
914
  }
688
915
 
689
- # Emit to the specific research channel
916
+ # Add log entry in full format for detailed logging on client
917
+ if metadata:
918
+ event_data['log_entry'] = log_entry
919
+
920
+ # Send to all subscribers and broadcast channel
690
921
  socketio.emit(f'research_progress_{research_id}', event_data)
691
922
 
692
- # Also emit to specific subscribers if available
693
- if research_id in socket_subscriptions and socket_subscriptions[research_id]:
923
+ if research_id in socket_subscriptions:
694
924
  for sid in socket_subscriptions[research_id]:
695
925
  try:
696
926
  socketio.emit(
697
927
  f'research_progress_{research_id}',
698
- event_data,
928
+ event_data,
699
929
  room=sid
700
930
  )
701
- except Exception as sub_err:
702
- print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
931
+ except Exception as err:
932
+ print(f"Error emitting to subscriber {sid}: {str(err)}")
933
+ except Exception as e:
934
+ print(f"Socket emit error (non-critical): {str(e)}")
703
935
 
704
- except Exception as socket_error:
705
- # Log socket error but continue with the research process
706
- print(f"Socket emit error (non-critical): {str(socket_error)}")
707
-
708
- return not (research_id in termination_flags and termination_flags[research_id])
709
-
936
+ # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
937
+ def check_termination():
938
+ if research_id in termination_flags and termination_flags[research_id]:
939
+ # Explicitly set the status to suspended in the database
940
+ conn = sqlite3.connect(DB_PATH)
941
+ cursor = conn.cursor()
942
+ now = datetime.utcnow()
943
+ completed_at = now.isoformat()
944
+
945
+ cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
946
+ result = cursor.fetchone()
947
+ duration_seconds = calculate_duration(result[0]) if result and result[0] else None
948
+
949
+ cursor.execute(
950
+ 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
951
+ ('suspended', completed_at, duration_seconds, research_id)
952
+ )
953
+ conn.commit()
954
+ conn.close()
955
+
956
+ # Clean up resources
957
+ cleanup_research_resources(research_id)
958
+
959
+ # Raise exception to exit the process
960
+ raise Exception("Research was terminated by user during long-running operation")
961
+ return False # Not terminated
962
+
710
963
  # Set the progress callback in the system
964
+ system = AdvancedSearchSystem()
711
965
  system.set_progress_callback(progress_callback)
712
966
 
713
967
  # Run the search
@@ -965,7 +1219,7 @@ def run_research_process(research_id, query, mode):
965
1219
  except Exception as inner_e:
966
1220
  print(f"Error in error handler: {str(inner_e)}")
967
1221
 
968
- # Clean up resources - moved to a separate function
1222
+ # Clean up resources - moved to a separate function for reuse
969
1223
  cleanup_research_resources(research_id)
970
1224
 
971
1225
  @research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
@@ -1002,16 +1256,30 @@ def terminate_research(research_id):
1002
1256
 
1003
1257
  # Log the termination request - using UTC timestamp
1004
1258
  timestamp = datetime.utcnow().isoformat()
1259
+ termination_message = "Research termination requested by user"
1260
+ current_progress = active_research[research_id]['progress']
1261
+
1262
+ # Create log entry
1005
1263
  log_entry = {
1006
1264
  "time": timestamp,
1007
- "message": "Research termination requested by user",
1008
- "progress": active_research[research_id]['progress'],
1265
+ "message": termination_message,
1266
+ "progress": current_progress,
1009
1267
  "metadata": {"phase": "termination"}
1010
1268
  }
1011
1269
 
1270
+ # Add to in-memory log
1012
1271
  active_research[research_id]['log'].append(log_entry)
1013
1272
 
1014
- # Update the log in the database
1273
+ # Add to database log
1274
+ add_log_to_db(
1275
+ research_id,
1276
+ termination_message,
1277
+ log_type='milestone',
1278
+ progress=current_progress,
1279
+ metadata={"phase": "termination"}
1280
+ )
1281
+
1282
+ # Update the log in the database (old way for backward compatibility)
1015
1283
  cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
1016
1284
  log_result = cursor.fetchone()
1017
1285
  if log_result:
@@ -1025,14 +1293,16 @@ def terminate_research(research_id):
1025
1293
  (json.dumps(current_log), research_id)
1026
1294
  )
1027
1295
 
1296
+ # IMMEDIATELY update the status to 'suspended' to avoid race conditions
1297
+ cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
1028
1298
  conn.commit()
1029
1299
  conn.close()
1030
1300
 
1031
1301
  # Emit a socket event for the termination request
1032
1302
  try:
1033
1303
  event_data = {
1034
- 'status': 'terminating',
1035
- 'message': 'Research termination requested by user'
1304
+ 'status': 'suspended', # Changed from 'terminating' to 'suspended'
1305
+ 'message': 'Research was suspended by user request'
1036
1306
  }
1037
1307
 
1038
1308
  socketio.emit(f'research_progress_{research_id}', event_data)
@@ -1090,8 +1360,6 @@ def delete_research(research_id):
1090
1360
  conn.close()
1091
1361
 
1092
1362
  return jsonify({'status': 'success'})
1093
-
1094
- # Main settings page that links to specialized config pages
1095
1363
  @research_bp.route('/settings', methods=['GET'])
1096
1364
  def settings_page():
1097
1365
  """Main settings dashboard with links to specialized config pages"""
@@ -1333,6 +1601,47 @@ def open_file_location():
1333
1601
  return redirect(url_for('research.collections_config_page'))
1334
1602
  else:
1335
1603
  return redirect(url_for('research.main_config_page'))
1604
+
1605
@research_bp.route('/api/research/<int:research_id>/logs')
def get_research_logs(research_id):
    """Get logs for a specific research ID

    Returns a JSON body {'status': 'success', 'logs': [...]} with the logs
    ordered by their 'time' value, or a 404 JSON error when no
    research_history row exists for research_id.
    """
    # Local import: Counter is only needed for the de-duplication below.
    from collections import Counter

    # First check if the research exists
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
        result = cursor.fetchone()
    finally:
        # Release the connection even if the query raises.
        conn.close()

    if not result:
        return jsonify({'status': 'error', 'message': 'Research not found'}), 404

    # Retrieve logs from the database
    logs = get_logs_for_research(research_id)

    # Add any current logs from memory if this is an active research.
    # A single .get() chain avoids a KeyError if the entry is removed from
    # active_research between the membership check and the lookup.
    memory_logs = active_research.get(research_id, {}).get('log')
    if memory_logs:
        # Database rows get their timestamp inside add_log_to_db, while
        # in-memory entries carry the timestamp taken by their creator, so
        # the two never compare equal. Match on message text instead,
        # counting occurrences so repeated messages are kept the right
        # number of times.
        # NOTE(review): assumes in-memory entries are dicts with 'message'
        # and 'time' keys, as the entries built in terminate_research are.
        persisted = Counter(entry['message'] for entry in logs)
        for entry in list(memory_logs):  # snapshot of the in-memory list
            message = entry.get('message')
            if persisted[message] > 0:
                # Already returned from the database; consume one match.
                persisted[message] -= 1
                continue
            logs.append(entry)

        # Sort logs by timestamp
        logs.sort(key=lambda entry: entry.get('time') or '')

    return jsonify({
        'status': 'success',
        'logs': logs
    })
1642
+
1643
+
1644
+
1336
1645
  # Register the blueprint
1337
1646
  app.register_blueprint(research_bp)
1338
1647