local-deep-research 0.1.0__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/defaults/main.toml +5 -0
- local_deep_research/search_system.py +98 -38
- local_deep_research/web/app.py +721 -169
- local_deep_research/web/static/css/styles.css +270 -5
- local_deep_research/web/static/js/app.js +2247 -562
- local_deep_research/web/templates/index.html +37 -1
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +454 -0
- local_deep_research/web_search_engines/search_engine_factory.py +20 -1
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.12.dist-info}/METADATA +24 -6
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.12.dist-info}/RECORD +14 -13
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.12.dist-info}/WHEEL +1 -1
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.12.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.12.dist-info/licenses}/LICENSE +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.12.dist-info}/top_level.txt +0 -0
local_deep_research/web/app.py
CHANGED
@@ -90,6 +90,9 @@ termination_flags = {}
 # Database setup
 DB_PATH = 'research_history.db'
 
+# Output directory for research results
+OUTPUT_DIR = 'research_outputs'
+
 # Add Content Security Policy headers to allow Socket.IO to function
 @app.after_request
 def add_security_headers(response):
@@ -128,7 +131,6 @@ def handle_websocket_requests():
     # Return empty response to prevent further processing
     return '', 200
 
-# Initialize the database
 def init_db():
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
@@ -145,7 +147,22 @@ def init_db():
             duration_seconds INTEGER,
             report_path TEXT,
             metadata TEXT,
-            progress_log TEXT
+            progress_log TEXT,
+            progress INTEGER
+        )
+    ''')
+
+    # Create a dedicated table for research logs
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS research_logs (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            research_id INTEGER NOT NULL,
+            timestamp TEXT NOT NULL,
+            message TEXT NOT NULL,
+            log_type TEXT NOT NULL,
+            progress INTEGER,
+            metadata TEXT,
+            FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
         )
     ''')
 
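
A note on the new research_logs table: SQLite only enforces FOREIGN KEY ... ON DELETE CASCADE when foreign-key support is enabled on the connection, which is why a later hunk adds PRAGMA foreign_keys = ON. A minimal standalone sketch (not code from the package) showing the cascade once the pragma is set:

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('PRAGMA foreign_keys = ON')  # must be enabled per connection
conn.execute('CREATE TABLE research_history (id INTEGER PRIMARY KEY)')
conn.execute('''
    CREATE TABLE research_logs (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        research_id INTEGER NOT NULL,
        message TEXT NOT NULL,
        FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
    )
''')
conn.execute('INSERT INTO research_history (id) VALUES (1)')
conn.execute("INSERT INTO research_logs (research_id, message) VALUES (1, 'started')")
conn.execute('DELETE FROM research_history WHERE id = 1')
# The dependent log row is deleted along with its parent research row
assert conn.execute('SELECT COUNT(*) FROM research_logs').fetchone()[0] == 0
conn.close()
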
@@ -157,9 +174,17 @@ def init_db():
         print("Adding missing 'duration_seconds' column to research_history table")
         cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
 
+    # Check if the progress column exists, add it if missing
+    if 'progress' not in columns:
+        print("Adding missing 'progress' column to research_history table")
+        cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
+
+    # Enable foreign key support
+    cursor.execute('PRAGMA foreign_keys = ON')
+
     conn.commit()
     conn.close()
-
+
 # Helper function to calculate duration between created_at and completed_at timestamps
 def calculate_duration(created_at_str):
     """
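
The migration above tests membership in a columns collection that is built earlier in init_db, outside this hunk. In SQLite such a check is conventionally derived from PRAGMA table_info; the sketch below shows the likely shape, offered as an assumption about the surrounding code rather than a quote from it:

import sqlite3

conn = sqlite3.connect('research_history.db')  # hypothetical path; the app uses DB_PATH
cursor = conn.cursor()
cursor.execute('PRAGMA table_info(research_history)')
columns = [row[1] for row in cursor.fetchall()]  # row[1] holds the column name
if 'progress' not in columns:
    cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
conn.commit()
conn.close()
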
@@ -200,6 +225,88 @@ def calculate_duration(created_at_str):
 
     return duration_seconds
 
+# Add these helper functions after the calculate_duration function
+
+
+def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
+    """
+    Store a log entry in the database
+
+    Args:
+        research_id: ID of the research
+        message: Log message text
+        log_type: Type of log (info, error, milestone)
+        progress: Progress percentage (0-100)
+        metadata: Additional metadata as dictionary (will be stored as JSON)
+    """
+    try:
+        timestamp = datetime.utcnow().isoformat()
+        metadata_json = json.dumps(metadata) if metadata else None
+
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute(
+            'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
+            'VALUES (?, ?, ?, ?, ?, ?)',
+            (research_id, timestamp, message, log_type, progress, metadata_json)
+        )
+        conn.commit()
+        conn.close()
+        return True
+    except Exception as e:
+        print(f"Error adding log to database: {str(e)}")
+        print(traceback.format_exc())
+        return False
+
+def get_logs_for_research(research_id):
+    """
+    Retrieve all logs for a specific research ID
+
+    Args:
+        research_id: ID of the research
+
+    Returns:
+        List of log entries as dictionaries
+    """
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+        cursor.execute(
+            'SELECT * FROM research_logs WHERE research_id = ? ORDER BY timestamp ASC',
+            (research_id,)
+        )
+        results = cursor.fetchall()
+        conn.close()
+
+        logs = []
+        for result in results:
+            log_entry = dict(result)
+            # Parse metadata JSON if it exists
+            if log_entry.get('metadata'):
+                try:
+                    log_entry['metadata'] = json.loads(log_entry['metadata'])
+                except:
+                    log_entry['metadata'] = {}
+            else:
+                log_entry['metadata'] = {}
+
+            # Convert entry for frontend consumption
+            formatted_entry = {
+                'time': log_entry['timestamp'],
+                'message': log_entry['message'],
+                'progress': log_entry['progress'],
+                'metadata': log_entry['metadata'],
+                'type': log_entry['log_type']
+            }
+            logs.append(formatted_entry)
+
+        return logs
+    except Exception as e:
+        print(f"Error retrieving logs from database: {str(e)}")
+        print(traceback.format_exc())
+        return []
+
 # Initialize the database on startup
 def initialize():
     init_db()
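
Both helpers open and close their own SQLite connection on every call, so they can be used from the background research thread as well as from request handlers. A hypothetical round trip, assuming the database has been initialized and a research row with ID 1 exists:

# Write a milestone entry, then read the formatted entries back
add_log_to_db(1, "Search iteration finished", log_type='milestone',
              progress=40, metadata={"phase": "iteration_complete"})

for entry in get_logs_for_research(1):
    # Entries use the frontend-facing keys: time, message, progress, metadata, type
    print(entry['time'], entry['type'], entry['message'])
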
@@ -320,12 +427,39 @@ def start_research():
     if not query:
         return jsonify({'status': 'error', 'message': 'Query is required'}), 400
 
-    # Check if there's any active research
+    # Check if there's any active research that's actually still running
     if active_research:
-
-
-
-
+        # Verify each active research is still valid
+        stale_research_ids = []
+        for research_id, research_data in list(active_research.items()):
+            # Check database status
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+            cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+            result = cursor.fetchone()
+            conn.close()
+
+            # If the research doesn't exist in DB or is not in_progress, it's stale
+            if not result or result[0] != 'in_progress':
+                stale_research_ids.append(research_id)
+            # Also check if thread is still alive
+            elif not research_data.get('thread') or not research_data.get('thread').is_alive():
+                stale_research_ids.append(research_id)
+
+        # Clean up any stale research processes
+        for stale_id in stale_research_ids:
+            print(f"Cleaning up stale research process: {stale_id}")
+            if stale_id in active_research:
+                del active_research[stale_id]
+            if stale_id in termination_flags:
+                del termination_flags[stale_id]
+
+        # After cleanup, check if there's still active research
+        if active_research:
+            return jsonify({
+                'status': 'error',
+                'message': 'Another research is already in progress. Please wait for it to complete.'
+            }), 409
 
     # Create a record in the database with explicit UTC timestamp
     created_at = datetime.utcnow().isoformat()
@@ -403,15 +537,23 @@ def get_research_details(research_id):
     if not result:
         return jsonify({'status': 'error', 'message': 'Research not found'}), 404
 
-
-
-
-
-    progress_log = []
-
-    # If this is an active research, get the latest log
+    # Get logs from the dedicated log database
+    logs = get_logs_for_research(research_id)
+
+    # If this is an active research, merge with any in-memory logs
     if research_id in active_research:
-
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+
+        # Filter out logs that are already in the database by timestamp
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
 
     return jsonify({
         'status': 'success',
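
The merge above uses timestamps as de-duplication keys: an in-memory entry survives only if no database row carries the same 'time' value. The same logic in isolation:

db_logs = [{'time': '2025-01-01T00:00:00', 'message': 'already saved'}]
memory_logs = [
    {'time': '2025-01-01T00:00:00', 'message': 'already saved'},   # duplicate, dropped
    {'time': '2025-01-01T00:00:05', 'message': 'not yet saved'},   # kept
]

db_timestamps = {log['time'] for log in db_logs}
merged = db_logs + [log for log in memory_logs if log['time'] not in db_timestamps]
merged.sort(key=lambda x: x['time'])
assert [m['message'] for m in merged] == ['already saved', 'not yet saved']
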
@@ -422,7 +564,7 @@ def get_research_details(research_id):
         'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
         'created_at': result.get('created_at'),
         'completed_at': result.get('completed_at'),
-        'log':
+        'log': logs
     })
 
 @research_bp.route('/api/report/<int:research_id>')
@@ -467,6 +609,7 @@ def handle_disconnect():
                 subscribers.remove(request.sid)
                 if not subscribers:
                     socket_subscriptions.pop(research_id, None)
+                    print(f"Removed empty subscription for research {research_id}")
     except Exception as e:
         print(f"Error handling disconnect: {e}")
 
@@ -474,23 +617,54 @@ def handle_disconnect():
 def handle_subscribe(data):
     research_id = data.get('research_id')
     if research_id:
-        if
-
-
-
+        # First check if this research is still active
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        conn.close()
 
-        #
-        if
-
-
+        # Only allow subscription to valid research
+        if result:
+            status = result[0]
+
+            # Initialize subscription set if needed
+            if research_id not in socket_subscriptions:
+                socket_subscriptions[research_id] = set()
+
+            # Add this client to the subscribers
+            socket_subscriptions[research_id].add(request.sid)
+            print(f"Client {request.sid} subscribed to research {research_id}")
 
-        if
+            # Send current status immediately if available
+            if research_id in active_research:
+                progress = active_research[research_id]['progress']
+                latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+
+                if latest_log:
+                    emit(f'research_progress_{research_id}', {
+                        'progress': progress,
+                        'message': latest_log.get('message', 'Processing...'),
+                        'status': 'in_progress',
+                        'log_entry': latest_log
+                    })
+            elif status in ['completed', 'failed', 'suspended']:
+                # Send final status for completed research
                 emit(f'research_progress_{research_id}', {
-                    'progress':
-                    'message':
-
-                    '
+                    'progress': 100 if status == 'completed' else 0,
+                    'message': 'Research completed successfully' if status == 'completed' else
+                               'Research failed' if status == 'failed' else 'Research was suspended',
+                    'status': status,
+                    'log_entry': {
+                        'time': datetime.utcnow().isoformat(),
+                        'message': f'Research is {status}',
+                        'progress': 100 if status == 'completed' else 0,
+                        'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
+                    }
                 })
+        else:
+            # Research not found
+            emit('error', {'message': f'Research ID {research_id} not found'})
 
 @socketio.on_error
 def handle_socket_error(e):
@@ -504,88 +678,324 @@ def handle_default_error(e):
     # Don't propagate exceptions to avoid crashing the server
     return False
 
+# Function to clean up resources for a completed research
+def cleanup_research_resources(research_id):
+    """Clean up resources for a completed research"""
+    print(f"Cleaning up resources for research {research_id}")
+
+    # Get the current status from the database to determine the final status message
+    current_status = "completed"  # Default
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        if result and result[0]:
+            current_status = result[0]
+        conn.close()
+    except Exception as e:
+        print(f"Error retrieving research status during cleanup: {e}")
+
+    # Remove from active research
+    if research_id in active_research:
+        del active_research[research_id]
+
+    # Remove from termination flags
+    if research_id in termination_flags:
+        del termination_flags[research_id]
+
+    # Send a final message to any remaining subscribers with explicit status
+    if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+        # Use the proper status message based on database status
+        if current_status == 'suspended' or current_status == 'failed':
+            final_message = {
+                'status': current_status,
+                'message': f'Research was {current_status}',
+                'progress': 0,  # For suspended research, show 0% not 100%
+            }
+        else:
+            final_message = {
+                'status': 'completed',
+                'message': 'Research process has ended and resources have been cleaned up',
+                'progress': 100,
+            }
+
+        try:
+            print(f"Sending final {current_status} socket message for research {research_id}")
+            # Use emit to all, not just subscribers
+            socketio.emit(f'research_progress_{research_id}', final_message)
+
+            # Also emit to specific subscribers
+            for sid in socket_subscriptions[research_id]:
+                try:
+                    socketio.emit(
+                        f'research_progress_{research_id}',
+                        final_message,
+                        room=sid
+                    )
+                except Exception as sub_err:
+                    print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+        except Exception as e:
+            print(f"Error sending final cleanup message: {e}")
+
+    # Don't immediately remove subscriptions - let clients disconnect naturally
+
 def run_research_process(research_id, query, mode):
+    """Run the research process in the background for a given research ID"""
     try:
-
+        # Check if this research has been terminated before we even start
+        if research_id in termination_flags and termination_flags[research_id]:
+            print(f"Research {research_id} was terminated before starting")
+            cleanup_research_resources(research_id)
+            return
+
+        print(f"Starting research process for ID {research_id}, query: {query}")
 
+        # Set up the AI Context Manager
+        output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
+        os.makedirs(output_dir, exist_ok=True)
+
         # Set up progress callback
         def progress_callback(message, progress_percent, metadata):
+            # FREQUENT TERMINATION CHECK: Check for termination at each callback
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user")
+
             timestamp = datetime.utcnow().isoformat()
+
+            # Adjust progress based on research mode
+            adjusted_progress = progress_percent
+            if mode == 'detailed' and metadata.get('phase') == 'output_generation':
+                # For detailed mode, we need to adjust the progress range
+                # because detailed reports take longer after the search phase
+                adjusted_progress = min(80, progress_percent)
+            elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
+                # Scale the progress from 80% to 95% for the report generation phase
+                # Map progress_percent values (0-100%) to the (80-95%) range
+                if progress_percent is not None:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 80 + (normalized * 15)
+            elif mode == 'quick' and metadata.get('phase') == 'output_generation':
+                # For quick mode, ensure we're at least at 85% during output generation
+                adjusted_progress = max(85, progress_percent)
+                # Map any further progress within output_generation to 85-95% range
+                if progress_percent is not None and progress_percent > 0:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 85 + (normalized * 10)
+
+            # Don't let progress go backwards
+            if research_id in active_research and adjusted_progress is not None:
+                current_progress = active_research[research_id].get('progress', 0)
+                adjusted_progress = max(current_progress, adjusted_progress)
+
             log_entry = {
                 "time": timestamp,
                 "message": message,
-                "progress":
+                "progress": adjusted_progress,
                 "metadata": metadata
             }
 
             # Check if termination was requested
             if research_id in termination_flags and termination_flags[research_id]:
-                #
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                # Calculate duration up to termination point - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Calculate the duration
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                # Update the database with suspended status
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
                 raise Exception("Research was terminated by user")
 
             # Update active research record
             if research_id in active_research:
                 active_research[research_id]['log'].append(log_entry)
-                if
-                    active_research[research_id]['progress'] =
+                if adjusted_progress is not None:
+                    active_research[research_id]['progress'] = adjusted_progress
 
-            #
-
-
-
+            # Determine log type for database storage
+            log_type = 'info'
+            if metadata and metadata.get('phase'):
+                phase = metadata.get('phase')
+                if phase in ['complete', 'iteration_complete']:
+                    log_type = 'milestone'
+                elif phase == 'error' or 'error' in message.lower():
+                    log_type = 'error'
+
+            # Always save logs to the new research_logs table
+            add_log_to_db(
+                research_id,
+                message,
+                log_type=log_type,
+                progress=adjusted_progress,
+                metadata=metadata
+            )
+
+            # Update progress in the research_history table (for backward compatibility)
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+
+            # Update the progress and log separately to avoid race conditions with reading/writing the log
+            if adjusted_progress is not None:
                 cursor.execute(
-                    '
-                    (research_id
+                    'UPDATE research_history SET progress = ? WHERE id = ?',
+                    (adjusted_progress, research_id)
                 )
-            result = cursor.fetchone()
-            if result:
-                try:
-                    current_log = json.loads(result[0])
-                except:
-                    current_log = []
-                current_log.append(log_entry)
-                cursor.execute(
-                    'UPDATE research_history SET progress_log = ? WHERE id = ?',
-                    (json.dumps(current_log), research_id)
-                )
-                conn.commit()
-                conn.close()
 
-            #
+            # Add the log entry to the progress_log
+            cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
+            log_result = cursor.fetchone()
+
+            if log_result:
+                try:
+                    current_log = json.loads(log_result[0])
+                except:
+                    current_log = []
+
+                current_log.append(log_entry)
+                cursor.execute(
+                    'UPDATE research_history SET progress_log = ? WHERE id = ?',
+                    (json.dumps(current_log), research_id)
+                )
+
+            conn.commit()
+            conn.close()
+
+            # Emit a socket event
             try:
+                # Basic event data
                 event_data = {
-                    'progress': progress_percent,
                     'message': message,
-                    '
-                    'log_entry': log_entry
+                    'progress': adjusted_progress
                 }
 
-                #
+                # Add log entry in full format for detailed logging on client
+                if metadata:
+                    event_data['log_entry'] = log_entry
+
+                # Send to all subscribers and broadcast channel
                 socketio.emit(f'research_progress_{research_id}', event_data)
 
-
-                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+                if research_id in socket_subscriptions:
                     for sid in socket_subscriptions[research_id]:
                         try:
                             socketio.emit(
                                 f'research_progress_{research_id}',
-                                event_data,
+                                event_data,
                                 room=sid
                             )
-                        except Exception as
-                            print(f"Error emitting to subscriber {sid}: {str(
+                        except Exception as err:
+                            print(f"Error emitting to subscriber {sid}: {str(err)}")
+            except Exception as e:
+                print(f"Socket emit error (non-critical): {str(e)}")
 
-
-
-
-
+        # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
+        def check_termination():
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Explicitly set the status to suspended in the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+                duration_seconds = calculate_duration(result[0]) if result and result[0] else None
+
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
+                    ('suspended', completed_at, duration_seconds, research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                # Clean up resources
+                cleanup_research_resources(research_id)
+
+                # Raise exception to exit the process
+                raise Exception("Research was terminated by user during long-running operation")
+            return False  # Not terminated
+
         # Set the progress callback in the system
+        system = AdvancedSearchSystem()
         system.set_progress_callback(progress_callback)
 
         # Run the search
         progress_callback("Starting research process", 5, {"phase": "init"})
-
-
+
+        try:
+            results = system.analyze_topic(query)
+            if mode == 'quick':
+                progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
+            else:
+                progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+        except Exception as search_error:
+            # Better handling of specific search errors
+            error_message = str(search_error)
+            error_type = "unknown"
+
+            # Extract error details for common issues
+            if "status code: 503" in error_message:
+                error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
+                error_type = "ollama_unavailable"
+            elif "status code: 404" in error_message:
+                error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
+                error_type = "model_not_found"
+            elif "status code:" in error_message:
+                # Extract the status code for other HTTP errors
+                status_code = error_message.split("status code:")[1].strip()
+                error_message = f"API request failed with status code {status_code}. Please check your configuration."
+                error_type = "api_error"
+            elif "connection" in error_message.lower():
+                error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
+                error_type = "connection_error"
+
+            # Raise with improved error message
+            raise Exception(f"{error_message} (Error type: {error_type})")
 
         # Generate output based on mode
         if mode == 'quick':
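
The remapping inside progress_callback keeps the progress bar monotonic across phases: detailed-mode report generation is compressed into the 80-95% band, and quick-mode output generation into 85-95%. The same arithmetic restated outside the Flask context:

def map_report_generation(progress_percent):
    # Detailed mode, 'report_generation' phase: 0-100% -> 80-95%
    return 80 + (progress_percent / 100) * 15

def map_quick_output(progress_percent):
    # Quick mode, 'output_generation' phase: floor of 85%, then 0-100% -> 85-95%
    adjusted = max(85, progress_percent)
    if progress_percent > 0:
        adjusted = 85 + (progress_percent / 100) * 10
    return adjusted

assert map_report_generation(0) == 80.0
assert map_report_generation(50) == 87.5
assert map_quick_output(0) == 85
assert map_quick_output(50) == 90.0
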
@@ -593,56 +1003,92 @@ def run_research_process(research_id, query, mode):
             if results.get('findings'):
                 #initial_analysis = [finding['content'] for finding in results['findings']]
                 summary = ""
-                raw_formatted_findings = results['formatted_findings']
-
-                # ADDED CODE: Convert debug output to clean markdown
-                clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
 
-                #
-
-                if not os.path.exists(output_dir):
-                    os.makedirs(output_dir)
-
-                safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
-                safe_query = safe_query.replace(" ", "_").lower()
-                report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
+                # Safer access to formatted_findings with logging
+                print(f"Results keys: {list(results.keys())}")
 
-
-
-
-
-
-
-
+                # Check if formatted_findings exists in results
+                if 'formatted_findings' not in results:
+                    print("WARNING: 'formatted_findings' not found in results, using fallback")
+                    # Create fallback formatted findings from available data
+                    raw_formatted_findings = "# Research Findings\n\n"
+                    for i, finding in enumerate(results.get('findings', [])):
+                        raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
+                else:
+                    raw_formatted_findings = results['formatted_findings']
+                    print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                try:
+                    # ADDED CODE: Convert debug output to clean markdown
+                    clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
+                    print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
+
+                    # First send a progress update for generating the summary
+                    progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
+
+                    # Save as markdown file
+                    output_dir = "research_outputs"
+                    if not os.path.exists(output_dir):
+                        os.makedirs(output_dir)
+
+                    safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
+                    safe_query = safe_query.replace(" ", "_").lower()
+                    report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
+
+                    # Send progress update for writing to file
+                    progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
+
+                    print(f"Writing report to: {report_path}")
+                    with open(report_path, "w", encoding="utf-8") as f:
+                        f.write("# Quick Research Summary\n\n")
+                        f.write(f"Query: {query}\n\n")
+                        f.write(clean_markdown)  # Use clean markdown instead of raw findings
+                        f.write("\n\n## Research Metrics\n")
+                        f.write(f"- Search Iterations: {results['iterations']}\n")
+                        f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
+
+                    # Update database
+                    metadata = {
+                        'iterations': results['iterations'],
+                        'generated_at': datetime.utcnow().isoformat()
+                    }
+
+                    # Calculate duration in seconds - using UTC consistently
+                    now = datetime.utcnow()
+                    completed_at = now.isoformat()
+
+                    print(f"Updating database for research_id: {research_id}")
+                    # Get the start time from the database
+                    conn = sqlite3.connect(DB_PATH)
+                    cursor = conn.cursor()
+                    cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                    result = cursor.fetchone()
+
+                    # Use the helper function for consistent duration calculation
+                    duration_seconds = calculate_duration(result[0])
+
+                    # Update the record
+                    cursor.execute(
+                        'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
+                        ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
+                    )
+                    conn.commit()
+                    conn.close()
+                    print(f"Database updated successfully for research_id: {research_id}")
+
+                    # Send the final completion message
+                    progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
+
+                    # Clean up resources
+                    print(f"Cleaning up resources for research_id: {research_id}")
+                    cleanup_research_resources(research_id)
+                    print(f"Resources cleaned up for research_id: {research_id}")
+                except Exception as inner_e:
+                    print(f"Error during quick summary generation: {str(inner_e)}")
+                    print(traceback.format_exc())
+                    raise Exception(f"Error generating quick summary: {str(inner_e)}")
+            else:
+                raise Exception("No research findings were generated. Please try again.")
         else:
             # Full Report
             progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
@@ -688,23 +1134,47 @@ def run_research_process(research_id, query, mode):
 
             progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
 
-
-
-            del active_research[research_id]
+            # Clean up - moved to a separate function for reuse
+            cleanup_research_resources(research_id)
 
     except Exception as e:
         # Handle error
        error_message = f"Research failed: {str(e)}"
        print(f"Research error: {error_message}")
        try:
-
+            # Check for common Ollama error patterns in the exception and provide more user-friendly errors
+            user_friendly_error = str(e)
+            error_context = {}
+
+            if "Error type: ollama_unavailable" in user_friendly_error:
+                user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
+                error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
+            elif "Error type: model_not_found" in user_friendly_error:
+                user_friendly_error = "Required Ollama model not found. Please pull the model first."
+                error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
+            elif "Error type: connection_error" in user_friendly_error:
+                user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
+                error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
+            elif "Error type: api_error" in user_friendly_error:
+                # Keep the original error message as it's already improved
+                error_context = {"solution": "Check API configuration and credentials."}
+
+            # Update metadata with more context about the error
+            metadata = {
+                "phase": "error",
+                "error": user_friendly_error
+            }
+            if error_context:
+                metadata.update(error_context)
+
+            progress_callback(user_friendly_error, None, metadata)
 
             conn = sqlite3.connect(DB_PATH)
             cursor = conn.cursor()
 
             # If termination was requested, mark as suspended instead of failed
             status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
-            message = "Research was terminated by user" if status == 'suspended' else
+            message = "Research was terminated by user" if status == 'suspended' else user_friendly_error
 
             # Calculate duration up to termination point - using UTC consistently
             now = datetime.utcnow()
@@ -721,7 +1191,7 @@ def run_research_process(research_id, query, mode):
 
             cursor.execute(
                 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
-                (status, completed_at, duration_seconds, json.dumps(
+                (status, completed_at, duration_seconds, json.dumps(metadata), research_id)
             )
             conn.commit()
             conn.close()
@@ -749,11 +1219,8 @@ def run_research_process(research_id, query, mode):
         except Exception as inner_e:
             print(f"Error in error handler: {str(inner_e)}")
 
-        # Clean up resources
-
-        del active_research[research_id]
-        if research_id in termination_flags:
-            del termination_flags[research_id]
+        # Clean up resources - moved to a separate function for reuse
+        cleanup_research_resources(research_id)
 
 @research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
 def terminate_research(research_id):
@@ -789,16 +1256,30 @@ def terminate_research(research_id):
 
     # Log the termination request - using UTC timestamp
     timestamp = datetime.utcnow().isoformat()
+    termination_message = "Research termination requested by user"
+    current_progress = active_research[research_id]['progress']
+
+    # Create log entry
     log_entry = {
         "time": timestamp,
-        "message":
-        "progress":
+        "message": termination_message,
+        "progress": current_progress,
         "metadata": {"phase": "termination"}
     }
 
+    # Add to in-memory log
     active_research[research_id]['log'].append(log_entry)
 
-    #
+    # Add to database log
+    add_log_to_db(
+        research_id,
+        termination_message,
+        log_type='milestone',
+        progress=current_progress,
+        metadata={"phase": "termination"}
+    )
+
+    # Update the log in the database (old way for backward compatibility)
     cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
     log_result = cursor.fetchone()
     if log_result:
@@ -812,14 +1293,16 @@ def terminate_research(research_id):
             (json.dumps(current_log), research_id)
         )
 
+    # IMMEDIATELY update the status to 'suspended' to avoid race conditions
+    cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
     conn.commit()
     conn.close()
 
     # Emit a socket event for the termination request
     try:
         event_data = {
-            'status': 'terminating'
-            'message': 'Research
+            'status': 'suspended',  # Changed from 'terminating' to 'suspended'
+            'message': 'Research was suspended by user request'
         }
 
         socketio.emit(f'research_progress_{research_id}', event_data)
@@ -877,8 +1360,6 @@ def delete_research(research_id):
     conn.close()
 
     return jsonify({'status': 'success'})
-
-# Main settings page that links to specialized config pages
 @research_bp.route('/settings', methods=['GET'])
 def settings_page():
     """Main settings dashboard with links to specialized config pages"""
@@ -1120,6 +1601,47 @@ def open_file_location():
         return redirect(url_for('research.collections_config_page'))
     else:
         return redirect(url_for('research.main_config_page'))
+
+@research_bp.route('/api/research/<int:research_id>/logs')
+def get_research_logs(research_id):
+    """Get logs for a specific research ID"""
+    # First check if the research exists
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
+    result = cursor.fetchone()
+    conn.close()
+
+    if not result:
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    # Retrieve logs from the database
+    logs = get_logs_for_research(research_id)
+
+    # Add any current logs from memory if this is an active research
+    if research_id in active_research and active_research[research_id].get('log'):
+        # Use the logs from memory temporarily until they're saved to the database
+        memory_logs = active_research[research_id]['log']
+
+        # Filter out logs that are already in the database
+        # We'll compare timestamps to avoid duplicates
+        db_timestamps = {log['time'] for log in logs}
+        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
+
+        # Add unique memory logs to our return list
+        logs.extend(unique_memory_logs)
+
+        # Sort logs by timestamp
+        logs.sort(key=lambda x: x['time'])
+
+    return jsonify({
+        'status': 'success',
+        'logs': logs
+    })
+
+
+
 # Register the blueprint
 app.register_blueprint(research_bp)
 
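
The new endpoint returns the merged database and in-memory logs as JSON. A hypothetical client call (host, port, and any blueprint URL prefix depend on the deployment, so adjust the base URL accordingly):

import requests  # assumes the requests package is available client-side

resp = requests.get('http://127.0.0.1:5000/api/research/1/logs')
payload = resp.json()
if payload['status'] == 'success':
    for entry in payload['logs']:
        print(entry['time'], entry.get('type'), entry['message'])
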
@@ -1133,7 +1655,6 @@ def app_serve_static(path):
 def favicon():
     return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
 
-
 # Add this function to app.py
 def convert_debug_to_markdown(raw_text, query):
     """
@@ -1146,43 +1667,75 @@ def convert_debug_to_markdown(raw_text, query):
     Returns:
         Clean markdown formatted text
     """
-
-
-
-        content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
-    else:
-        content = raw_text
-
-    # Remove divider lines with === symbols
-    content = "\n".join([line for line in content.split("\n")
-                        if not line.strip().startswith("===") and not line.strip() == "="*80])
-
-    # If COMPLETE RESEARCH OUTPUT exists, remove that section
-    if "COMPLETE RESEARCH OUTPUT" in content:
-        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
-
-    # Remove SEARCH QUESTIONS BY ITERATION section
-    if "SEARCH QUESTIONS BY ITERATION:" in content:
-        search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
-        next_major_section = -1
-        for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
-            if marker in content[search_index:]:
-                marker_pos = content.index(marker, search_index)
-                if next_major_section == -1 or marker_pos < next_major_section:
-                    next_major_section = marker_pos
+    try:
+        print(f"Starting markdown conversion for query: {query}")
+        print(f"Raw text type: {type(raw_text)}")
 
-
-
+        # Handle None or empty input
+        if not raw_text:
+            print("WARNING: raw_text is empty or None")
+            return f"No detailed findings available for '{query}'."
+
+        # If there's a "DETAILED FINDINGS:" section, extract everything after it
+        if "DETAILED FINDINGS:" in raw_text:
+            print("Found DETAILED FINDINGS section")
+            detailed_index = raw_text.index("DETAILED FINDINGS:")
+            content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
         else:
-
-            content =
-
-
+            print("No DETAILED FINDINGS section found, using full text")
+            content = raw_text
+
+        # Remove divider lines with === symbols
+        lines_before = len(content.split("\n"))
+        content = "\n".join([line for line in content.split("\n")
+                            if not line.strip().startswith("===") and not line.strip() == "="*80])
+        lines_after = len(content.split("\n"))
+        print(f"Removed {lines_before - lines_after} divider lines")
+
+        # If COMPLETE RESEARCH OUTPUT exists, remove that section
+        if "COMPLETE RESEARCH OUTPUT" in content:
+            print("Found and removing COMPLETE RESEARCH OUTPUT section")
+            content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
+        # Remove SEARCH QUESTIONS BY ITERATION section
+        if "SEARCH QUESTIONS BY ITERATION:" in content:
+            print("Found SEARCH QUESTIONS BY ITERATION section")
+            search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
+            next_major_section = -1
+            for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
+                if marker in content[search_index:]:
+                    marker_pos = content.index(marker, search_index)
+                    if next_major_section == -1 or marker_pos < next_major_section:
+                        next_major_section = marker_pos
+
+            if next_major_section != -1:
+                print(f"Removing section from index {search_index} to {next_major_section}")
+                content = content[:search_index] + content[next_major_section:]
+            else:
+                # If no later section, just remove everything from SEARCH QUESTIONS onwards
+                print(f"Removing everything after index {search_index}")
+                content = content[:search_index].strip()
+
+        print(f"Final markdown length: {len(content.strip())}")
+        return content.strip()
+    except Exception as e:
+        print(f"Error in convert_debug_to_markdown: {str(e)}")
+        print(traceback.format_exc())
+        # Return a basic message with the original query as fallback
+        return f"# Research on {query}\n\nThere was an error formatting the research results."
+
 def main():
     """
     Entry point for the web application when run as a command.
     This function is needed for the package's entry point to work properly.
     """
+    # Import settings here to avoid circular imports
+    from local_deep_research.config import settings
+
+    # Get web server settings with defaults
+    port = settings.web.port
+    host = settings.web.host
+    debug = settings.web.debug
 
     # Check for OpenAI availability but don't import it unless necessary
     try:
@@ -1202,8 +1755,7 @@ def main():
     except Exception as e:
         print(f"Error checking OpenAI availability: {e}")
 
-
-
-
+    socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
+
 if __name__ == '__main__':
     main()
|