local-deep-research 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/defaults/main.toml +5 -0
- local_deep_research/search_system.py +98 -38
- local_deep_research/web/app.py +360 -117
- local_deep_research/web/static/css/styles.css +28 -2
- local_deep_research/web/static/js/app.js +640 -197
- local_deep_research/web/templates/index.html +3 -1
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +454 -0
- local_deep_research/web_search_engines/search_engine_factory.py +20 -1
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/METADATA +16 -4
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/RECORD +14 -13
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/LICENSE +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/WHEEL +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.1.0.dist-info → local_deep_research-0.1.1.dist-info}/top_level.txt +0 -0
local_deep_research/web/app.py
CHANGED
@@ -320,12 +320,39 @@ def start_research():
|
|
320
320
|
if not query:
|
321
321
|
return jsonify({'status': 'error', 'message': 'Query is required'}), 400
|
322
322
|
|
323
|
-
# Check if there's any active research
|
323
|
+
# Check if there's any active research that's actually still running
|
324
324
|
if active_research:
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
325
|
+
# Verify each active research is still valid
|
326
|
+
stale_research_ids = []
|
327
|
+
for research_id, research_data in list(active_research.items()):
|
328
|
+
# Check database status
|
329
|
+
conn = sqlite3.connect(DB_PATH)
|
330
|
+
cursor = conn.cursor()
|
331
|
+
cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
|
332
|
+
result = cursor.fetchone()
|
333
|
+
conn.close()
|
334
|
+
|
335
|
+
# If the research doesn't exist in DB or is not in_progress, it's stale
|
336
|
+
if not result or result[0] != 'in_progress':
|
337
|
+
stale_research_ids.append(research_id)
|
338
|
+
# Also check if thread is still alive
|
339
|
+
elif not research_data.get('thread') or not research_data.get('thread').is_alive():
|
340
|
+
stale_research_ids.append(research_id)
|
341
|
+
|
342
|
+
# Clean up any stale research processes
|
343
|
+
for stale_id in stale_research_ids:
|
344
|
+
print(f"Cleaning up stale research process: {stale_id}")
|
345
|
+
if stale_id in active_research:
|
346
|
+
del active_research[stale_id]
|
347
|
+
if stale_id in termination_flags:
|
348
|
+
del termination_flags[stale_id]
|
349
|
+
|
350
|
+
# After cleanup, check if there's still active research
|
351
|
+
if active_research:
|
352
|
+
return jsonify({
|
353
|
+
'status': 'error',
|
354
|
+
'message': 'Another research is already in progress. Please wait for it to complete.'
|
355
|
+
}), 409
|
329
356
|
|
330
357
|
# Create a record in the database with explicit UTC timestamp
|
331
358
|
created_at = datetime.utcnow().isoformat()
|
@@ -467,6 +494,7 @@ def handle_disconnect():
|
|
467
494
|
subscribers.remove(request.sid)
|
468
495
|
if not subscribers:
|
469
496
|
socket_subscriptions.pop(research_id, None)
|
497
|
+
print(f"Removed empty subscription for research {research_id}")
|
470
498
|
except Exception as e:
|
471
499
|
print(f"Error handling disconnect: {e}")
|
472
500
|
|
@@ -474,23 +502,54 @@ def handle_disconnect():
|
|
474
502
|
def handle_subscribe(data):
|
475
503
|
research_id = data.get('research_id')
|
476
504
|
if research_id:
|
477
|
-
if
|
478
|
-
|
479
|
-
|
480
|
-
|
505
|
+
# First check if this research is still active
|
506
|
+
conn = sqlite3.connect(DB_PATH)
|
507
|
+
cursor = conn.cursor()
|
508
|
+
cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
|
509
|
+
result = cursor.fetchone()
|
510
|
+
conn.close()
|
481
511
|
|
482
|
-
#
|
483
|
-
if
|
484
|
-
|
485
|
-
|
512
|
+
# Only allow subscription to valid research
|
513
|
+
if result:
|
514
|
+
status = result[0]
|
515
|
+
|
516
|
+
# Initialize subscription set if needed
|
517
|
+
if research_id not in socket_subscriptions:
|
518
|
+
socket_subscriptions[research_id] = set()
|
486
519
|
|
487
|
-
|
520
|
+
# Add this client to the subscribers
|
521
|
+
socket_subscriptions[research_id].add(request.sid)
|
522
|
+
print(f"Client {request.sid} subscribed to research {research_id}")
|
523
|
+
|
524
|
+
# Send current status immediately if available
|
525
|
+
if research_id in active_research:
|
526
|
+
progress = active_research[research_id]['progress']
|
527
|
+
latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
|
528
|
+
|
529
|
+
if latest_log:
|
530
|
+
emit(f'research_progress_{research_id}', {
|
531
|
+
'progress': progress,
|
532
|
+
'message': latest_log.get('message', 'Processing...'),
|
533
|
+
'status': 'in_progress',
|
534
|
+
'log_entry': latest_log
|
535
|
+
})
|
536
|
+
elif status in ['completed', 'failed', 'suspended']:
|
537
|
+
# Send final status for completed research
|
488
538
|
emit(f'research_progress_{research_id}', {
|
489
|
-
'progress':
|
490
|
-
'message':
|
491
|
-
|
492
|
-
'
|
539
|
+
'progress': 100 if status == 'completed' else 0,
|
540
|
+
'message': 'Research completed successfully' if status == 'completed' else
|
541
|
+
'Research failed' if status == 'failed' else 'Research was suspended',
|
542
|
+
'status': status,
|
543
|
+
'log_entry': {
|
544
|
+
'time': datetime.utcnow().isoformat(),
|
545
|
+
'message': f'Research is {status}',
|
546
|
+
'progress': 100 if status == 'completed' else 0,
|
547
|
+
'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
|
548
|
+
}
|
493
549
|
})
|
550
|
+
else:
|
551
|
+
# Research not found
|
552
|
+
emit('error', {'message': f'Research ID {research_id} not found'})
|
494
553
|
|
495
554
|
@socketio.on_error
|
496
555
|
def handle_socket_error(e):
|
@@ -504,6 +563,47 @@ def handle_default_error(e):
|
|
504
563
|
# Don't propagate exceptions to avoid crashing the server
|
505
564
|
return False
|
506
565
|
|
566
|
+
# Function to clean up resources for a completed research
|
567
|
+
def cleanup_research_resources(research_id):
|
568
|
+
"""Clean up resources for a completed research"""
|
569
|
+
print(f"Cleaning up resources for research {research_id}")
|
570
|
+
|
571
|
+
# Remove from active research
|
572
|
+
if research_id in active_research:
|
573
|
+
del active_research[research_id]
|
574
|
+
|
575
|
+
# Remove from termination flags
|
576
|
+
if research_id in termination_flags:
|
577
|
+
del termination_flags[research_id]
|
578
|
+
|
579
|
+
# Send a final message to any remaining subscribers with explicit completed status
|
580
|
+
if research_id in socket_subscriptions and socket_subscriptions[research_id]:
|
581
|
+
final_message = {
|
582
|
+
'status': 'completed',
|
583
|
+
'message': 'Research process has ended and resources have been cleaned up',
|
584
|
+
'progress': 100,
|
585
|
+
}
|
586
|
+
|
587
|
+
try:
|
588
|
+
print(f"Sending final completion socket message for research {research_id}")
|
589
|
+
# Use emit to all, not just subscribers
|
590
|
+
socketio.emit(f'research_progress_{research_id}', final_message)
|
591
|
+
|
592
|
+
# Also emit to specific subscribers
|
593
|
+
for sid in socket_subscriptions[research_id]:
|
594
|
+
try:
|
595
|
+
socketio.emit(
|
596
|
+
f'research_progress_{research_id}',
|
597
|
+
final_message,
|
598
|
+
room=sid
|
599
|
+
)
|
600
|
+
except Exception as sub_err:
|
601
|
+
print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
|
602
|
+
except Exception as e:
|
603
|
+
print(f"Error sending final cleanup message: {e}")
|
604
|
+
|
605
|
+
# Don't immediately remove subscriptions - let clients disconnect naturally
|
606
|
+
|
507
607
|
def run_research_process(research_id, query, mode):
|
508
608
|
try:
|
509
609
|
system = AdvancedSearchSystem()
|
@@ -511,10 +611,36 @@ def run_research_process(research_id, query, mode):
|
|
511
611
|
# Set up progress callback
|
512
612
|
def progress_callback(message, progress_percent, metadata):
|
513
613
|
timestamp = datetime.utcnow().isoformat()
|
614
|
+
|
615
|
+
# Adjust progress based on research mode
|
616
|
+
adjusted_progress = progress_percent
|
617
|
+
if mode == 'detailed' and metadata.get('phase') == 'output_generation':
|
618
|
+
# For detailed mode, we need to adjust the progress range
|
619
|
+
# because detailed reports take longer after the search phase
|
620
|
+
adjusted_progress = min(80, progress_percent)
|
621
|
+
elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
|
622
|
+
# Scale the progress from 80% to 95% for the report generation phase
|
623
|
+
# Map progress_percent values (0-100%) to the (80-95%) range
|
624
|
+
if progress_percent is not None:
|
625
|
+
normalized = progress_percent / 100
|
626
|
+
adjusted_progress = 80 + (normalized * 15)
|
627
|
+
elif mode == 'quick' and metadata.get('phase') == 'output_generation':
|
628
|
+
# For quick mode, ensure we're at least at 85% during output generation
|
629
|
+
adjusted_progress = max(85, progress_percent)
|
630
|
+
# Map any further progress within output_generation to 85-95% range
|
631
|
+
if progress_percent is not None and progress_percent > 0:
|
632
|
+
normalized = progress_percent / 100
|
633
|
+
adjusted_progress = 85 + (normalized * 10)
|
634
|
+
|
635
|
+
# Don't let progress go backwards
|
636
|
+
if research_id in active_research and adjusted_progress is not None:
|
637
|
+
current_progress = active_research[research_id].get('progress', 0)
|
638
|
+
adjusted_progress = max(current_progress, adjusted_progress)
|
639
|
+
|
514
640
|
log_entry = {
|
515
641
|
"time": timestamp,
|
516
642
|
"message": message,
|
517
|
-
"progress":
|
643
|
+
"progress": adjusted_progress,
|
518
644
|
"metadata": metadata
|
519
645
|
}
|
520
646
|
|
@@ -526,11 +652,11 @@ def run_research_process(research_id, query, mode):
|
|
526
652
|
# Update active research record
|
527
653
|
if research_id in active_research:
|
528
654
|
active_research[research_id]['log'].append(log_entry)
|
529
|
-
if
|
530
|
-
active_research[research_id]['progress'] =
|
655
|
+
if adjusted_progress is not None:
|
656
|
+
active_research[research_id]['progress'] = adjusted_progress
|
531
657
|
|
532
658
|
# Save to database (but not too frequently)
|
533
|
-
if
|
659
|
+
if adjusted_progress is None or adjusted_progress % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete', 'output_generation', 'report_generation', 'report_complete']:
|
534
660
|
conn = sqlite3.connect(DB_PATH)
|
535
661
|
cursor = conn.cursor()
|
536
662
|
cursor.execute(
|
@@ -554,7 +680,7 @@ def run_research_process(research_id, query, mode):
|
|
554
680
|
# Emit socket event with try/except block to handle connection issues
|
555
681
|
try:
|
556
682
|
event_data = {
|
557
|
-
'progress':
|
683
|
+
'progress': adjusted_progress,
|
558
684
|
'message': message,
|
559
685
|
'status': 'in_progress',
|
560
686
|
'log_entry': log_entry
|
@@ -578,14 +704,44 @@ def run_research_process(research_id, query, mode):
|
|
578
704
|
except Exception as socket_error:
|
579
705
|
# Log socket error but continue with the research process
|
580
706
|
print(f"Socket emit error (non-critical): {str(socket_error)}")
|
707
|
+
|
708
|
+
return not (research_id in termination_flags and termination_flags[research_id])
|
581
709
|
|
582
710
|
# Set the progress callback in the system
|
583
711
|
system.set_progress_callback(progress_callback)
|
584
712
|
|
585
713
|
# Run the search
|
586
714
|
progress_callback("Starting research process", 5, {"phase": "init"})
|
587
|
-
|
588
|
-
|
715
|
+
|
716
|
+
try:
|
717
|
+
results = system.analyze_topic(query)
|
718
|
+
if mode == 'quick':
|
719
|
+
progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
|
720
|
+
else:
|
721
|
+
progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
|
722
|
+
except Exception as search_error:
|
723
|
+
# Better handling of specific search errors
|
724
|
+
error_message = str(search_error)
|
725
|
+
error_type = "unknown"
|
726
|
+
|
727
|
+
# Extract error details for common issues
|
728
|
+
if "status code: 503" in error_message:
|
729
|
+
error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
|
730
|
+
error_type = "ollama_unavailable"
|
731
|
+
elif "status code: 404" in error_message:
|
732
|
+
error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
|
733
|
+
error_type = "model_not_found"
|
734
|
+
elif "status code:" in error_message:
|
735
|
+
# Extract the status code for other HTTP errors
|
736
|
+
status_code = error_message.split("status code:")[1].strip()
|
737
|
+
error_message = f"API request failed with status code {status_code}. Please check your configuration."
|
738
|
+
error_type = "api_error"
|
739
|
+
elif "connection" in error_message.lower():
|
740
|
+
error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
|
741
|
+
error_type = "connection_error"
|
742
|
+
|
743
|
+
# Raise with improved error message
|
744
|
+
raise Exception(f"{error_message} (Error type: {error_type})")
|
589
745
|
|
590
746
|
# Generate output based on mode
|
591
747
|
if mode == 'quick':
|
@@ -593,56 +749,92 @@ def run_research_process(research_id, query, mode):
|
|
593
749
|
if results.get('findings'):
|
594
750
|
#initial_analysis = [finding['content'] for finding in results['findings']]
|
595
751
|
summary = ""
|
596
|
-
raw_formatted_findings = results['formatted_findings']
|
597
|
-
|
598
|
-
# ADDED CODE: Convert debug output to clean markdown
|
599
|
-
clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
|
600
|
-
|
601
|
-
# Save as markdown file
|
602
|
-
output_dir = "research_outputs"
|
603
|
-
if not os.path.exists(output_dir):
|
604
|
-
os.makedirs(output_dir)
|
605
|
-
|
606
|
-
safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
|
607
|
-
safe_query = safe_query.replace(" ", "_").lower()
|
608
|
-
report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
|
609
|
-
|
610
|
-
with open(report_path, "w", encoding="utf-8") as f:
|
611
|
-
f.write("# Quick Research Summary\n\n")
|
612
|
-
f.write(f"Query: {query}\n\n")
|
613
|
-
f.write(clean_markdown) # Use clean markdown instead of raw findings
|
614
|
-
f.write("\n\n## Research Metrics\n")
|
615
|
-
f.write(f"- Search Iterations: {results['iterations']}\n")
|
616
|
-
f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
|
617
752
|
|
618
|
-
#
|
619
|
-
|
620
|
-
'iterations': results['iterations'],
|
621
|
-
'generated_at': datetime.utcnow().isoformat()
|
622
|
-
}
|
753
|
+
# Safer access to formatted_findings with logging
|
754
|
+
print(f"Results keys: {list(results.keys())}")
|
623
755
|
|
624
|
-
#
|
625
|
-
|
626
|
-
|
756
|
+
# Check if formatted_findings exists in results
|
757
|
+
if 'formatted_findings' not in results:
|
758
|
+
print("WARNING: 'formatted_findings' not found in results, using fallback")
|
759
|
+
# Create fallback formatted findings from available data
|
760
|
+
raw_formatted_findings = "# Research Findings\n\n"
|
761
|
+
for i, finding in enumerate(results.get('findings', [])):
|
762
|
+
raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
|
763
|
+
else:
|
764
|
+
raw_formatted_findings = results['formatted_findings']
|
765
|
+
print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
|
627
766
|
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
767
|
+
try:
|
768
|
+
# ADDED CODE: Convert debug output to clean markdown
|
769
|
+
clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
|
770
|
+
print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
|
771
|
+
|
772
|
+
# First send a progress update for generating the summary
|
773
|
+
progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
|
774
|
+
|
775
|
+
# Save as markdown file
|
776
|
+
output_dir = "research_outputs"
|
777
|
+
if not os.path.exists(output_dir):
|
778
|
+
os.makedirs(output_dir)
|
779
|
+
|
780
|
+
safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
|
781
|
+
safe_query = safe_query.replace(" ", "_").lower()
|
782
|
+
report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
|
783
|
+
|
784
|
+
# Send progress update for writing to file
|
785
|
+
progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
|
786
|
+
|
787
|
+
print(f"Writing report to: {report_path}")
|
788
|
+
with open(report_path, "w", encoding="utf-8") as f:
|
789
|
+
f.write("# Quick Research Summary\n\n")
|
790
|
+
f.write(f"Query: {query}\n\n")
|
791
|
+
f.write(clean_markdown) # Use clean markdown instead of raw findings
|
792
|
+
f.write("\n\n## Research Metrics\n")
|
793
|
+
f.write(f"- Search Iterations: {results['iterations']}\n")
|
794
|
+
f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
|
795
|
+
|
796
|
+
# Update database
|
797
|
+
metadata = {
|
798
|
+
'iterations': results['iterations'],
|
799
|
+
'generated_at': datetime.utcnow().isoformat()
|
800
|
+
}
|
801
|
+
|
802
|
+
# Calculate duration in seconds - using UTC consistently
|
803
|
+
now = datetime.utcnow()
|
804
|
+
completed_at = now.isoformat()
|
805
|
+
|
806
|
+
print(f"Updating database for research_id: {research_id}")
|
807
|
+
# Get the start time from the database
|
808
|
+
conn = sqlite3.connect(DB_PATH)
|
809
|
+
cursor = conn.cursor()
|
810
|
+
cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
|
811
|
+
result = cursor.fetchone()
|
812
|
+
|
813
|
+
# Use the helper function for consistent duration calculation
|
814
|
+
duration_seconds = calculate_duration(result[0])
|
815
|
+
|
816
|
+
# Update the record
|
817
|
+
cursor.execute(
|
818
|
+
'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
|
819
|
+
('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
|
820
|
+
)
|
821
|
+
conn.commit()
|
822
|
+
conn.close()
|
823
|
+
print(f"Database updated successfully for research_id: {research_id}")
|
824
|
+
|
825
|
+
# Send the final completion message
|
826
|
+
progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
|
827
|
+
|
828
|
+
# Clean up resources
|
829
|
+
print(f"Cleaning up resources for research_id: {research_id}")
|
830
|
+
cleanup_research_resources(research_id)
|
831
|
+
print(f"Resources cleaned up for research_id: {research_id}")
|
832
|
+
except Exception as inner_e:
|
833
|
+
print(f"Error during quick summary generation: {str(inner_e)}")
|
834
|
+
print(traceback.format_exc())
|
835
|
+
raise Exception(f"Error generating quick summary: {str(inner_e)}")
|
836
|
+
else:
|
837
|
+
raise Exception("No research findings were generated. Please try again.")
|
646
838
|
else:
|
647
839
|
# Full Report
|
648
840
|
progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
|
@@ -688,23 +880,47 @@ def run_research_process(research_id, query, mode):
|
|
688
880
|
|
689
881
|
progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
|
690
882
|
|
691
|
-
|
692
|
-
|
693
|
-
del active_research[research_id]
|
883
|
+
# Clean up - moved to a separate function for reuse
|
884
|
+
cleanup_research_resources(research_id)
|
694
885
|
|
695
886
|
except Exception as e:
|
696
887
|
# Handle error
|
697
888
|
error_message = f"Research failed: {str(e)}"
|
698
889
|
print(f"Research error: {error_message}")
|
699
890
|
try:
|
700
|
-
|
891
|
+
# Check for common Ollama error patterns in the exception and provide more user-friendly errors
|
892
|
+
user_friendly_error = str(e)
|
893
|
+
error_context = {}
|
894
|
+
|
895
|
+
if "Error type: ollama_unavailable" in user_friendly_error:
|
896
|
+
user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
|
897
|
+
error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
|
898
|
+
elif "Error type: model_not_found" in user_friendly_error:
|
899
|
+
user_friendly_error = "Required Ollama model not found. Please pull the model first."
|
900
|
+
error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
|
901
|
+
elif "Error type: connection_error" in user_friendly_error:
|
902
|
+
user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
|
903
|
+
error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
|
904
|
+
elif "Error type: api_error" in user_friendly_error:
|
905
|
+
# Keep the original error message as it's already improved
|
906
|
+
error_context = {"solution": "Check API configuration and credentials."}
|
907
|
+
|
908
|
+
# Update metadata with more context about the error
|
909
|
+
metadata = {
|
910
|
+
"phase": "error",
|
911
|
+
"error": user_friendly_error
|
912
|
+
}
|
913
|
+
if error_context:
|
914
|
+
metadata.update(error_context)
|
915
|
+
|
916
|
+
progress_callback(user_friendly_error, None, metadata)
|
701
917
|
|
702
918
|
conn = sqlite3.connect(DB_PATH)
|
703
919
|
cursor = conn.cursor()
|
704
920
|
|
705
921
|
# If termination was requested, mark as suspended instead of failed
|
706
922
|
status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
|
707
|
-
message = "Research was terminated by user" if status == 'suspended' else
|
923
|
+
message = "Research was terminated by user" if status == 'suspended' else user_friendly_error
|
708
924
|
|
709
925
|
# Calculate duration up to termination point - using UTC consistently
|
710
926
|
now = datetime.utcnow()
|
@@ -721,7 +937,7 @@ def run_research_process(research_id, query, mode):
|
|
721
937
|
|
722
938
|
cursor.execute(
|
723
939
|
'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
|
724
|
-
(status, completed_at, duration_seconds, json.dumps(
|
940
|
+
(status, completed_at, duration_seconds, json.dumps(metadata), research_id)
|
725
941
|
)
|
726
942
|
conn.commit()
|
727
943
|
conn.close()
|
@@ -749,11 +965,8 @@ def run_research_process(research_id, query, mode):
|
|
749
965
|
except Exception as inner_e:
|
750
966
|
print(f"Error in error handler: {str(inner_e)}")
|
751
967
|
|
752
|
-
# Clean up resources
|
753
|
-
|
754
|
-
del active_research[research_id]
|
755
|
-
if research_id in termination_flags:
|
756
|
-
del termination_flags[research_id]
|
968
|
+
# Clean up resources - moved to a separate function
|
969
|
+
cleanup_research_resources(research_id)
|
757
970
|
|
758
971
|
@research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
|
759
972
|
def terminate_research(research_id):
|
@@ -1133,7 +1346,6 @@ def app_serve_static(path):
|
|
1133
1346
|
def favicon():
|
1134
1347
|
return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
|
1135
1348
|
|
1136
|
-
|
1137
1349
|
# Add this function to app.py
|
1138
1350
|
def convert_debug_to_markdown(raw_text, query):
|
1139
1351
|
"""
|
@@ -1146,43 +1358,75 @@ def convert_debug_to_markdown(raw_text, query):
|
|
1146
1358
|
Returns:
|
1147
1359
|
Clean markdown formatted text
|
1148
1360
|
"""
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
|
1153
|
-
else:
|
1154
|
-
content = raw_text
|
1155
|
-
|
1156
|
-
# Remove divider lines with === symbols
|
1157
|
-
content = "\n".join([line for line in content.split("\n")
|
1158
|
-
if not line.strip().startswith("===") and not line.strip() == "="*80])
|
1159
|
-
|
1160
|
-
# If COMPLETE RESEARCH OUTPUT exists, remove that section
|
1161
|
-
if "COMPLETE RESEARCH OUTPUT" in content:
|
1162
|
-
content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
|
1163
|
-
|
1164
|
-
# Remove SEARCH QUESTIONS BY ITERATION section
|
1165
|
-
if "SEARCH QUESTIONS BY ITERATION:" in content:
|
1166
|
-
search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
|
1167
|
-
next_major_section = -1
|
1168
|
-
for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
|
1169
|
-
if marker in content[search_index:]:
|
1170
|
-
marker_pos = content.index(marker, search_index)
|
1171
|
-
if next_major_section == -1 or marker_pos < next_major_section:
|
1172
|
-
next_major_section = marker_pos
|
1361
|
+
try:
|
1362
|
+
print(f"Starting markdown conversion for query: {query}")
|
1363
|
+
print(f"Raw text type: {type(raw_text)}")
|
1173
1364
|
|
1174
|
-
|
1175
|
-
|
1365
|
+
# Handle None or empty input
|
1366
|
+
if not raw_text:
|
1367
|
+
print("WARNING: raw_text is empty or None")
|
1368
|
+
return f"No detailed findings available for '{query}'."
|
1369
|
+
|
1370
|
+
# If there's a "DETAILED FINDINGS:" section, extract everything after it
|
1371
|
+
if "DETAILED FINDINGS:" in raw_text:
|
1372
|
+
print("Found DETAILED FINDINGS section")
|
1373
|
+
detailed_index = raw_text.index("DETAILED FINDINGS:")
|
1374
|
+
content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
|
1176
1375
|
else:
|
1177
|
-
|
1178
|
-
content =
|
1179
|
-
|
1180
|
-
|
1376
|
+
print("No DETAILED FINDINGS section found, using full text")
|
1377
|
+
content = raw_text
|
1378
|
+
|
1379
|
+
# Remove divider lines with === symbols
|
1380
|
+
lines_before = len(content.split("\n"))
|
1381
|
+
content = "\n".join([line for line in content.split("\n")
|
1382
|
+
if not line.strip().startswith("===") and not line.strip() == "="*80])
|
1383
|
+
lines_after = len(content.split("\n"))
|
1384
|
+
print(f"Removed {lines_before - lines_after} divider lines")
|
1385
|
+
|
1386
|
+
# If COMPLETE RESEARCH OUTPUT exists, remove that section
|
1387
|
+
if "COMPLETE RESEARCH OUTPUT" in content:
|
1388
|
+
print("Found and removing COMPLETE RESEARCH OUTPUT section")
|
1389
|
+
content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
|
1390
|
+
|
1391
|
+
# Remove SEARCH QUESTIONS BY ITERATION section
|
1392
|
+
if "SEARCH QUESTIONS BY ITERATION:" in content:
|
1393
|
+
print("Found SEARCH QUESTIONS BY ITERATION section")
|
1394
|
+
search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
|
1395
|
+
next_major_section = -1
|
1396
|
+
for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
|
1397
|
+
if marker in content[search_index:]:
|
1398
|
+
marker_pos = content.index(marker, search_index)
|
1399
|
+
if next_major_section == -1 or marker_pos < next_major_section:
|
1400
|
+
next_major_section = marker_pos
|
1401
|
+
|
1402
|
+
if next_major_section != -1:
|
1403
|
+
print(f"Removing section from index {search_index} to {next_major_section}")
|
1404
|
+
content = content[:search_index] + content[next_major_section:]
|
1405
|
+
else:
|
1406
|
+
# If no later section, just remove everything from SEARCH QUESTIONS onwards
|
1407
|
+
print(f"Removing everything after index {search_index}")
|
1408
|
+
content = content[:search_index].strip()
|
1409
|
+
|
1410
|
+
print(f"Final markdown length: {len(content.strip())}")
|
1411
|
+
return content.strip()
|
1412
|
+
except Exception as e:
|
1413
|
+
print(f"Error in convert_debug_to_markdown: {str(e)}")
|
1414
|
+
print(traceback.format_exc())
|
1415
|
+
# Return a basic message with the original query as fallback
|
1416
|
+
return f"# Research on {query}\n\nThere was an error formatting the research results."
|
1417
|
+
|
1181
1418
|
def main():
|
1182
1419
|
"""
|
1183
1420
|
Entry point for the web application when run as a command.
|
1184
1421
|
This function is needed for the package's entry point to work properly.
|
1185
1422
|
"""
|
1423
|
+
# Import settings here to avoid circular imports
|
1424
|
+
from local_deep_research.config import settings
|
1425
|
+
|
1426
|
+
# Get web server settings with defaults
|
1427
|
+
port = settings.web.port
|
1428
|
+
host = settings.web.host
|
1429
|
+
debug = settings.web.debug
|
1186
1430
|
|
1187
1431
|
# Check for OpenAI availability but don't import it unless necessary
|
1188
1432
|
try:
|
@@ -1202,8 +1446,7 @@ def main():
|
|
1202
1446
|
except Exception as e:
|
1203
1447
|
print(f"Error checking OpenAI availability: {e}")
|
1204
1448
|
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1449
|
+
socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
|
1450
|
+
|
1208
1451
|
if __name__ == '__main__':
|
1209
1452
|
main()
|