local-deep-research 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -320,12 +320,39 @@ def start_research():
     if not query:
         return jsonify({'status': 'error', 'message': 'Query is required'}), 400

-    # Check if there's any active research
+    # Check if there's any active research that's actually still running
     if active_research:
-        return jsonify({
-            'status': 'error',
-            'message': 'Another research is already in progress. Please wait for it to complete.'
-        }), 409
+        # Verify each active research is still valid
+        stale_research_ids = []
+        for research_id, research_data in list(active_research.items()):
+            # Check database status
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+            cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+            result = cursor.fetchone()
+            conn.close()
+
+            # If the research doesn't exist in DB or is not in_progress, it's stale
+            if not result or result[0] != 'in_progress':
+                stale_research_ids.append(research_id)
+            # Also check if thread is still alive
+            elif not research_data.get('thread') or not research_data.get('thread').is_alive():
+                stale_research_ids.append(research_id)
+
+        # Clean up any stale research processes
+        for stale_id in stale_research_ids:
+            print(f"Cleaning up stale research process: {stale_id}")
+            if stale_id in active_research:
+                del active_research[stale_id]
+            if stale_id in termination_flags:
+                del termination_flags[stale_id]
+
+        # After cleanup, check if there's still active research
+        if active_research:
+            return jsonify({
+                'status': 'error',
+                'message': 'Another research is already in progress. Please wait for it to complete.'
+            }), 409

     # Create a record in the database with explicit UTC timestamp
     created_at = datetime.utcnow().isoformat()
@@ -467,6 +494,7 @@ def handle_disconnect():
                 subscribers.remove(request.sid)
                 if not subscribers:
                     socket_subscriptions.pop(research_id, None)
+                    print(f"Removed empty subscription for research {research_id}")
     except Exception as e:
         print(f"Error handling disconnect: {e}")

@@ -474,23 +502,54 @@ def handle_disconnect():
 def handle_subscribe(data):
     research_id = data.get('research_id')
     if research_id:
-        if research_id not in socket_subscriptions:
-            socket_subscriptions[research_id] = set()
-        socket_subscriptions[research_id].add(request.sid)
-        print(f"Client {request.sid} subscribed to research {research_id}")
+        # First check if this research is still active
+        conn = sqlite3.connect(DB_PATH)
+        cursor = conn.cursor()
+        cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+        result = cursor.fetchone()
+        conn.close()

-        # Send current status immediately if available
-        if research_id in active_research:
-            progress = active_research[research_id]['progress']
-            latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+        # Only allow subscription to valid research
+        if result:
+            status = result[0]
+
+            # Initialize subscription set if needed
+            if research_id not in socket_subscriptions:
+                socket_subscriptions[research_id] = set()

-            if latest_log:
+            # Add this client to the subscribers
+            socket_subscriptions[research_id].add(request.sid)
+            print(f"Client {request.sid} subscribed to research {research_id}")
+
+            # Send current status immediately if available
+            if research_id in active_research:
+                progress = active_research[research_id]['progress']
+                latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+
+                if latest_log:
+                    emit(f'research_progress_{research_id}', {
+                        'progress': progress,
+                        'message': latest_log.get('message', 'Processing...'),
+                        'status': 'in_progress',
+                        'log_entry': latest_log
+                    })
+            elif status in ['completed', 'failed', 'suspended']:
+                # Send final status for completed research
                 emit(f'research_progress_{research_id}', {
-                    'progress': progress,
-                    'message': latest_log.get('message', 'Processing...'),
-                    'status': 'in_progress',
-                    'log_entry': latest_log
+                    'progress': 100 if status == 'completed' else 0,
+                    'message': 'Research completed successfully' if status == 'completed' else
+                               'Research failed' if status == 'failed' else 'Research was suspended',
+                    'status': status,
+                    'log_entry': {
+                        'time': datetime.utcnow().isoformat(),
+                        'message': f'Research is {status}',
+                        'progress': 100 if status == 'completed' else 0,
+                        'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
+                    }
                 })
+        else:
+            # Research not found
+            emit('error', {'message': f'Research ID {research_id} not found'})

 @socketio.on_error
 def handle_socket_error(e):
@@ -504,6 +563,47 @@ def handle_default_error(e):
     # Don't propagate exceptions to avoid crashing the server
     return False

+# Function to clean up resources for a completed research
+def cleanup_research_resources(research_id):
+    """Clean up resources for a completed research"""
+    print(f"Cleaning up resources for research {research_id}")
+
+    # Remove from active research
+    if research_id in active_research:
+        del active_research[research_id]
+
+    # Remove from termination flags
+    if research_id in termination_flags:
+        del termination_flags[research_id]
+
+    # Send a final message to any remaining subscribers with explicit completed status
+    if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+        final_message = {
+            'status': 'completed',
+            'message': 'Research process has ended and resources have been cleaned up',
+            'progress': 100,
+        }
+
+        try:
+            print(f"Sending final completion socket message for research {research_id}")
+            # Use emit to all, not just subscribers
+            socketio.emit(f'research_progress_{research_id}', final_message)
+
+            # Also emit to specific subscribers
+            for sid in socket_subscriptions[research_id]:
+                try:
+                    socketio.emit(
+                        f'research_progress_{research_id}',
+                        final_message,
+                        room=sid
+                    )
+                except Exception as sub_err:
+                    print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+        except Exception as e:
+            print(f"Error sending final cleanup message: {e}")
+
+    # Don't immediately remove subscriptions - let clients disconnect naturally
+
 def run_research_process(research_id, query, mode):
     try:
         system = AdvancedSearchSystem()
@@ -511,10 +611,36 @@ def run_research_process(research_id, query, mode):
         # Set up progress callback
         def progress_callback(message, progress_percent, metadata):
             timestamp = datetime.utcnow().isoformat()
+
+            # Adjust progress based on research mode
+            adjusted_progress = progress_percent
+            if mode == 'detailed' and metadata.get('phase') == 'output_generation':
+                # For detailed mode, we need to adjust the progress range
+                # because detailed reports take longer after the search phase
+                adjusted_progress = min(80, progress_percent)
+            elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
+                # Scale the progress from 80% to 95% for the report generation phase
+                # Map progress_percent values (0-100%) to the (80-95%) range
+                if progress_percent is not None:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 80 + (normalized * 15)
+            elif mode == 'quick' and metadata.get('phase') == 'output_generation':
+                # For quick mode, ensure we're at least at 85% during output generation
+                adjusted_progress = max(85, progress_percent)
+                # Map any further progress within output_generation to 85-95% range
+                if progress_percent is not None and progress_percent > 0:
+                    normalized = progress_percent / 100
+                    adjusted_progress = 85 + (normalized * 10)
+
+            # Don't let progress go backwards
+            if research_id in active_research and adjusted_progress is not None:
+                current_progress = active_research[research_id].get('progress', 0)
+                adjusted_progress = max(current_progress, adjusted_progress)
+
             log_entry = {
                 "time": timestamp,
                 "message": message,
-                "progress": progress_percent,
+                "progress": adjusted_progress,
                 "metadata": metadata
             }

@@ -526,11 +652,11 @@ def run_research_process(research_id, query, mode):
             # Update active research record
             if research_id in active_research:
                 active_research[research_id]['log'].append(log_entry)
-                if progress_percent is not None:
-                    active_research[research_id]['progress'] = progress_percent
+                if adjusted_progress is not None:
+                    active_research[research_id]['progress'] = adjusted_progress

             # Save to database (but not too frequently)
-            if progress_percent is None or progress_percent % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete']:
+            if adjusted_progress is None or adjusted_progress % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete', 'output_generation', 'report_generation', 'report_complete']:
                 conn = sqlite3.connect(DB_PATH)
                 cursor = conn.cursor()
                 cursor.execute(
@@ -554,7 +680,7 @@ def run_research_process(research_id, query, mode):
             # Emit socket event with try/except block to handle connection issues
             try:
                 event_data = {
-                    'progress': progress_percent,
+                    'progress': adjusted_progress,
                     'message': message,
                     'status': 'in_progress',
                     'log_entry': log_entry
@@ -578,14 +704,44 @@ def run_research_process(research_id, query, mode):
             except Exception as socket_error:
                 # Log socket error but continue with the research process
                 print(f"Socket emit error (non-critical): {str(socket_error)}")
+
+            return not (research_id in termination_flags and termination_flags[research_id])

         # Set the progress callback in the system
         system.set_progress_callback(progress_callback)

         # Run the search
         progress_callback("Starting research process", 5, {"phase": "init"})
-        results = system.analyze_topic(query)
-        progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+
+        try:
+            results = system.analyze_topic(query)
+            if mode == 'quick':
+                progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
+            else:
+                progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+        except Exception as search_error:
+            # Better handling of specific search errors
+            error_message = str(search_error)
+            error_type = "unknown"
+
+            # Extract error details for common issues
+            if "status code: 503" in error_message:
+                error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
+                error_type = "ollama_unavailable"
+            elif "status code: 404" in error_message:
+                error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
+                error_type = "model_not_found"
+            elif "status code:" in error_message:
+                # Extract the status code for other HTTP errors
+                status_code = error_message.split("status code:")[1].strip()
+                error_message = f"API request failed with status code {status_code}. Please check your configuration."
+                error_type = "api_error"
+            elif "connection" in error_message.lower():
+                error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
+                error_type = "connection_error"
+
+            # Raise with improved error message
+            raise Exception(f"{error_message} (Error type: {error_type})")

         # Generate output based on mode
         if mode == 'quick':
@@ -593,56 +749,92 @@ def run_research_process(research_id, query, mode):
             if results.get('findings'):
                 #initial_analysis = [finding['content'] for finding in results['findings']]
                 summary = ""
-                raw_formatted_findings = results['formatted_findings']
-
-                # ADDED CODE: Convert debug output to clean markdown
-                clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
-
-                # Save as markdown file
-                output_dir = "research_outputs"
-                if not os.path.exists(output_dir):
-                    os.makedirs(output_dir)
-
-                safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
-                safe_query = safe_query.replace(" ", "_").lower()
-                report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
-
-                with open(report_path, "w", encoding="utf-8") as f:
-                    f.write("# Quick Research Summary\n\n")
-                    f.write(f"Query: {query}\n\n")
-                    f.write(clean_markdown) # Use clean markdown instead of raw findings
-                    f.write("\n\n## Research Metrics\n")
-                    f.write(f"- Search Iterations: {results['iterations']}\n")
-                    f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")

-                # Update database
-                metadata = {
-                    'iterations': results['iterations'],
-                    'generated_at': datetime.utcnow().isoformat()
-                }
+                # Safer access to formatted_findings with logging
+                print(f"Results keys: {list(results.keys())}")

-                # Calculate duration in seconds - using UTC consistently
-                now = datetime.utcnow()
-                completed_at = now.isoformat()
+                # Check if formatted_findings exists in results
+                if 'formatted_findings' not in results:
+                    print("WARNING: 'formatted_findings' not found in results, using fallback")
+                    # Create fallback formatted findings from available data
+                    raw_formatted_findings = "# Research Findings\n\n"
+                    for i, finding in enumerate(results.get('findings', [])):
+                        raw_formatted_findings += f"## Finding {i+1}\n\n{finding.get('content', '')}\n\n"
+                else:
+                    raw_formatted_findings = results['formatted_findings']
+                    print(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")

-                # Get the start time from the database
-                conn = sqlite3.connect(DB_PATH)
-                cursor = conn.cursor()
-                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
-                result = cursor.fetchone()
-
-                # Use the helper function for consistent duration calculation
-                duration_seconds = calculate_duration(result[0])
-
-                # Update the record
-                cursor.execute(
-                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
-                    ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
-                )
-                conn.commit()
-                conn.close()
-
-                progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
+                try:
+                    # ADDED CODE: Convert debug output to clean markdown
+                    clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
+                    print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
+
+                    # First send a progress update for generating the summary
+                    progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
+
+                    # Save as markdown file
+                    output_dir = "research_outputs"
+                    if not os.path.exists(output_dir):
+                        os.makedirs(output_dir)
+
+                    safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
+                    safe_query = safe_query.replace(" ", "_").lower()
+                    report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
+
+                    # Send progress update for writing to file
+                    progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
+
+                    print(f"Writing report to: {report_path}")
+                    with open(report_path, "w", encoding="utf-8") as f:
+                        f.write("# Quick Research Summary\n\n")
+                        f.write(f"Query: {query}\n\n")
+                        f.write(clean_markdown) # Use clean markdown instead of raw findings
+                        f.write("\n\n## Research Metrics\n")
+                        f.write(f"- Search Iterations: {results['iterations']}\n")
+                        f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
+
+                    # Update database
+                    metadata = {
+                        'iterations': results['iterations'],
+                        'generated_at': datetime.utcnow().isoformat()
+                    }
+
+                    # Calculate duration in seconds - using UTC consistently
+                    now = datetime.utcnow()
+                    completed_at = now.isoformat()
+
+                    print(f"Updating database for research_id: {research_id}")
+                    # Get the start time from the database
+                    conn = sqlite3.connect(DB_PATH)
+                    cursor = conn.cursor()
+                    cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                    result = cursor.fetchone()
+
+                    # Use the helper function for consistent duration calculation
+                    duration_seconds = calculate_duration(result[0])
+
+                    # Update the record
+                    cursor.execute(
+                        'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
+                        ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
+                    )
+                    conn.commit()
+                    conn.close()
+                    print(f"Database updated successfully for research_id: {research_id}")
+
+                    # Send the final completion message
+                    progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
+
+                    # Clean up resources
+                    print(f"Cleaning up resources for research_id: {research_id}")
+                    cleanup_research_resources(research_id)
+                    print(f"Resources cleaned up for research_id: {research_id}")
+                except Exception as inner_e:
+                    print(f"Error during quick summary generation: {str(inner_e)}")
+                    print(traceback.format_exc())
+                    raise Exception(f"Error generating quick summary: {str(inner_e)}")
+            else:
+                raise Exception("No research findings were generated. Please try again.")
         else:
             # Full Report
             progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
@@ -688,23 +880,47 @@ def run_research_process(research_id, query, mode):

             progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})

-        # Clean up
-        if research_id in active_research:
-            del active_research[research_id]
+        # Clean up - moved to a separate function for reuse
+        cleanup_research_resources(research_id)

     except Exception as e:
         # Handle error
         error_message = f"Research failed: {str(e)}"
         print(f"Research error: {error_message}")
         try:
-            progress_callback(error_message, None, {"phase": "error", "error": str(e)})
+            # Check for common Ollama error patterns in the exception and provide more user-friendly errors
+            user_friendly_error = str(e)
+            error_context = {}
+
+            if "Error type: ollama_unavailable" in user_friendly_error:
+                user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
+                error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
+            elif "Error type: model_not_found" in user_friendly_error:
+                user_friendly_error = "Required Ollama model not found. Please pull the model first."
+                error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
+            elif "Error type: connection_error" in user_friendly_error:
+                user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
+                error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
+            elif "Error type: api_error" in user_friendly_error:
+                # Keep the original error message as it's already improved
+                error_context = {"solution": "Check API configuration and credentials."}
+
+            # Update metadata with more context about the error
+            metadata = {
+                "phase": "error",
+                "error": user_friendly_error
+            }
+            if error_context:
+                metadata.update(error_context)
+
+            progress_callback(user_friendly_error, None, metadata)

             conn = sqlite3.connect(DB_PATH)
             cursor = conn.cursor()

             # If termination was requested, mark as suspended instead of failed
             status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
-            message = "Research was terminated by user" if status == 'suspended' else str(e)
+            message = "Research was terminated by user" if status == 'suspended' else user_friendly_error

             # Calculate duration up to termination point - using UTC consistently
             now = datetime.utcnow()
@@ -721,7 +937,7 @@ def run_research_process(research_id, query, mode):

             cursor.execute(
                 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
-                (status, completed_at, duration_seconds, json.dumps({'error': message}), research_id)
+                (status, completed_at, duration_seconds, json.dumps(metadata), research_id)
             )
             conn.commit()
             conn.close()
@@ -749,11 +965,8 @@ def run_research_process(research_id, query, mode):
         except Exception as inner_e:
             print(f"Error in error handler: {str(inner_e)}")

-        # Clean up resources
-        if research_id in active_research:
-            del active_research[research_id]
-        if research_id in termination_flags:
-            del termination_flags[research_id]
+        # Clean up resources - moved to a separate function
+        cleanup_research_resources(research_id)

 @research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
 def terminate_research(research_id):
@@ -1133,7 +1346,6 @@ def app_serve_static(path):
 def favicon():
     return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')

-
 # Add this function to app.py
 def convert_debug_to_markdown(raw_text, query):
     """
@@ -1146,43 +1358,75 @@ def convert_debug_to_markdown(raw_text, query):
     Returns:
         Clean markdown formatted text
     """
-    # If there's a "DETAILED FINDINGS:" section, extract everything after it
-    if "DETAILED FINDINGS:" in raw_text:
-        detailed_index = raw_text.index("DETAILED FINDINGS:")
-        content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
-    else:
-        content = raw_text
-
-    # Remove divider lines with === symbols
-    content = "\n".join([line for line in content.split("\n")
-                        if not line.strip().startswith("===") and not line.strip() == "="*80])
-
-    # If COMPLETE RESEARCH OUTPUT exists, remove that section
-    if "COMPLETE RESEARCH OUTPUT" in content:
-        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
-
-    # Remove SEARCH QUESTIONS BY ITERATION section
-    if "SEARCH QUESTIONS BY ITERATION:" in content:
-        search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
-        next_major_section = -1
-        for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
-            if marker in content[search_index:]:
-                marker_pos = content.index(marker, search_index)
-                if next_major_section == -1 or marker_pos < next_major_section:
-                    next_major_section = marker_pos
+    try:
+        print(f"Starting markdown conversion for query: {query}")
+        print(f"Raw text type: {type(raw_text)}")

-        if next_major_section != -1:
-            content = content[:search_index] + content[next_major_section:]
+        # Handle None or empty input
+        if not raw_text:
+            print("WARNING: raw_text is empty or None")
+            return f"No detailed findings available for '{query}'."
+
+        # If there's a "DETAILED FINDINGS:" section, extract everything after it
+        if "DETAILED FINDINGS:" in raw_text:
+            print("Found DETAILED FINDINGS section")
+            detailed_index = raw_text.index("DETAILED FINDINGS:")
+            content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
         else:
-            # If no later section, just remove everything from SEARCH QUESTIONS onwards
-            content = content[:search_index].strip()
-
-    return content.strip()
+            print("No DETAILED FINDINGS section found, using full text")
+            content = raw_text
+
+        # Remove divider lines with === symbols
+        lines_before = len(content.split("\n"))
+        content = "\n".join([line for line in content.split("\n")
+                            if not line.strip().startswith("===") and not line.strip() == "="*80])
+        lines_after = len(content.split("\n"))
+        print(f"Removed {lines_before - lines_after} divider lines")
+
+        # If COMPLETE RESEARCH OUTPUT exists, remove that section
+        if "COMPLETE RESEARCH OUTPUT" in content:
+            print("Found and removing COMPLETE RESEARCH OUTPUT section")
+            content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
+        # Remove SEARCH QUESTIONS BY ITERATION section
+        if "SEARCH QUESTIONS BY ITERATION:" in content:
+            print("Found SEARCH QUESTIONS BY ITERATION section")
+            search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
+            next_major_section = -1
+            for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
+                if marker in content[search_index:]:
+                    marker_pos = content.index(marker, search_index)
+                    if next_major_section == -1 or marker_pos < next_major_section:
+                        next_major_section = marker_pos
+
+            if next_major_section != -1:
+                print(f"Removing section from index {search_index} to {next_major_section}")
+                content = content[:search_index] + content[next_major_section:]
+            else:
+                # If no later section, just remove everything from SEARCH QUESTIONS onwards
+                print(f"Removing everything after index {search_index}")
+                content = content[:search_index].strip()
+
+        print(f"Final markdown length: {len(content.strip())}")
+        return content.strip()
+    except Exception as e:
+        print(f"Error in convert_debug_to_markdown: {str(e)}")
+        print(traceback.format_exc())
+        # Return a basic message with the original query as fallback
+        return f"# Research on {query}\n\nThere was an error formatting the research results."
+
 def main():
     """
     Entry point for the web application when run as a command.
     This function is needed for the package's entry point to work properly.
     """
+    # Import settings here to avoid circular imports
+    from local_deep_research.config import settings
+
+    # Get web server settings with defaults
+    port = settings.web.port
+    host = settings.web.host
+    debug = settings.web.debug

     # Check for OpenAI availability but don't import it unless necessary
     try:
@@ -1202,8 +1446,7 @@ def main():
     except Exception as e:
         print(f"Error checking OpenAI availability: {e}")

-
-    socketio.run(app, debug=True, host='0.0.0.0', port=5000, allow_unsafe_werkzeug=True)
-
+    socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
+
 if __name__ == '__main__':
     main()