local_deep_research-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
local_deep_research/web/app.py (new file, +1209 lines)
import os
import json
import time
import sqlite3
import threading
from datetime import datetime
from flask import Flask, render_template, request, jsonify, send_from_directory, Response, make_response, current_app, Blueprint, redirect, url_for, flash
from flask_socketio import SocketIO, emit
from local_deep_research.search_system import AdvancedSearchSystem
from local_deep_research.report_generator import IntegratedReportGenerator
# Imported at module level so the parser is available everywhere in this file
from dateutil import parser
import traceback
import pkg_resources
# Configuration manager
from local_deep_research.config import get_config_dir
import logging
import toml

logger = logging.getLogger(__name__)

CONFIG_DIR = get_config_dir() / "config"
MAIN_CONFIG_FILE = CONFIG_DIR / "settings.toml"
LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"

# Flag for tracking OpenAI availability; checked only when needed
OPENAI_AVAILABLE = False

# Initialize Flask app
try:
    from local_deep_research.utilties.setup_utils import setup_user_directories

    # Configure logging
    logging.basicConfig(level=logging.INFO)

    # Create directories and default configs if needed
    logger.info("Initializing configuration...")
    setup_user_directories()

    # Get directories based on package installation
    PACKAGE_DIR = pkg_resources.resource_filename('local_deep_research', 'web')
    STATIC_DIR = os.path.join(PACKAGE_DIR, 'static')
    TEMPLATE_DIR = os.path.join(PACKAGE_DIR, 'templates')

    # Initialize Flask app with package directories
    app = Flask(__name__,
                static_folder=STATIC_DIR,
                template_folder=TEMPLATE_DIR)
    print(f"Using package static path: {STATIC_DIR}")
    print(f"Using package template path: {TEMPLATE_DIR}")
except Exception as e:
    # Fallback for development
    print(f"Package directories not found, using fallback paths: {str(e)}")
    app = Flask(__name__,
                static_folder=os.path.abspath('static'),
                template_folder=os.path.abspath('templates'))
app.config['SECRET_KEY'] = 'deep-research-secret-key'

# Create a Blueprint for the research application
research_bp = Blueprint('research', __name__, url_prefix='/research')

# Socket.IO configuration with improved error handling
socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    async_mode='threading',
    path='/research/socket.io',
    logger=True,
    engineio_logger=True,
    ping_timeout=20,
    ping_interval=5
)
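
# A minimal progress-watching client for the Socket.IO setup above. Illustrative
# sketch only (not in the published file): it assumes the `python-socketio`
# client package (a flask-socketio dependency) and a server on localhost:5000;
# `research_id=1` is a placeholder.
def _example_progress_client(research_id=1, base_url='http://localhost:5000'):
    import socketio as sio_client  # assumption: python-socketio is installed
    sio = sio_client.Client()
    # The server mounts Socket.IO under a non-default path, so pass it here
    sio.connect(base_url, socketio_path='/research/socket.io')
    sio.on(f'research_progress_{research_id}',
           lambda data: print(data.get('progress'), data.get('message')))
    sio.emit('subscribe_to_research', {'research_id': research_id})
    sio.wait()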

# Active research processes and socket subscriptions
active_research = {}
socket_subscriptions = {}

# Termination flags for in-progress research
termination_flags = {}

# Database setup
DB_PATH = 'research_history.db'

# Add Content Security Policy headers to allow Socket.IO to function
@app.after_request
def add_security_headers(response):
    # Define a permissive CSP for development that allows Socket.IO to function
    csp = (
        "default-src 'self'; "
        "connect-src 'self' ws: wss: http: https:; "
        "script-src 'self' 'unsafe-inline' 'unsafe-eval' cdnjs.cloudflare.com cdn.jsdelivr.net unpkg.com; "
        "style-src 'self' 'unsafe-inline' cdnjs.cloudflare.com; "
        "font-src 'self' cdnjs.cloudflare.com; "
        "img-src 'self' data:; "
        "worker-src blob:; "
        "frame-src 'self';"
    )

    response.headers['Content-Security-Policy'] = csp
    response.headers['X-Content-Security-Policy'] = csp

    # Add CORS headers for API requests
    if request.path.startswith('/api/'):
        response.headers['Access-Control-Allow-Origin'] = '*'
        response.headers['Access-Control-Allow-Methods'] = 'GET, POST, DELETE, OPTIONS'
        response.headers['Access-Control-Allow-Headers'] = 'Content-Type'

    return response
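
# A quick way to eyeball the headers set above. Illustrative sketch only (not
# in the published file); assumes the `requests` package and a local server.
def _example_check_headers(base_url='http://localhost:5000'):
    import requests  # assumption: requests is installed
    resp = requests.get(f'{base_url}/research/')
    print(resp.headers.get('Content-Security-Policy'))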

# Add a middleware layer to handle abrupt disconnections
@app.before_request
def handle_websocket_requests():
    if request.path.startswith('/research/socket.io'):
        try:
            if not request.environ.get('werkzeug.socket'):
                return
        except Exception as e:
            print(f"WebSocket preprocessing error: {e}")
            # Return an empty response to prevent further processing
            return '', 200

# Initialize the database
def init_db():
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    # Create the table if it doesn't exist
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS research_history (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            query TEXT NOT NULL,
            mode TEXT NOT NULL,
            status TEXT NOT NULL,
            created_at TEXT NOT NULL,
            completed_at TEXT,
            duration_seconds INTEGER,
            report_path TEXT,
            metadata TEXT,
            progress_log TEXT
        )
    ''')

    # Check if the duration_seconds column exists; add it if missing
    cursor.execute('PRAGMA table_info(research_history)')
    columns = [column[1] for column in cursor.fetchall()]

    if 'duration_seconds' not in columns:
        print("Adding missing 'duration_seconds' column to research_history table")
        cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')

    conn.commit()
    conn.close()

# Helper function to calculate how long a research run has been going
def calculate_duration(created_at_str):
    """
    Calculate the duration in seconds between the created_at timestamp and now.
    Handles several timestamp formats and returns None if the calculation fails.
    """
    if not created_at_str:
        return None

    now = datetime.utcnow()
    duration_seconds = None

    try:
        if 'T' in created_at_str:  # ISO format with T separator
            start_time = datetime.fromisoformat(created_at_str)
        else:  # Older formats without T; try each in turn
            try:
                start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S.%f')
            except ValueError:
                try:
                    start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S')
                except ValueError:
                    # Last-resort fallback
                    start_time = datetime.fromisoformat(created_at_str.replace(' ', 'T'))

        # Both timestamps are UTC, so the difference is meaningful
        duration_seconds = int((now - start_time).total_seconds())
    except Exception as e:
        print(f"Error calculating duration: {str(e)}")
        # Fallback: let dateutil guess the format
        try:
            start_time_fallback = parser.parse(created_at_str)
            duration_seconds = int((now - start_time_fallback).total_seconds())
        except Exception:
            print(f"Fallback duration calculation also failed for timestamp: {created_at_str}")

    return duration_seconds
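
# A small sanity check for calculate_duration(); both calls should print a
# value close to 90. Illustrative sketch only (not in the published file).
def _example_calculate_duration():
    from datetime import timedelta
    start = datetime.utcnow() - timedelta(seconds=90)
    print(calculate_duration(start.isoformat()))                    # ISO form
    print(calculate_duration(start.strftime('%Y-%m-%d %H:%M:%S')))  # legacy form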

# Initialize the database on startup
def initialize():
    init_db()

# Call initialize immediately when the app is created
initialize()

# Route for the index page; kept at root level for easy access
@app.route('/')
def root_index():
    return redirect(url_for('research.index'))

# All other routes live under the research prefix
@research_bp.route('/')
def index():
    return render_template('index.html')

@research_bp.route('/static/<path:path>')
def serve_static(path):
    try:
        print(f"Serving static file: {path}")
        print(f"Static folder path: {app.static_folder}")
        return send_from_directory(app.static_folder, path)
    except Exception as e:
        print(f"Error serving static file {path}: {str(e)}")
        return f"Error serving file: {str(e)}", 404

@research_bp.route('/api/history', methods=['GET'])
def get_history():
    """Get the research history"""
    try:
        conn = sqlite3.connect(DB_PATH)
        conn.row_factory = sqlite3.Row
        cursor = conn.cursor()

        # Get all history records, newest first
        cursor.execute('SELECT * FROM research_history ORDER BY created_at DESC')
        results = cursor.fetchall()
        conn.close()

        # Convert to a list of dicts
        history = []
        for result in results:
            item = dict(result)

            # Ensure all keys exist, with default values
            if 'id' not in item:
                item['id'] = None
            if 'query' not in item:
                item['query'] = 'Untitled Research'
            if 'mode' not in item:
                item['mode'] = 'quick'
            if 'status' not in item:
                item['status'] = 'unknown'
            if 'created_at' not in item:
                item['created_at'] = None
            if 'completed_at' not in item:
                item['completed_at'] = None
            if 'duration_seconds' not in item:
                item['duration_seconds'] = None
            if 'report_path' not in item:
                item['report_path'] = None
            if 'metadata' not in item:
                item['metadata'] = '{}'
            if 'progress_log' not in item:
                item['progress_log'] = '[]'

            # Ensure timestamps are in ISO format
            if item['created_at'] and 'T' not in item['created_at']:
                try:
                    dt = parser.parse(item['created_at'])
                    item['created_at'] = dt.isoformat()
                except Exception:
                    pass

            if item['completed_at'] and 'T' not in item['completed_at']:
                try:
                    dt = parser.parse(item['completed_at'])
                    item['completed_at'] = dt.isoformat()
                except Exception:
                    pass

            # Recalculate a missing duration when both timestamps exist
            if item['duration_seconds'] is None and item['created_at'] and item['completed_at']:
                try:
                    start_time = parser.parse(item['created_at'])
                    end_time = parser.parse(item['completed_at'])
                    item['duration_seconds'] = int((end_time - start_time).total_seconds())
                except Exception as e:
                    print(f"Error recalculating duration: {str(e)}")

            history.append(item)

        # Add CORS headers
        response = make_response(jsonify(history))
        response.headers.add('Access-Control-Allow-Origin', '*')
        response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
        response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
        return response
    except Exception as e:
        print(f"Error getting history: {str(e)}")
        print(traceback.format_exc())
        # Return an empty array, still with CORS headers
        response = make_response(jsonify([]))
        response.headers.add('Access-Control-Allow-Origin', '*')
        response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
        response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
        return response
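
# The normalization above leans on dateutil, which accepts both the legacy and
# ISO timestamp shapes. Illustrative sketch only (not in the published file).
def _example_normalize_timestamp():
    dt = parser.parse('2024-01-02 03:04:05')  # legacy form, no 'T' separator
    print(dt.isoformat())                     # -> 2024-01-02T03:04:05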

@research_bp.route('/api/start_research', methods=['POST'])
def start_research():
    data = request.json
    query = data.get('query')
    mode = data.get('mode', 'quick')

    if not query:
        return jsonify({'status': 'error', 'message': 'Query is required'}), 400

    # Check if there's any active research
    if active_research:
        return jsonify({
            'status': 'error',
            'message': 'Another research is already in progress. Please wait for it to complete.'
        }), 409

    # Create a record in the database with an explicit UTC timestamp
    created_at = datetime.utcnow().isoformat()
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    cursor.execute(
        'INSERT INTO research_history (query, mode, status, created_at, progress_log) VALUES (?, ?, ?, ?, ?)',
        (query, mode, 'in_progress', created_at, json.dumps([{"time": created_at, "message": "Research started", "progress": 0}]))
    )
    research_id = cursor.lastrowid
    conn.commit()
    conn.close()

    # Start the research process in a background thread
    thread = threading.Thread(
        target=run_research_process,
        args=(research_id, query, mode)
    )
    thread.daemon = True
    thread.start()

    active_research[research_id] = {
        'thread': thread,
        'progress': 0,
        'status': 'in_progress',
        'log': [{"time": created_at, "message": "Research started", "progress": 0}]
    }

    return jsonify({
        'status': 'success',
        'research_id': research_id
    })
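
# Kicking off a research run over HTTP. Illustrative sketch only (not in the
# published file); assumes the `requests` package and a local server. The JSON
# contract matches the handler above.
def _example_start_research(query='example topic', mode='quick',
                            base_url='http://localhost:5000'):
    import requests  # assumption: requests is installed
    resp = requests.post(f'{base_url}/research/api/start_research',
                         json={'query': query, 'mode': mode})
    return resp.json().get('research_id')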

@research_bp.route('/api/research/<int:research_id>')
def get_research_status(research_id):
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
    result = dict(cursor.fetchone() or {})
    conn.close()

    if not result:
        return jsonify({'status': 'error', 'message': 'Research not found'}), 404

    # Add progress information
    if research_id in active_research:
        result['progress'] = active_research[research_id]['progress']
        result['log'] = active_research[research_id]['log']
    elif result.get('status') == 'completed':
        result['progress'] = 100
        try:
            result['log'] = json.loads(result.get('progress_log', '[]'))
        except Exception:
            result['log'] = []
    else:
        result['progress'] = 0
        try:
            result['log'] = json.loads(result.get('progress_log', '[]'))
        except Exception:
            result['log'] = []

    return jsonify(result)
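
# Polling the status endpoint until completion; pairs with
# _example_start_research above. Illustrative sketch only (not in the
# published file); assumes the `requests` package.
def _example_poll_status(research_id, base_url='http://localhost:5000'):
    import requests  # assumption: requests is installed
    while True:
        info = requests.get(f'{base_url}/research/api/research/{research_id}').json()
        print(info.get('progress'), info.get('status'))
        if info.get('status') in ('completed', 'failed', 'suspended'):
            return info
        time.sleep(2)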

@research_bp.route('/api/research/<int:research_id>/details')
def get_research_details(research_id):
    """Get the detailed progress log for a specific research run"""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
    result = dict(cursor.fetchone() or {})
    conn.close()

    if not result:
        return jsonify({'status': 'error', 'message': 'Research not found'}), 404

    try:
        # Get the stored progress log
        progress_log = json.loads(result.get('progress_log', '[]'))
    except Exception:
        progress_log = []

    # If this research is active, use the latest in-memory log instead
    if research_id in active_research:
        progress_log = active_research[research_id]['log']

    # 'status' reflects the research record itself
    return jsonify({
        'research_id': research_id,
        'query': result.get('query'),
        'mode': result.get('mode'),
        'status': result.get('status'),
        'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
        'created_at': result.get('created_at'),
        'completed_at': result.get('completed_at'),
        'log': progress_log
    })

@research_bp.route('/api/report/<int:research_id>')
def get_report(research_id):
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
    result = dict(cursor.fetchone() or {})
    conn.close()

    if not result or not result.get('report_path'):
        return jsonify({'status': 'error', 'message': 'Report not found'}), 404

    try:
        with open(result['report_path'], 'r', encoding='utf-8') as f:
            content = f.read()
        return jsonify({
            'status': 'success',
            'content': content,
            'metadata': json.loads(result.get('metadata', '{}'))
        })
    except Exception as e:
        return jsonify({'status': 'error', 'message': str(e)}), 500

@research_bp.route('/research/details/<int:research_id>')
def research_details_page(research_id):
    """Render the research details page"""
    return render_template('index.html')

@socketio.on('connect')
def handle_connect():
    print(f"Client connected: {request.sid}")

@socketio.on('disconnect')
def handle_disconnect():
    try:
        print(f"Client disconnected: {request.sid}")
        # Clean up this client's subscriptions
        for research_id, subscribers in list(socket_subscriptions.items()):
            if request.sid in subscribers:
                subscribers.remove(request.sid)
                if not subscribers:
                    socket_subscriptions.pop(research_id, None)
    except Exception as e:
        print(f"Error handling disconnect: {e}")

@socketio.on('subscribe_to_research')
def handle_subscribe(data):
    research_id = data.get('research_id')
    if research_id:
        if research_id not in socket_subscriptions:
            socket_subscriptions[research_id] = set()
        socket_subscriptions[research_id].add(request.sid)
        print(f"Client {request.sid} subscribed to research {research_id}")

        # Send the current status immediately if available
        if research_id in active_research:
            progress = active_research[research_id]['progress']
            latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None

            if latest_log:
                emit(f'research_progress_{research_id}', {
                    'progress': progress,
                    'message': latest_log.get('message', 'Processing...'),
                    'status': 'in_progress',
                    'log_entry': latest_log
                })

@socketio.on_error()
def handle_socket_error(e):
    print(f"Socket.IO error: {str(e)}")
    # Don't propagate exceptions, to avoid crashing the server
    return False

@socketio.on_error_default
def handle_default_error(e):
    print(f"Unhandled Socket.IO error: {str(e)}")
    # Don't propagate exceptions, to avoid crashing the server
    return False

def run_research_process(research_id, query, mode):
    try:
        system = AdvancedSearchSystem()

        # Set up the progress callback
        def progress_callback(message, progress_percent, metadata):
            timestamp = datetime.utcnow().isoformat()
            log_entry = {
                "time": timestamp,
                "message": message,
                "progress": progress_percent,
                "metadata": metadata
            }

            # Check whether termination was requested
            if research_id in termination_flags and termination_flags[research_id]:
                # Abort by raising; the outer handler cleans up
                raise Exception("Research was terminated by user")

            # Update the active research record
            if research_id in active_research:
                active_research[research_id]['log'].append(log_entry)
                if progress_percent is not None:
                    active_research[research_id]['progress'] = progress_percent

            # Save to the database (but not on every callback)
            if progress_percent is None or progress_percent % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete']:
                conn = sqlite3.connect(DB_PATH)
                cursor = conn.cursor()
                cursor.execute(
                    'SELECT progress_log FROM research_history WHERE id = ?',
                    (research_id,)
                )
                result = cursor.fetchone()
                if result:
                    try:
                        current_log = json.loads(result[0])
                    except Exception:
                        current_log = []
                    current_log.append(log_entry)
                    cursor.execute(
                        'UPDATE research_history SET progress_log = ? WHERE id = ?',
                        (json.dumps(current_log), research_id)
                    )
                conn.commit()
                conn.close()

            # Emit the socket event, tolerating connection issues
            try:
                event_data = {
                    'progress': progress_percent,
                    'message': message,
                    'status': 'in_progress',
                    'log_entry': log_entry
                }

                # Emit to the research-specific channel
                socketio.emit(f'research_progress_{research_id}', event_data)

                # Also emit to individual subscribers if any
                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
                    for sid in socket_subscriptions[research_id]:
                        try:
                            socketio.emit(
                                f'research_progress_{research_id}',
                                event_data,
                                room=sid
                            )
                        except Exception as sub_err:
                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")

            except Exception as socket_error:
                # Log the socket error but keep the research process going
                print(f"Socket emit error (non-critical): {str(socket_error)}")

        # Set the progress callback in the system
        system.set_progress_callback(progress_callback)

        # Run the search
        progress_callback("Starting research process", 5, {"phase": "init"})
        results = system.analyze_topic(query)
        progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})

        # Generate output based on mode
        if mode == 'quick':
            # Quick Summary
            if results.get('findings'):
                raw_formatted_findings = results['formatted_findings']

                # Convert the debug output to clean markdown
                clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)

                # Save as a markdown file
                output_dir = "research_outputs"
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)

                safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
                safe_query = safe_query.replace(" ", "_").lower()
                report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")

                with open(report_path, "w", encoding="utf-8") as f:
                    f.write("# Quick Research Summary\n\n")
                    f.write(f"Query: {query}\n\n")
                    f.write(clean_markdown)  # Use clean markdown instead of raw findings
                    f.write("\n\n## Research Metrics\n")
                    f.write(f"- Search Iterations: {results['iterations']}\n")
                    f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")

                # Update the database
                metadata = {
                    'iterations': results['iterations'],
                    'generated_at': datetime.utcnow().isoformat()
                }

                # Completion timestamp, using UTC consistently
                completed_at = datetime.utcnow().isoformat()

                # Get the start time from the database
                conn = sqlite3.connect(DB_PATH)
                cursor = conn.cursor()
                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
                result = cursor.fetchone()

                # Use the helper for a consistent duration calculation
                duration_seconds = calculate_duration(result[0])

                # Update the record
                cursor.execute(
                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
                    ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
                )
                conn.commit()
                conn.close()

                progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
        else:
            # Full Report
            progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
            report_generator = IntegratedReportGenerator()
            final_report = report_generator.generate_report(results, query)
            progress_callback("Report generation complete", 95, {"phase": "report_complete"})

            # Save as a markdown file
            output_dir = "research_outputs"
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)

            safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
            safe_query = safe_query.replace(" ", "_").lower()
            report_path = os.path.join(output_dir, f"detailed_report_{safe_query}.md")

            with open(report_path, "w", encoding="utf-8") as f:
                f.write(final_report['content'])

            # Update the database
            metadata = final_report['metadata']
            metadata['iterations'] = results['iterations']

            # Completion timestamp, using UTC consistently
            completed_at = datetime.utcnow().isoformat()

            # Get the start time from the database
            conn = sqlite3.connect(DB_PATH)
            cursor = conn.cursor()
            cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
            result = cursor.fetchone()

            # Use the helper for a consistent duration calculation
            duration_seconds = calculate_duration(result[0])

            cursor.execute(
                'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
                ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
            )
            conn.commit()
            conn.close()

            progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})

        # Clean up
        if research_id in active_research:
            del active_research[research_id]

    except Exception as e:
        # Handle the error
        error_message = f"Research failed: {str(e)}"
        print(f"Research error: {error_message}")
        try:
            progress_callback(error_message, None, {"phase": "error", "error": str(e)})

            conn = sqlite3.connect(DB_PATH)
            cursor = conn.cursor()

            # If termination was requested, mark as suspended instead of failed
            status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
            message = "Research was terminated by user" if status == 'suspended' else str(e)

            # Duration up to the termination point, using UTC consistently
            completed_at = datetime.utcnow().isoformat()

            # Get the start time from the database
            duration_seconds = None
            cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
            result = cursor.fetchone()

            # Use the helper for a consistent duration calculation
            if result and result[0]:
                duration_seconds = calculate_duration(result[0])

            cursor.execute(
                'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
                (status, completed_at, duration_seconds, json.dumps({'error': message}), research_id)
            )
            conn.commit()
            conn.close()

            try:
                socketio.emit(f'research_progress_{research_id}', {
                    'status': status,
                    'error': message
                })

                # Also notify individual subscribers
                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
                    for sid in socket_subscriptions[research_id]:
                        try:
                            socketio.emit(
                                f'research_progress_{research_id}',
                                {'status': status, 'error': message},
                                room=sid
                            )
                        except Exception as sub_err:
                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")

            except Exception as socket_error:
                print(f"Failed to emit the error via socket: {str(socket_error)}")
        except Exception as inner_e:
            print(f"Error in error handler: {str(inner_e)}")

        # Clean up resources
        if research_id in active_research:
            del active_research[research_id]
        if research_id in termination_flags:
            del termination_flags[research_id]
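
# The callback contract used above: the search system is expected to invoke
# progress_callback(message, progress_percent, metadata) as it works. A stub
# source honouring that contract might look like this; illustrative sketch only
# (not in the published file), mirroring the phases this file reacts to.
def _example_progress_source(callback):
    callback("Generating search queries", 10, {"phase": "init"})
    callback("Iteration finished", 50, {"phase": "iteration_complete"})
    callback("Done", 100, {"phase": "complete"})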

@research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
def terminate_research(research_id):
    """Terminate an in-progress research process"""

    # Check that the research exists and is in progress
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
    result = cursor.fetchone()

    if not result:
        conn.close()
        return jsonify({'status': 'error', 'message': 'Research not found'}), 404

    status = result[0]

    # If it's not in progress, return an error
    if status != 'in_progress':
        conn.close()
        return jsonify({'status': 'error', 'message': 'Research is not in progress'}), 400

    # If it's not in the active_research dict, just mark it suspended
    if research_id not in active_research:
        cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
        conn.commit()
        conn.close()
        return jsonify({'status': 'success', 'message': 'Research terminated'})

    # Set the termination flag
    termination_flags[research_id] = True

    # Log the termination request, using a UTC timestamp
    timestamp = datetime.utcnow().isoformat()
    log_entry = {
        "time": timestamp,
        "message": "Research termination requested by user",
        "progress": active_research[research_id]['progress'],
        "metadata": {"phase": "termination"}
    }

    active_research[research_id]['log'].append(log_entry)

    # Update the log in the database
    cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
    log_result = cursor.fetchone()
    if log_result:
        try:
            current_log = json.loads(log_result[0])
        except Exception:
            current_log = []
        current_log.append(log_entry)
        cursor.execute(
            'UPDATE research_history SET progress_log = ? WHERE id = ?',
            (json.dumps(current_log), research_id)
        )

    conn.commit()
    conn.close()

    # Emit a socket event for the termination request
    try:
        event_data = {
            'status': 'terminating',
            'message': 'Research termination requested by user'
        }

        socketio.emit(f'research_progress_{research_id}', event_data)

        if research_id in socket_subscriptions and socket_subscriptions[research_id]:
            for sid in socket_subscriptions[research_id]:
                try:
                    socketio.emit(
                        f'research_progress_{research_id}',
                        event_data,
                        room=sid
                    )
                except Exception as err:
                    print(f"Error emitting to subscriber {sid}: {str(err)}")

    except Exception as socket_error:
        print(f"Socket emit error (non-critical): {str(socket_error)}")

    return jsonify({'status': 'success', 'message': 'Research termination requested'})
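
# Requesting termination from a client. Illustrative sketch only (not in the
# published file); assumes the `requests` package and a local server.
def _example_terminate(research_id, base_url='http://localhost:5000'):
    import requests  # assumption: requests is installed
    return requests.post(
        f'{base_url}/research/api/research/{research_id}/terminate').json()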

@research_bp.route('/api/research/<int:research_id>/delete', methods=['DELETE'])
def delete_research(research_id):
    """Delete a research record"""
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()

    # First check that the research exists and is not in progress
    cursor.execute('SELECT status, report_path FROM research_history WHERE id = ?', (research_id,))
    result = cursor.fetchone()

    if not result:
        conn.close()
        return jsonify({'status': 'error', 'message': 'Research not found'}), 404

    status, report_path = result

    # Don't allow deleting research that is in progress
    if status == 'in_progress' and research_id in active_research:
        conn.close()
        return jsonify({
            'status': 'error',
            'message': 'Cannot delete research that is in progress'
        }), 400

    # Delete the report file if it exists
    if report_path and os.path.exists(report_path):
        try:
            os.remove(report_path)
        except Exception as e:
            print(f"Error removing report file: {str(e)}")

    # Delete the database record
    cursor.execute('DELETE FROM research_history WHERE id = ?', (research_id,))
    conn.commit()
    conn.close()

    return jsonify({'status': 'success'})
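
# Deleting a finished record uses the DELETE verb. Illustrative sketch only
# (not in the published file); assumes the `requests` package.
def _example_delete(research_id, base_url='http://localhost:5000'):
    import requests  # assumption: requests is installed
    return requests.delete(
        f'{base_url}/research/api/research/{research_id}/delete').json()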

# Main settings page that links to specialized config pages
@research_bp.route('/settings', methods=['GET'])
def settings_page():
    """Main settings dashboard with links to specialized config pages"""
    return render_template('settings_dashboard.html')

@research_bp.route('/settings/main', methods=['GET'])
def main_config_page():
    """Edit the main configuration with search parameters"""
    return render_template('main_config.html', main_file_path=MAIN_CONFIG_FILE)

@research_bp.route('/settings/llm', methods=['GET'])
def llm_config_page():
    """Edit the LLM configuration using the raw file editor"""
    return render_template('llm_config.html', llm_file_path=LLM_CONFIG_FILE)

@research_bp.route('/settings/collections', methods=['GET'])
def collections_config_page():
    """Edit the local collections configuration using the raw file editor"""
    return render_template('collections_config.html', collections_file_path=LOCAL_COLLECTIONS_FILE)

@research_bp.route('/settings/api_keys', methods=['GET'])
def api_keys_config_page():
    """Edit the API keys configuration"""
    # Get the secrets file path
    secrets_file = CONFIG_DIR / ".secrets.toml"

    return render_template('api_keys_config.html', secrets_file_path=secrets_file)

# This import belongs with the ones at the top of the file
from local_deep_research.config import SEARCH_ENGINES_FILE

# Search engines configuration page
@research_bp.route('/settings/search_engines', methods=['GET'])
def search_engines_config_page():
    """Edit the search engines configuration using the raw file editor"""
    # Read the current config file
    raw_config = ""
    try:
        with open(SEARCH_ENGINES_FILE, 'r') as f:
            raw_config = f.read()
    except Exception as e:
        flash(f'Error reading search engines configuration: {str(e)}', 'error')
        raw_config = "# Error reading configuration file"

    # Get the list of engine names for display, in alphabetical order
    engine_names = []
    try:
        from local_deep_research.web_search_engines.search_engines_config import SEARCH_ENGINES
        engine_names = sorted(SEARCH_ENGINES.keys())
    except Exception as e:
        logger.error(f"Error getting engine names: {e}")

    return render_template('search_engines_config.html',
                           search_engines_file_path=SEARCH_ENGINES_FILE,
                           raw_config=raw_config,
                           engine_names=engine_names)

# Save the search engines configuration
@research_bp.route('/api/save_search_engines_config', methods=['POST'])
def save_search_engines_config():
    try:
        data = request.get_json()
        raw_config = data.get('raw_config', '')

        # Validate the TOML syntax
        try:
            toml.loads(raw_config)
        except toml.TomlDecodeError as e:
            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})

        # Ensure the directory exists
        os.makedirs(os.path.dirname(SEARCH_ENGINES_FILE), exist_ok=True)

        # Create a backup first
        backup_path = f"{SEARCH_ENGINES_FILE}.bak"
        if os.path.exists(SEARCH_ENGINES_FILE):
            import shutil
            shutil.copy2(SEARCH_ENGINES_FILE, backup_path)

        # Write the new config
        with open(SEARCH_ENGINES_FILE, 'w') as f:
            f.write(raw_config)

        return jsonify({'success': True})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})
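
# The validate -> backup -> write pattern above is shared by all the TOML
# config endpoints; the same flow as a standalone helper. Illustrative sketch
# only (not in the published file).
def _example_safe_write_toml(path, raw_config):
    import shutil
    toml.loads(raw_config)                 # raises TomlDecodeError on bad input
    if os.path.exists(path):
        shutil.copy2(path, f"{path}.bak")  # keep a backup of the old file
    with open(path, 'w') as f:
        f.write(raw_config)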


# API endpoint to save the raw LLM config
@research_bp.route('/api/save_llm_config', methods=['POST'])
def save_llm_config():
    try:
        data = request.get_json()
        raw_config = data.get('raw_config', '')

        # Validate the Python syntax
        try:
            compile(raw_config, '<string>', 'exec')
        except SyntaxError as e:
            return jsonify({'success': False, 'error': f'Syntax error: {str(e)}'})

        # Ensure the directory exists
        os.makedirs(os.path.dirname(LLM_CONFIG_FILE), exist_ok=True)

        # Create a backup first
        backup_path = f"{LLM_CONFIG_FILE}.bak"
        if os.path.exists(LLM_CONFIG_FILE):
            import shutil
            shutil.copy2(LLM_CONFIG_FILE, backup_path)

        # Write the new config
        with open(LLM_CONFIG_FILE, 'w') as f:
            f.write(raw_config)

        return jsonify({'success': True})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})
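
# compile() with mode 'exec' parses without executing, so it is a cheap syntax
# gate for the pasted Python config. Illustrative sketch only (not in the
# published file).
def _example_syntax_check(source):
    try:
        compile(source, '<string>', 'exec')
        return True
    except SyntaxError as e:
        print(f"Rejected: {e}")
        return False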

# API endpoint to save the raw collections config
@research_bp.route('/api/save_collections_config', methods=['POST'])
def save_collections_config():
    try:
        data = request.get_json()
        raw_config = data.get('raw_config', '')

        # Validate the TOML syntax
        try:
            toml.loads(raw_config)
        except toml.TomlDecodeError as e:
            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})

        # Ensure the directory exists
        os.makedirs(os.path.dirname(LOCAL_COLLECTIONS_FILE), exist_ok=True)

        # Create a backup first
        backup_path = f"{LOCAL_COLLECTIONS_FILE}.bak"
        if os.path.exists(LOCAL_COLLECTIONS_FILE):
            import shutil
            shutil.copy2(LOCAL_COLLECTIONS_FILE, backup_path)

        # Write the new config
        with open(LOCAL_COLLECTIONS_FILE, 'w') as f:
            f.write(raw_config)

        # Also trigger a reload in the collections system (assumption: the
        # loader lives in local_deep_research.local_collections; the original
        # called it without an import)
        try:
            from local_deep_research.local_collections import load_local_collections
            load_local_collections(reload=True)
        except Exception as reload_error:
            return jsonify({'success': True, 'warning': f'Config saved, but error reloading: {str(reload_error)}'})

        return jsonify({'success': True})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})

# API endpoint to save the raw main config
@research_bp.route('/api/save_main_config', methods=['POST'])
def save_raw_main_config():
    try:
        data = request.get_json()
        raw_config = data.get('raw_config', '')

        # Validate the TOML syntax
        try:
            toml.loads(raw_config)
        except toml.TomlDecodeError as e:
            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})

        # Ensure the directory exists
        os.makedirs(os.path.dirname(MAIN_CONFIG_FILE), exist_ok=True)

        # Create a backup first
        backup_path = f"{MAIN_CONFIG_FILE}.bak"
        if os.path.exists(MAIN_CONFIG_FILE):
            import shutil
            shutil.copy2(MAIN_CONFIG_FILE, backup_path)

        # Write the new config
        with open(MAIN_CONFIG_FILE, 'w') as f:
            f.write(raw_config)

        return jsonify({'success': True})
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})

@research_bp.route('/raw_config')
def get_raw_config():
    """Return the raw configuration file content"""
    try:
        # Determine which config file to load based on a query parameter,
        # reusing the module-level config paths so all editors read the
        # same files
        config_type = request.args.get('type', 'main')

        if config_type == 'main':
            with open(MAIN_CONFIG_FILE, 'r') as f:
                return f.read()
        elif config_type == 'llm':
            with open(LLM_CONFIG_FILE, 'r') as f:
                return f.read()
        elif config_type == 'collections':
            with open(LOCAL_COLLECTIONS_FILE, 'r') as f:
                return f.read()
        else:
            return "Unknown configuration type", 400
    except Exception as e:
        return str(e), 500

import subprocess
import platform

@research_bp.route('/open_file_location', methods=['POST'])
def open_file_location():
    file_path = request.form.get('file_path')

    if not file_path:
        flash('No file path provided', 'error')
        return redirect(url_for('research.settings_page'))

    # Get the directory containing the file
    dir_path = os.path.dirname(os.path.abspath(file_path))

    # Open the directory in the platform's file explorer
    try:
        if platform.system() == "Windows":
            subprocess.Popen(f'explorer "{dir_path}"')
        elif platform.system() == "Darwin":  # macOS
            subprocess.Popen(["open", dir_path])
        else:  # Linux
            subprocess.Popen(["xdg-open", dir_path])

        flash(f'Opening folder: {dir_path}', 'success')
    except Exception as e:
        flash(f'Error opening folder: {str(e)}', 'error')

    # Redirect back to the appropriate settings page
    if 'llm' in file_path:
        return redirect(url_for('research.llm_config_page'))
    elif 'collections' in file_path:
        return redirect(url_for('research.collections_config_page'))
    else:
        return redirect(url_for('research.main_config_page'))

# Register the blueprint
app.register_blueprint(research_bp)

# Also add the static route at the app level for compatibility
@app.route('/static/<path:path>')
def app_serve_static(path):
    return send_from_directory(app.static_folder, path)

# Favicon route to prevent 404 errors
@app.route('/favicon.ico')
def favicon():
    return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')


def convert_debug_to_markdown(raw_text, query):
    """
    Convert the debug-formatted text to clean markdown.

    Args:
        raw_text: The raw formatted findings with debug symbols
        query: The original research query

    Returns:
        Clean markdown-formatted text
    """
    # If there's a "DETAILED FINDINGS:" section, extract everything after it
    if "DETAILED FINDINGS:" in raw_text:
        detailed_index = raw_text.index("DETAILED FINDINGS:")
        content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
    else:
        content = raw_text

    # Remove divider lines made of === symbols
    content = "\n".join([line for line in content.split("\n")
                         if not line.strip().startswith("===") and not line.strip() == "=" * 80])

    # If a COMPLETE RESEARCH OUTPUT section exists, remove it
    if "COMPLETE RESEARCH OUTPUT" in content:
        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()

    # Remove the SEARCH QUESTIONS BY ITERATION section
    if "SEARCH QUESTIONS BY ITERATION:" in content:
        search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
        next_major_section = -1
        for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
            if marker in content[search_index:]:
                marker_pos = content.index(marker, search_index)
                if next_major_section == -1 or marker_pos < next_major_section:
                    next_major_section = marker_pos

        if next_major_section != -1:
            content = content[:search_index] + content[next_major_section:]
        else:
            # If there is no later section, drop everything from SEARCH QUESTIONS onwards
            content = content[:search_index].strip()

    return content.strip()
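
# Example input/output for convert_debug_to_markdown; the markers match the
# ones handled above. Illustrative sketch only (not in the published file).
def _example_convert():
    raw = ("SEARCH QUESTIONS BY ITERATION:\n1. q1\n"
           "DETAILED FINDINGS:\n" + "=" * 80 + "\nActual findings body\n")
    print(convert_debug_to_markdown(raw, "demo query"))  # -> Actual findings body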

def main():
    """
    Entry point for the web application when run as a command.
    This function is needed for the package's entry point to work properly.
    """
    global OPENAI_AVAILABLE

    # Check for OpenAI availability, but don't import it unless necessary
    try:
        api_key = os.environ.get("OPENAI_API_KEY")
        if api_key:
            try:
                # Only try to import if we have an API key
                import openai
                openai.api_key = api_key
                OPENAI_AVAILABLE = True
                print("OpenAI integration is available")
            except ImportError:
                print("OpenAI package not installed, integration disabled")
        else:
            print("OPENAI_API_KEY not found in environment variables, OpenAI integration disabled")
    except Exception as e:
        print(f"Error checking OpenAI availability: {e}")

    socketio.run(app, debug=True, host='0.0.0.0', port=5000, allow_unsafe_werkzeug=True)


if __name__ == '__main__':
    main()