local-deep-research 0.1.26__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. local_deep_research/__init__.py +23 -22
  2. local_deep_research/__main__.py +16 -0
  3. local_deep_research/advanced_search_system/__init__.py +7 -0
  4. local_deep_research/advanced_search_system/filters/__init__.py +8 -0
  5. local_deep_research/advanced_search_system/filters/base_filter.py +38 -0
  6. local_deep_research/advanced_search_system/filters/cross_engine_filter.py +200 -0
  7. local_deep_research/advanced_search_system/findings/base_findings.py +81 -0
  8. local_deep_research/advanced_search_system/findings/repository.py +452 -0
  9. local_deep_research/advanced_search_system/knowledge/__init__.py +1 -0
  10. local_deep_research/advanced_search_system/knowledge/base_knowledge.py +151 -0
  11. local_deep_research/advanced_search_system/knowledge/standard_knowledge.py +159 -0
  12. local_deep_research/advanced_search_system/questions/__init__.py +1 -0
  13. local_deep_research/advanced_search_system/questions/base_question.py +64 -0
  14. local_deep_research/advanced_search_system/questions/decomposition_question.py +445 -0
  15. local_deep_research/advanced_search_system/questions/standard_question.py +119 -0
  16. local_deep_research/advanced_search_system/repositories/__init__.py +7 -0
  17. local_deep_research/advanced_search_system/strategies/__init__.py +1 -0
  18. local_deep_research/advanced_search_system/strategies/base_strategy.py +118 -0
  19. local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +450 -0
  20. local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +312 -0
  21. local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +270 -0
  22. local_deep_research/advanced_search_system/strategies/standard_strategy.py +300 -0
  23. local_deep_research/advanced_search_system/tools/__init__.py +1 -0
  24. local_deep_research/advanced_search_system/tools/base_tool.py +100 -0
  25. local_deep_research/advanced_search_system/tools/knowledge_tools/__init__.py +1 -0
  26. local_deep_research/advanced_search_system/tools/question_tools/__init__.py +1 -0
  27. local_deep_research/advanced_search_system/tools/search_tools/__init__.py +1 -0
  28. local_deep_research/api/__init__.py +5 -5
  29. local_deep_research/api/research_functions.py +154 -160
  30. local_deep_research/app.py +8 -0
  31. local_deep_research/citation_handler.py +25 -16
  32. local_deep_research/{config.py → config/config_files.py} +102 -110
  33. local_deep_research/config/llm_config.py +472 -0
  34. local_deep_research/config/search_config.py +77 -0
  35. local_deep_research/defaults/__init__.py +10 -5
  36. local_deep_research/defaults/main.toml +2 -2
  37. local_deep_research/defaults/search_engines.toml +60 -34
  38. local_deep_research/main.py +121 -19
  39. local_deep_research/migrate_db.py +147 -0
  40. local_deep_research/report_generator.py +87 -45
  41. local_deep_research/search_system.py +153 -283
  42. local_deep_research/setup_data_dir.py +35 -0
  43. local_deep_research/test_migration.py +178 -0
  44. local_deep_research/utilities/__init__.py +0 -0
  45. local_deep_research/utilities/db_utils.py +49 -0
  46. local_deep_research/{utilties → utilities}/enums.py +2 -2
  47. local_deep_research/{utilties → utilities}/llm_utils.py +63 -29
  48. local_deep_research/utilities/search_utilities.py +242 -0
  49. local_deep_research/{utilties → utilities}/setup_utils.py +4 -2
  50. local_deep_research/web/__init__.py +0 -1
  51. local_deep_research/web/app.py +86 -1709
  52. local_deep_research/web/app_factory.py +289 -0
  53. local_deep_research/web/database/README.md +70 -0
  54. local_deep_research/web/database/migrate_to_ldr_db.py +289 -0
  55. local_deep_research/web/database/migrations.py +447 -0
  56. local_deep_research/web/database/models.py +117 -0
  57. local_deep_research/web/database/schema_upgrade.py +107 -0
  58. local_deep_research/web/models/database.py +294 -0
  59. local_deep_research/web/models/settings.py +94 -0
  60. local_deep_research/web/routes/api_routes.py +559 -0
  61. local_deep_research/web/routes/history_routes.py +354 -0
  62. local_deep_research/web/routes/research_routes.py +715 -0
  63. local_deep_research/web/routes/settings_routes.py +1583 -0
  64. local_deep_research/web/services/research_service.py +947 -0
  65. local_deep_research/web/services/resource_service.py +149 -0
  66. local_deep_research/web/services/settings_manager.py +669 -0
  67. local_deep_research/web/services/settings_service.py +187 -0
  68. local_deep_research/web/services/socket_service.py +210 -0
  69. local_deep_research/web/static/css/custom_dropdown.css +277 -0
  70. local_deep_research/web/static/css/settings.css +1223 -0
  71. local_deep_research/web/static/css/styles.css +525 -48
  72. local_deep_research/web/static/js/components/custom_dropdown.js +428 -0
  73. local_deep_research/web/static/js/components/detail.js +348 -0
  74. local_deep_research/web/static/js/components/fallback/formatting.js +122 -0
  75. local_deep_research/web/static/js/components/fallback/ui.js +215 -0
  76. local_deep_research/web/static/js/components/history.js +487 -0
  77. local_deep_research/web/static/js/components/logpanel.js +949 -0
  78. local_deep_research/web/static/js/components/progress.js +1107 -0
  79. local_deep_research/web/static/js/components/research.js +1865 -0
  80. local_deep_research/web/static/js/components/results.js +766 -0
  81. local_deep_research/web/static/js/components/settings.js +3981 -0
  82. local_deep_research/web/static/js/components/settings_sync.js +106 -0
  83. local_deep_research/web/static/js/main.js +226 -0
  84. local_deep_research/web/static/js/services/api.js +253 -0
  85. local_deep_research/web/static/js/services/audio.js +31 -0
  86. local_deep_research/web/static/js/services/formatting.js +119 -0
  87. local_deep_research/web/static/js/services/pdf.js +622 -0
  88. local_deep_research/web/static/js/services/socket.js +882 -0
  89. local_deep_research/web/static/js/services/ui.js +546 -0
  90. local_deep_research/web/templates/base.html +72 -0
  91. local_deep_research/web/templates/components/custom_dropdown.html +47 -0
  92. local_deep_research/web/templates/components/log_panel.html +32 -0
  93. local_deep_research/web/templates/components/mobile_nav.html +22 -0
  94. local_deep_research/web/templates/components/settings_form.html +299 -0
  95. local_deep_research/web/templates/components/sidebar.html +21 -0
  96. local_deep_research/web/templates/pages/details.html +73 -0
  97. local_deep_research/web/templates/pages/history.html +51 -0
  98. local_deep_research/web/templates/pages/progress.html +57 -0
  99. local_deep_research/web/templates/pages/research.html +139 -0
  100. local_deep_research/web/templates/pages/results.html +59 -0
  101. local_deep_research/web/templates/settings_dashboard.html +78 -192
  102. local_deep_research/web/utils/__init__.py +0 -0
  103. local_deep_research/web/utils/formatters.py +76 -0
  104. local_deep_research/web_search_engines/engines/full_search.py +18 -16
  105. local_deep_research/web_search_engines/engines/meta_search_engine.py +182 -131
  106. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +224 -139
  107. local_deep_research/web_search_engines/engines/search_engine_brave.py +88 -71
  108. local_deep_research/web_search_engines/engines/search_engine_ddg.py +48 -39
  109. local_deep_research/web_search_engines/engines/search_engine_github.py +415 -204
  110. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +123 -90
  111. local_deep_research/web_search_engines/engines/search_engine_guardian.py +210 -157
  112. local_deep_research/web_search_engines/engines/search_engine_local.py +532 -369
  113. local_deep_research/web_search_engines/engines/search_engine_local_all.py +42 -36
  114. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +358 -266
  115. local_deep_research/web_search_engines/engines/search_engine_searxng.py +212 -160
  116. local_deep_research/web_search_engines/engines/search_engine_semantic_scholar.py +213 -170
  117. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +84 -68
  118. local_deep_research/web_search_engines/engines/search_engine_wayback.py +186 -154
  119. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +115 -77
  120. local_deep_research/web_search_engines/search_engine_base.py +174 -99
  121. local_deep_research/web_search_engines/search_engine_factory.py +192 -102
  122. local_deep_research/web_search_engines/search_engines_config.py +22 -15
  123. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/METADATA +177 -97
  124. local_deep_research-0.2.2.dist-info/RECORD +135 -0
  125. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/WHEEL +1 -2
  126. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/entry_points.txt +3 -0
  127. local_deep_research/defaults/llm_config.py +0 -338
  128. local_deep_research/utilties/search_utilities.py +0 -114
  129. local_deep_research/web/static/js/app.js +0 -3763
  130. local_deep_research/web/templates/api_keys_config.html +0 -82
  131. local_deep_research/web/templates/collections_config.html +0 -90
  132. local_deep_research/web/templates/index.html +0 -348
  133. local_deep_research/web/templates/llm_config.html +0 -120
  134. local_deep_research/web/templates/main_config.html +0 -89
  135. local_deep_research/web/templates/search_engines_config.html +0 -154
  136. local_deep_research/web/templates/settings.html +0 -519
  137. local_deep_research-0.1.26.dist-info/RECORD +0 -61
  138. local_deep_research-0.1.26.dist-info/top_level.txt +0 -1
  139. /local_deep_research/{utilties → config}/__init__.py +0 -0
  140. {local_deep_research-0.1.26.dist-info → local_deep_research-0.2.2.dist-info}/licenses/LICENSE +0 -0
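The listing shows 0.2.2 splitting the former flat layout into config/, utilities/ (renamed from utilties), web/routes, web/services, web/models, and a new advanced_search_system package, with web/app.py reduced to a thin entry point around the new app_factory. As a rough orientation aid, the sketch below shows what imports against the reorganized layout might look like. The module paths are taken from the file list above; none of these lines appear verbatim in the diff, and the comments about each module's role are inferences from the names, not documented API.

    # Hypothetical import sketch for local-deep-research 0.2.2.
    # Module paths come from the file list above; roles in the comments are inferred, not documented.
    from local_deep_research.config.config_files import settings       # replaces local_deep_research.config from 0.1.26
    from local_deep_research.utilities.db_utils import get_db_setting  # note the utilties -> utilities rename
    from local_deep_research.web.app_factory import create_app         # Flask app construction moved out of web/app.py
    from local_deep_research.web.models.database import DB_PATH        # consolidated database location used by the web app

web/app.py itself, diffed below, shrinks to importing create_app, running setup_data_dir(), applying schema upgrades to an existing database, and checking whether the legacy databases need migration.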
local_deep_research/web/app.py
@@ -1,1746 +1,123 @@
- import os
- import json
- import time
- import sqlite3
- import threading
- from datetime import datetime
- from flask import Flask, render_template, request, jsonify, send_from_directory, Response, make_response, current_app, Blueprint, redirect, url_for, flash
- from flask_socketio import SocketIO, emit
- from local_deep_research.search_system import AdvancedSearchSystem
- from local_deep_research.report_generator import IntegratedReportGenerator
- # Move this import up to ensure it's available globally
- from dateutil import parser
- import traceback
- import pkg_resources
- # Import the new configuration manager
- from local_deep_research.config import get_config_dir
  import logging
- logger = logging.getLogger(__name__)
-
- CONFIG_DIR = get_config_dir() / "config"
- MAIN_CONFIG_FILE = CONFIG_DIR / "settings.toml"
- LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
- LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
- import toml
-
- # Set flag for tracking OpenAI availability - we'll check it only when needed
- OPENAI_AVAILABLE = False
-
- # Initialize Flask app
- try:
-     import os
-     import logging
-     from local_deep_research.utilties.setup_utils import setup_user_directories
-
-     # Configure logging
-     logging.basicConfig(level=logging.INFO)
-     logger = logging.getLogger(__name__)
-
-     # Explicitly run setup
-     logger.info("Initializing configuration...")
-     setup_user_directories()
-
-     # Get directories based on package installation
-     PACKAGE_DIR = pkg_resources.resource_filename('local_deep_research', 'web')
-     STATIC_DIR = os.path.join(PACKAGE_DIR, 'static')
-     TEMPLATE_DIR = os.path.join(PACKAGE_DIR, 'templates')
-
-     # Setup logging
-     logging.basicConfig(level=logging.INFO)
-
-     # Create directories and default configs if needed
-     setup_user_directories()
-
-     # Initialize Flask app with package directories
-     app = Flask(__name__,
-                 static_folder=STATIC_DIR,
-                 template_folder=TEMPLATE_DIR)
-     print(f"Using package static path: {STATIC_DIR}")
-     print(f"Using package template path: {TEMPLATE_DIR}")
- except Exception as e:
-     # Fallback for development
-     print(f"Package directories not found, using fallback paths: {str(e)}")
-     app = Flask(__name__,
-                 static_folder=os.path.abspath('static'),
-                 template_folder=os.path.abspath('templates'))
- app.config['SECRET_KEY'] = 'deep-research-secret-key'
-
- # Create a Blueprint for the research application
- research_bp = Blueprint('research', __name__, url_prefix='/research')
-
- # Add improved Socket.IO configuration with better error handling
- socketio = SocketIO(
-     app,
-     cors_allowed_origins="*",
-     async_mode='threading',
-     path='/research/socket.io',
-     logger=True,
-     engineio_logger=True,
-     ping_timeout=20,
-     ping_interval=5
+ import os
+ import sys
+
+ from ..config.config_files import settings
+ from ..setup_data_dir import setup_data_dir
+ from ..utilities.db_utils import get_db_setting
+ from .app_factory import create_app
+ from .models.database import (
+     DB_PATH,
+     LEGACY_DEEP_RESEARCH_DB,
+     LEGACY_RESEARCH_HISTORY_DB,
  )
 
- # Active research processes and socket subscriptions
- active_research = {}
- socket_subscriptions = {}
-
- # Add termination flags dictionary
- termination_flags = {}
-
- # Database setup
- DB_PATH = 'research_history.db'
-
- # Output directory for research results
- OUTPUT_DIR = 'research_outputs'
-
- # Add Content Security Policy headers to allow Socket.IO to function
- @app.after_request
- def add_security_headers(response):
-     # Define a permissive CSP for development that allows Socket.IO to function
-     csp = (
-         "default-src 'self'; "
-         "connect-src 'self' ws: wss: http: https:; "
-         "script-src 'self' 'unsafe-inline' 'unsafe-eval' cdnjs.cloudflare.com cdn.jsdelivr.net unpkg.com; "
-         "style-src 'self' 'unsafe-inline' cdnjs.cloudflare.com; "
-         "font-src 'self' cdnjs.cloudflare.com; "
-         "img-src 'self' data:; "
-         "worker-src blob:; "
-         "frame-src 'self';"
-     )
-
-     response.headers['Content-Security-Policy'] = csp
-     response.headers['X-Content-Security-Policy'] = csp
-
-     # Add CORS headers for API requests
-     if request.path.startswith('/api/'):
-         response.headers['Access-Control-Allow-Origin'] = '*'
-         response.headers['Access-Control-Allow-Methods'] = 'GET, POST, DELETE, OPTIONS'
-         response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
-
-     return response
-
- # Add a middleware layer to handle abrupt disconnections
- @app.before_request
- def handle_websocket_requests():
-     if request.path.startswith('/research/socket.io'):
-         try:
-             if not request.environ.get('werkzeug.socket'):
-                 return
-         except Exception as e:
-             print(f"WebSocket preprocessing error: {e}")
-             # Return empty response to prevent further processing
-             return '', 200
-
134
- def init_db():
135
- conn = sqlite3.connect(DB_PATH)
136
- cursor = conn.cursor()
137
-
138
- # Create the table if it doesn't exist
139
- cursor.execute('''
140
- CREATE TABLE IF NOT EXISTS research_history (
141
- id INTEGER PRIMARY KEY AUTOINCREMENT,
142
- query TEXT NOT NULL,
143
- mode TEXT NOT NULL,
144
- status TEXT NOT NULL,
145
- created_at TEXT NOT NULL,
146
- completed_at TEXT,
147
- duration_seconds INTEGER,
148
- report_path TEXT,
149
- metadata TEXT,
150
- progress_log TEXT,
151
- progress INTEGER
152
- )
153
- ''')
154
-
155
- # Create a dedicated table for research logs
156
- cursor.execute('''
157
- CREATE TABLE IF NOT EXISTS research_logs (
158
- id INTEGER PRIMARY KEY AUTOINCREMENT,
159
- research_id INTEGER NOT NULL,
160
- timestamp TEXT NOT NULL,
161
- message TEXT NOT NULL,
162
- log_type TEXT NOT NULL,
163
- progress INTEGER,
164
- metadata TEXT,
165
- FOREIGN KEY (research_id) REFERENCES research_history (id) ON DELETE CASCADE
166
- )
167
- ''')
168
-
169
- # Check if the duration_seconds column exists, add it if missing
170
- cursor.execute('PRAGMA table_info(research_history)')
171
- columns = [column[1] for column in cursor.fetchall()]
172
-
173
- if 'duration_seconds' not in columns:
174
- print("Adding missing 'duration_seconds' column to research_history table")
175
- cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
176
-
177
- # Check if the progress column exists, add it if missing
178
- if 'progress' not in columns:
179
- print("Adding missing 'progress' column to research_history table")
180
- cursor.execute('ALTER TABLE research_history ADD COLUMN progress INTEGER')
181
-
182
- # Enable foreign key support
183
- cursor.execute('PRAGMA foreign_keys = ON')
184
-
185
- conn.commit()
186
- conn.close()
187
-
188
- # Helper function to calculate duration between created_at and completed_at timestamps
189
- def calculate_duration(created_at_str):
190
- """
191
- Calculate duration in seconds between created_at timestamp and now.
192
- Handles various timestamp formats and returns None if calculation fails.
193
- """
194
- if not created_at_str:
195
- return None
196
-
197
- now = datetime.utcnow()
198
- duration_seconds = None
199
-
200
- try:
201
- # Proper parsing of ISO format
202
- if 'T' in created_at_str: # ISO format with T separator
203
- start_time = datetime.fromisoformat(created_at_str)
204
- else: # Older format without T
205
- # Try different formats
206
- try:
207
- start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S.%f')
208
- except ValueError:
209
- try:
210
- start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S')
211
- except ValueError:
212
- # Last resort fallback
213
- start_time = datetime.fromisoformat(created_at_str.replace(' ', 'T'))
214
-
215
- # Ensure we're comparing UTC times
216
- duration_seconds = int((now - start_time).total_seconds())
217
- except Exception as e:
218
- print(f"Error calculating duration: {str(e)}")
219
- # Fallback method if parsing fails
220
- try:
221
- start_time_fallback = parser.parse(created_at_str)
222
- duration_seconds = int((now - start_time_fallback).total_seconds())
223
- except:
224
- print(f"Fallback duration calculation also failed for timestamp: {created_at_str}")
225
-
226
- return duration_seconds
227
-
228
- # Add these helper functions after the calculate_duration function
229
-
230
-
231
- def add_log_to_db(research_id, message, log_type='info', progress=None, metadata=None):
232
- """
233
- Store a log entry in the database
234
-
235
- Args:
236
- research_id: ID of the research
237
- message: Log message text
238
- log_type: Type of log (info, error, milestone)
239
- progress: Progress percentage (0-100)
240
- metadata: Additional metadata as dictionary (will be stored as JSON)
241
- """
242
- try:
243
- timestamp = datetime.utcnow().isoformat()
244
- metadata_json = json.dumps(metadata) if metadata else None
245
-
246
- conn = sqlite3.connect(DB_PATH)
247
- cursor = conn.cursor()
248
- cursor.execute(
249
- 'INSERT INTO research_logs (research_id, timestamp, message, log_type, progress, metadata) '
250
- 'VALUES (?, ?, ?, ?, ?, ?)',
251
- (research_id, timestamp, message, log_type, progress, metadata_json)
252
- )
253
- conn.commit()
254
- conn.close()
255
- return True
256
- except Exception as e:
257
- print(f"Error adding log to database: {str(e)}")
258
- print(traceback.format_exc())
259
- return False
260
-
261
- def get_logs_for_research(research_id):
262
- """
263
- Retrieve all logs for a specific research ID
264
-
265
- Args:
266
- research_id: ID of the research
267
-
268
- Returns:
269
- List of log entries as dictionaries
270
- """
271
- try:
272
- conn = sqlite3.connect(DB_PATH)
273
- conn.row_factory = sqlite3.Row
274
- cursor = conn.cursor()
275
- cursor.execute(
276
- 'SELECT * FROM research_logs WHERE research_id = ? ORDER BY timestamp ASC',
277
- (research_id,)
278
- )
279
- results = cursor.fetchall()
280
- conn.close()
281
-
282
- logs = []
283
- for result in results:
284
- log_entry = dict(result)
285
- # Parse metadata JSON if it exists
286
- if log_entry.get('metadata'):
287
- try:
288
- log_entry['metadata'] = json.loads(log_entry['metadata'])
289
- except:
290
- log_entry['metadata'] = {}
291
- else:
292
- log_entry['metadata'] = {}
293
-
294
- # Convert entry for frontend consumption
295
- formatted_entry = {
296
- 'time': log_entry['timestamp'],
297
- 'message': log_entry['message'],
298
- 'progress': log_entry['progress'],
299
- 'metadata': log_entry['metadata'],
300
- 'type': log_entry['log_type']
301
- }
302
- logs.append(formatted_entry)
303
-
304
- return logs
305
- except Exception as e:
306
- print(f"Error retrieving logs from database: {str(e)}")
307
- print(traceback.format_exc())
308
- return []
309
-
310
- # Initialize the database on startup
311
- def initialize():
312
- init_db()
313
-
314
- # Call initialize immediately when app is created
315
- initialize()
316
-
317
- # Route for index page - keep this at root level for easy access
318
- @app.route('/')
319
- def root_index():
320
- return redirect(url_for('research.index'))
321
-
322
- # Update all routes with the research prefix
323
- @research_bp.route('/')
324
- def index():
325
- return render_template('index.html')
326
-
327
- @research_bp.route('/static/<path:path>')
328
- def serve_static(path):
329
- try:
330
- print(f"Serving static file: {path}")
331
- print(f"Static folder path: {app.static_folder}")
332
- return send_from_directory(app.static_folder, path)
333
- except Exception as e:
334
- print(f"Error serving static file {path}: {str(e)}")
335
- return f"Error serving file: {str(e)}", 404
336
-
337
- @research_bp.route('/api/history', methods=['GET'])
338
- def get_history():
339
- """Get the research history"""
340
- try:
341
- conn = sqlite3.connect(DB_PATH)
342
- conn.row_factory = sqlite3.Row
343
- cursor = conn.cursor()
344
-
345
- # Get all history records ordered by latest first
346
- cursor.execute('SELECT * FROM research_history ORDER BY created_at DESC')
347
- results = cursor.fetchall()
348
- conn.close()
349
-
350
- # Convert to list of dicts
351
- history = []
352
- for result in results:
353
- item = dict(result)
354
-
355
- # Ensure all keys exist with default values
356
- if 'id' not in item:
357
- item['id'] = None
358
- if 'query' not in item:
359
- item['query'] = 'Untitled Research'
360
- if 'mode' not in item:
361
- item['mode'] = 'quick'
362
- if 'status' not in item:
363
- item['status'] = 'unknown'
364
- if 'created_at' not in item:
365
- item['created_at'] = None
366
- if 'completed_at' not in item:
367
- item['completed_at'] = None
368
- if 'duration_seconds' not in item:
369
- item['duration_seconds'] = None
370
- if 'report_path' not in item:
371
- item['report_path'] = None
372
- if 'metadata' not in item:
373
- item['metadata'] = '{}'
374
- if 'progress_log' not in item:
375
- item['progress_log'] = '[]'
376
-
377
- # Ensure timestamps are in ISO format
378
- if item['created_at'] and 'T' not in item['created_at']:
379
- try:
380
- # Convert to ISO format if it's not already
381
- dt = parser.parse(item['created_at'])
382
- item['created_at'] = dt.isoformat()
383
- except:
384
- pass
385
-
386
- if item['completed_at'] and 'T' not in item['completed_at']:
387
- try:
388
- # Convert to ISO format if it's not already
389
- dt = parser.parse(item['completed_at'])
390
- item['completed_at'] = dt.isoformat()
391
- except:
392
- pass
393
-
394
- # Recalculate duration based on timestamps if it's null but both timestamps exist
395
- if item['duration_seconds'] is None and item['created_at'] and item['completed_at']:
396
- try:
397
- start_time = parser.parse(item['created_at'])
398
- end_time = parser.parse(item['completed_at'])
399
- item['duration_seconds'] = int((end_time - start_time).total_seconds())
400
- except Exception as e:
401
- print(f"Error recalculating duration: {str(e)}")
402
-
403
- history.append(item)
404
-
405
- # Add CORS headers
406
- response = make_response(jsonify(history))
407
- response.headers.add('Access-Control-Allow-Origin', '*')
408
- response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
409
- response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
410
- return response
411
- except Exception as e:
412
- print(f"Error getting history: {str(e)}")
413
- print(traceback.format_exc())
414
- # Return empty array with CORS headers
415
- response = make_response(jsonify([]))
416
- response.headers.add('Access-Control-Allow-Origin', '*')
417
- response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
418
- response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
419
- return response
420
-
421
- @research_bp.route('/api/start_research', methods=['POST'])
422
- def start_research():
423
- data = request.json
424
- query = data.get('query')
425
- mode = data.get('mode', 'quick')
426
-
427
- if not query:
428
- return jsonify({'status': 'error', 'message': 'Query is required'}), 400
429
-
430
- # Check if there's any active research that's actually still running
431
- if active_research:
432
- # Verify each active research is still valid
433
- stale_research_ids = []
434
- for research_id, research_data in list(active_research.items()):
435
- # Check database status
436
- conn = sqlite3.connect(DB_PATH)
437
- cursor = conn.cursor()
438
- cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
439
- result = cursor.fetchone()
440
- conn.close()
441
-
442
- # If the research doesn't exist in DB or is not in_progress, it's stale
443
- if not result or result[0] != 'in_progress':
444
- stale_research_ids.append(research_id)
445
- # Also check if thread is still alive
446
- elif not research_data.get('thread') or not research_data.get('thread').is_alive():
447
- stale_research_ids.append(research_id)
448
-
449
- # Clean up any stale research processes
450
- for stale_id in stale_research_ids:
451
- print(f"Cleaning up stale research process: {stale_id}")
452
- if stale_id in active_research:
453
- del active_research[stale_id]
454
- if stale_id in termination_flags:
455
- del termination_flags[stale_id]
456
-
457
- # After cleanup, check if there's still active research
458
- if active_research:
459
- return jsonify({
460
- 'status': 'error',
461
- 'message': 'Another research is already in progress. Please wait for it to complete.'
462
- }), 409
463
-
464
- # Create a record in the database with explicit UTC timestamp
465
- created_at = datetime.utcnow().isoformat()
466
- conn = sqlite3.connect(DB_PATH)
467
- cursor = conn.cursor()
468
- cursor.execute(
469
- 'INSERT INTO research_history (query, mode, status, created_at, progress_log) VALUES (?, ?, ?, ?, ?)',
470
- (query, mode, 'in_progress', created_at, json.dumps([{"time": created_at, "message": "Research started", "progress": 0}]))
471
- )
472
- research_id = cursor.lastrowid
473
- conn.commit()
474
- conn.close()
475
-
476
- # Start research process in a background thread
477
- thread = threading.Thread(
478
- target=run_research_process,
479
- args=(research_id, query, mode)
480
- )
481
- thread.daemon = True
482
- thread.start()
483
-
484
- active_research[research_id] = {
485
- 'thread': thread,
486
- 'progress': 0,
487
- 'status': 'in_progress',
488
- 'log': [{"time": created_at, "message": "Research started", "progress": 0}]
489
- }
490
-
491
- return jsonify({
492
- 'status': 'success',
493
- 'research_id': research_id
494
- })
495
-
496
- @research_bp.route('/api/research/<int:research_id>')
497
- def get_research_status(research_id):
498
- conn = sqlite3.connect(DB_PATH)
499
- conn.row_factory = sqlite3.Row
500
- cursor = conn.cursor()
501
- cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
502
- result = dict(cursor.fetchone() or {})
503
- conn.close()
504
-
505
- if not result:
506
- return jsonify({'status': 'error', 'message': 'Research not found'}), 404
507
-
508
- # Add progress information
509
- if research_id in active_research:
510
- result['progress'] = active_research[research_id]['progress']
511
- result['log'] = active_research[research_id]['log']
512
- elif result.get('status') == 'completed':
513
- result['progress'] = 100
514
- try:
515
- result['log'] = json.loads(result.get('progress_log', '[]'))
516
- except:
517
- result['log'] = []
518
- else:
519
- result['progress'] = 0
520
- try:
521
- result['log'] = json.loads(result.get('progress_log', '[]'))
522
- except:
523
- result['log'] = []
524
-
525
- return jsonify(result)
+ # Initialize logger
+ logger = logging.getLogger(__name__)
 
527
- @research_bp.route('/api/research/<int:research_id>/details')
528
- def get_research_details(research_id):
529
- """Get detailed progress log for a specific research"""
530
- conn = sqlite3.connect(DB_PATH)
531
- conn.row_factory = sqlite3.Row
532
- cursor = conn.cursor()
533
- cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
534
- result = dict(cursor.fetchone() or {})
535
- conn.close()
536
-
537
- if not result:
538
- return jsonify({'status': 'error', 'message': 'Research not found'}), 404
539
-
540
- # Get logs from the dedicated log database
541
- logs = get_logs_for_research(research_id)
542
-
543
- # If this is an active research, merge with any in-memory logs
544
- if research_id in active_research:
545
- # Use the logs from memory temporarily until they're saved to the database
546
- memory_logs = active_research[research_id]['log']
547
-
548
- # Filter out logs that are already in the database by timestamp
549
- db_timestamps = {log['time'] for log in logs}
550
- unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
551
-
552
- # Add unique memory logs to our return list
553
- logs.extend(unique_memory_logs)
554
-
555
- # Sort logs by timestamp
556
- logs.sort(key=lambda x: x['time'])
557
-
558
- return jsonify({
559
- 'status': 'success',
560
- 'research_id': research_id,
561
- 'query': result.get('query'),
562
- 'mode': result.get('mode'),
563
- 'status': result.get('status'),
564
- 'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
565
- 'created_at': result.get('created_at'),
566
- 'completed_at': result.get('completed_at'),
567
- 'log': logs
568
- })
+ # Ensure data directory exists
+ setup_data_dir()
 
570
- @research_bp.route('/api/report/<int:research_id>')
571
- def get_report(research_id):
572
- conn = sqlite3.connect(DB_PATH)
573
- conn.row_factory = sqlite3.Row
574
- cursor = conn.cursor()
575
- cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
576
- result = dict(cursor.fetchone() or {})
577
- conn.close()
578
-
579
- if not result or not result.get('report_path'):
580
- return jsonify({'status': 'error', 'message': 'Report not found'}), 404
581
-
+ # Run schema upgrades if database exists
+ if os.path.exists(DB_PATH):
      try:
583
- with open(result['report_path'], 'r', encoding='utf-8') as f:
584
- content = f.read()
585
- return jsonify({
586
- 'status': 'success',
587
- 'content': content,
588
- 'metadata': json.loads(result.get('metadata', '{}'))
589
- })
590
- except Exception as e:
591
- return jsonify({'status': 'error', 'message': str(e)}), 500
592
-
593
- @research_bp.route('/research/details/<int:research_id>')
594
- def research_details_page(research_id):
595
- """Render the research details page"""
596
- return render_template('index.html')
597
-
598
- @socketio.on('connect')
599
- def handle_connect():
600
- print(f"Client connected: {request.sid}")
+         logger.info("Running schema upgrades on existing database")
+         from .database.schema_upgrade import run_schema_upgrades
 
602
- @socketio.on('disconnect')
603
- def handle_disconnect():
604
- try:
605
- print(f"Client disconnected: {request.sid}")
606
- # Clean up subscriptions for this client
607
- for research_id, subscribers in list(socket_subscriptions.items()):
608
- if request.sid in subscribers:
609
- subscribers.remove(request.sid)
610
- if not subscribers:
611
- socket_subscriptions.pop(research_id, None)
612
- print(f"Removed empty subscription for research {research_id}")
+         run_schema_upgrades()
      except Exception as e:
614
- print(f"Error handling disconnect: {e}")
615
-
616
- @socketio.on('subscribe_to_research')
617
- def handle_subscribe(data):
618
- research_id = data.get('research_id')
619
- if research_id:
620
- # First check if this research is still active
621
- conn = sqlite3.connect(DB_PATH)
622
- cursor = conn.cursor()
623
- cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
624
- result = cursor.fetchone()
625
- conn.close()
626
-
627
- # Only allow subscription to valid research
628
- if result:
629
- status = result[0]
630
-
631
- # Initialize subscription set if needed
632
- if research_id not in socket_subscriptions:
633
- socket_subscriptions[research_id] = set()
634
-
635
- # Add this client to the subscribers
636
- socket_subscriptions[research_id].add(request.sid)
637
- print(f"Client {request.sid} subscribed to research {research_id}")
638
-
639
- # Send current status immediately if available
640
- if research_id in active_research:
641
- progress = active_research[research_id]['progress']
642
- latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
643
-
644
- if latest_log:
645
- emit(f'research_progress_{research_id}', {
646
- 'progress': progress,
647
- 'message': latest_log.get('message', 'Processing...'),
648
- 'status': 'in_progress',
649
- 'log_entry': latest_log
650
- })
651
- elif status in ['completed', 'failed', 'suspended']:
652
- # Send final status for completed research
653
- emit(f'research_progress_{research_id}', {
654
- 'progress': 100 if status == 'completed' else 0,
655
- 'message': 'Research completed successfully' if status == 'completed' else
656
- 'Research failed' if status == 'failed' else 'Research was suspended',
657
- 'status': status,
658
- 'log_entry': {
659
- 'time': datetime.utcnow().isoformat(),
660
- 'message': f'Research is {status}',
661
- 'progress': 100 if status == 'completed' else 0,
662
- 'metadata': {'phase': 'complete' if status == 'completed' else 'error'}
663
- }
664
- })
665
- else:
666
- # Research not found
667
- emit('error', {'message': f'Research ID {research_id} not found'})
668
-
669
- @socketio.on_error
670
- def handle_socket_error(e):
671
- print(f"Socket.IO error: {str(e)}")
672
- # Don't propagate exceptions to avoid crashing the server
673
- return False
674
-
675
- @socketio.on_error_default
676
- def handle_default_error(e):
677
- print(f"Unhandled Socket.IO error: {str(e)}")
678
- # Don't propagate exceptions to avoid crashing the server
679
- return False
680
-
681
- # Function to clean up resources for a completed research
682
- def cleanup_research_resources(research_id):
683
- """Clean up resources for a completed research"""
684
- print(f"Cleaning up resources for research {research_id}")
685
-
686
- # Get the current status from the database to determine the final status message
687
- current_status = "completed" # Default
688
- try:
689
- conn = sqlite3.connect(DB_PATH)
690
- cursor = conn.cursor()
691
- cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
692
- result = cursor.fetchone()
693
- if result and result[0]:
694
- current_status = result[0]
695
- conn.close()
696
- except Exception as e:
697
- print(f"Error retrieving research status during cleanup: {e}")
698
-
699
- # Remove from active research
700
- if research_id in active_research:
701
- del active_research[research_id]
702
-
703
- # Remove from termination flags
704
- if research_id in termination_flags:
705
- del termination_flags[research_id]
706
-
707
- # Send a final message to any remaining subscribers with explicit status
708
- if research_id in socket_subscriptions and socket_subscriptions[research_id]:
709
- # Use the proper status message based on database status
710
- if current_status == 'suspended' or current_status == 'failed':
711
- final_message = {
712
- 'status': current_status,
713
- 'message': f'Research was {current_status}',
714
- 'progress': 0, # For suspended research, show 0% not 100%
715
- }
716
- else:
717
- final_message = {
718
- 'status': 'completed',
719
- 'message': 'Research process has ended and resources have been cleaned up',
720
- 'progress': 100,
721
- }
722
-
723
- try:
724
- print(f"Sending final {current_status} socket message for research {research_id}")
725
- # Use emit to all, not just subscribers
726
- socketio.emit(f'research_progress_{research_id}', final_message)
727
-
728
- # Also emit to specific subscribers
729
- for sid in socket_subscriptions[research_id]:
730
- try:
731
- socketio.emit(
732
- f'research_progress_{research_id}',
733
- final_message,
734
- room=sid
735
- )
736
- except Exception as sub_err:
737
- print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
738
- except Exception as e:
739
- print(f"Error sending final cleanup message: {e}")
740
-
741
- # Don't immediately remove subscriptions - let clients disconnect naturally
+         logger.error(f"Error running schema upgrades: {e}")
+         logger.warning("Continuing without schema upgrades")
 
743
- def run_research_process(research_id, query, mode):
744
- """Run the research process in the background for a given research ID"""
745
- try:
746
- # Check if this research has been terminated before we even start
747
- if research_id in termination_flags and termination_flags[research_id]:
748
- print(f"Research {research_id} was terminated before starting")
749
- cleanup_research_resources(research_id)
750
- return
751
-
752
- print(f"Starting research process for ID {research_id}, query: {query}")
753
-
754
- # Set up the AI Context Manager
755
- output_dir = os.path.join(OUTPUT_DIR, f"research_{research_id}")
756
- os.makedirs(output_dir, exist_ok=True)
757
-
758
- # Set up progress callback
759
- def progress_callback(message, progress_percent, metadata):
760
- # FREQUENT TERMINATION CHECK: Check for termination at each callback
761
- if research_id in termination_flags and termination_flags[research_id]:
762
- # Explicitly set the status to suspended in the database
763
- conn = sqlite3.connect(DB_PATH)
764
- cursor = conn.cursor()
765
- # Calculate duration up to termination point - using UTC consistently
766
- now = datetime.utcnow()
767
- completed_at = now.isoformat()
768
-
769
- # Get the start time from the database
770
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
771
- result = cursor.fetchone()
772
-
773
- # Calculate the duration
774
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
775
-
776
- # Update the database with suspended status
777
- cursor.execute(
778
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
779
- ('suspended', completed_at, duration_seconds, research_id)
780
- )
781
- conn.commit()
782
- conn.close()
783
-
784
- # Clean up resources
785
- cleanup_research_resources(research_id)
786
-
787
- # Raise exception to exit the process
788
- raise Exception("Research was terminated by user")
789
-
790
- timestamp = datetime.utcnow().isoformat()
791
-
792
- # Adjust progress based on research mode
793
- adjusted_progress = progress_percent
794
- if mode == 'detailed' and metadata.get('phase') == 'output_generation':
795
- # For detailed mode, we need to adjust the progress range
796
- # because detailed reports take longer after the search phase
797
- adjusted_progress = min(80, progress_percent)
798
- elif mode == 'detailed' and metadata.get('phase') == 'report_generation':
799
- # Scale the progress from 80% to 95% for the report generation phase
800
- # Map progress_percent values (0-100%) to the (80-95%) range
801
- if progress_percent is not None:
802
- normalized = progress_percent / 100
803
- adjusted_progress = 80 + (normalized * 15)
804
- elif mode == 'quick' and metadata.get('phase') == 'output_generation':
805
- # For quick mode, ensure we're at least at 85% during output generation
806
- adjusted_progress = max(85, progress_percent)
807
- # Map any further progress within output_generation to 85-95% range
808
- if progress_percent is not None and progress_percent > 0:
809
- normalized = progress_percent / 100
810
- adjusted_progress = 85 + (normalized * 10)
811
-
812
- # Don't let progress go backwards
813
- if research_id in active_research and adjusted_progress is not None:
814
- current_progress = active_research[research_id].get('progress', 0)
815
- adjusted_progress = max(current_progress, adjusted_progress)
816
-
817
- log_entry = {
818
- "time": timestamp,
819
- "message": message,
820
- "progress": adjusted_progress,
821
- "metadata": metadata
822
- }
823
-
824
- # Check if termination was requested
825
- if research_id in termination_flags and termination_flags[research_id]:
826
- # Explicitly set the status to suspended in the database
827
- conn = sqlite3.connect(DB_PATH)
828
- cursor = conn.cursor()
829
- # Calculate duration up to termination point - using UTC consistently
830
- now = datetime.utcnow()
831
- completed_at = now.isoformat()
832
-
833
- # Get the start time from the database
834
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
835
- result = cursor.fetchone()
836
-
837
- # Calculate the duration
838
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
839
-
840
- # Update the database with suspended status
841
- cursor.execute(
842
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
843
- ('suspended', completed_at, duration_seconds, research_id)
844
- )
845
- conn.commit()
846
- conn.close()
847
-
848
- # Clean up resources
849
- cleanup_research_resources(research_id)
850
-
851
- # Raise exception to exit the process
852
- raise Exception("Research was terminated by user")
853
-
854
- # Update active research record
855
- if research_id in active_research:
856
- active_research[research_id]['log'].append(log_entry)
857
- if adjusted_progress is not None:
858
- active_research[research_id]['progress'] = adjusted_progress
859
-
860
- # Determine log type for database storage
861
- log_type = 'info'
862
- if metadata and metadata.get('phase'):
863
- phase = metadata.get('phase')
864
- if phase in ['complete', 'iteration_complete']:
865
- log_type = 'milestone'
866
- elif phase == 'error' or 'error' in message.lower():
867
- log_type = 'error'
868
-
869
- # Always save logs to the new research_logs table
870
- add_log_to_db(
871
- research_id,
872
- message,
873
- log_type=log_type,
874
- progress=adjusted_progress,
875
- metadata=metadata
876
- )
877
-
878
- # Update progress in the research_history table (for backward compatibility)
879
- conn = sqlite3.connect(DB_PATH)
880
- cursor = conn.cursor()
881
-
882
- # Update the progress and log separately to avoid race conditions with reading/writing the log
883
- if adjusted_progress is not None:
884
- cursor.execute(
885
- 'UPDATE research_history SET progress = ? WHERE id = ?',
886
- (adjusted_progress, research_id)
887
- )
888
-
889
- # Add the log entry to the progress_log
890
- cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
891
- log_result = cursor.fetchone()
892
-
893
- if log_result:
894
- try:
895
- current_log = json.loads(log_result[0])
896
- except:
897
- current_log = []
898
-
899
- current_log.append(log_entry)
900
- cursor.execute(
901
- 'UPDATE research_history SET progress_log = ? WHERE id = ?',
902
- (json.dumps(current_log), research_id)
903
- )
904
-
905
- conn.commit()
906
- conn.close()
907
-
908
- # Emit a socket event
909
- try:
910
- # Basic event data
911
- event_data = {
912
- 'message': message,
913
- 'progress': adjusted_progress
914
- }
915
-
916
- # Add log entry in full format for detailed logging on client
917
- if metadata:
918
- event_data['log_entry'] = log_entry
919
-
920
- # Send to all subscribers and broadcast channel
921
- socketio.emit(f'research_progress_{research_id}', event_data)
922
-
923
- if research_id in socket_subscriptions:
924
- for sid in socket_subscriptions[research_id]:
925
- try:
926
- socketio.emit(
927
- f'research_progress_{research_id}',
928
- event_data,
929
- room=sid
930
- )
931
- except Exception as err:
932
- print(f"Error emitting to subscriber {sid}: {str(err)}")
933
- except Exception as e:
934
- print(f"Socket emit error (non-critical): {str(e)}")
935
-
936
- # FUNCTION TO CHECK TERMINATION DURING LONG-RUNNING OPERATIONS
937
- def check_termination():
938
- if research_id in termination_flags and termination_flags[research_id]:
939
- # Explicitly set the status to suspended in the database
940
- conn = sqlite3.connect(DB_PATH)
941
- cursor = conn.cursor()
942
- now = datetime.utcnow()
943
- completed_at = now.isoformat()
944
-
945
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
946
- result = cursor.fetchone()
947
- duration_seconds = calculate_duration(result[0]) if result and result[0] else None
948
-
949
- cursor.execute(
950
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ? WHERE id = ?',
951
- ('suspended', completed_at, duration_seconds, research_id)
952
- )
953
- conn.commit()
954
- conn.close()
955
-
956
- # Clean up resources
957
- cleanup_research_resources(research_id)
958
-
959
- # Raise exception to exit the process
960
- raise Exception("Research was terminated by user during long-running operation")
961
- return False # Not terminated
962
-
963
- # Set the progress callback in the system
964
- system = AdvancedSearchSystem()
965
- system.set_progress_callback(progress_callback)
966
-
967
- # Run the search
968
- progress_callback("Starting research process", 5, {"phase": "init"})
969
-
970
- try:
971
- results = system.analyze_topic(query)
972
- if mode == 'quick':
973
- progress_callback("Search complete, preparing to generate summary...", 85, {"phase": "output_generation"})
974
- else:
975
- progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
976
- except Exception as search_error:
977
- # Better handling of specific search errors
978
- error_message = str(search_error)
979
- error_type = "unknown"
980
-
981
- # Extract error details for common issues
982
- if "status code: 503" in error_message:
983
- error_message = "Ollama AI service is unavailable (HTTP 503). Please check that Ollama is running properly on your system."
984
- error_type = "ollama_unavailable"
985
- elif "status code: 404" in error_message:
986
- error_message = "Ollama model not found (HTTP 404). Please check that you have pulled the required model."
987
- error_type = "model_not_found"
988
- elif "status code:" in error_message:
989
- # Extract the status code for other HTTP errors
990
- status_code = error_message.split("status code:")[1].strip()
991
- error_message = f"API request failed with status code {status_code}. Please check your configuration."
992
- error_type = "api_error"
993
- elif "connection" in error_message.lower():
994
- error_message = "Connection error. Please check that your LLM service (Ollama/API) is running and accessible."
995
- error_type = "connection_error"
996
-
997
- # Raise with improved error message
998
- raise Exception(f"{error_message} (Error type: {error_type})")
999
-
1000
- # Generate output based on mode
1001
- if mode == 'quick':
1002
- # Quick Summary
1003
- if results.get('findings'):
1004
-
1005
- raw_formatted_findings = results['formatted_findings']
1006
- logger.info(f"Found formatted_findings of length: {len(str(raw_formatted_findings))}")
1007
-
1008
- try:
1009
- clean_markdown = raw_formatted_findings
1010
- # ADDED CODE: Convert debug output to clean markdown
1011
- #clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
1012
- print(f"Successfully converted to clean markdown of length: {len(clean_markdown)}")
1013
-
1014
- # First send a progress update for generating the summary
1015
- progress_callback("Generating clean summary from research data...", 90, {"phase": "output_generation"})
1016
-
1017
- # Save as markdown file
1018
- output_dir = "research_outputs"
1019
- if not os.path.exists(output_dir):
1020
- os.makedirs(output_dir)
1021
-
1022
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
1023
- safe_query = safe_query.replace(" ", "_").lower()
1024
- report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
1025
-
1026
- # Send progress update for writing to file
1027
- progress_callback("Writing research report to file...", 95, {"phase": "report_complete"})
1028
-
1029
- print(f"Writing report to: {report_path}")
1030
- with open(report_path, "w", encoding="utf-8") as f:
1031
- f.write("# Quick Research Summary\n\n")
1032
- f.write(f"Query: {query}\n\n")
1033
- f.write(clean_markdown) # Use clean markdown instead of raw findings
1034
- f.write("\n\n## Research Metrics\n")
1035
- f.write(f"- Search Iterations: {results['iterations']}\n")
1036
- f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
1037
-
1038
- # Update database
1039
- metadata = {
1040
- 'iterations': results['iterations'],
1041
- 'generated_at': datetime.utcnow().isoformat()
1042
- }
1043
-
1044
- # Calculate duration in seconds - using UTC consistently
1045
- now = datetime.utcnow()
1046
- completed_at = now.isoformat()
1047
-
1048
- print(f"Updating database for research_id: {research_id}")
1049
- # Get the start time from the database
1050
- conn = sqlite3.connect(DB_PATH)
1051
- cursor = conn.cursor()
1052
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
1053
- result = cursor.fetchone()
1054
-
1055
- # Use the helper function for consistent duration calculation
1056
- duration_seconds = calculate_duration(result[0])
1057
-
1058
- # Update the record
1059
- cursor.execute(
1060
- 'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
1061
- ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
1062
- )
1063
- conn.commit()
1064
- conn.close()
1065
- print(f"Database updated successfully for research_id: {research_id}")
1066
-
1067
- # Send the final completion message
1068
- progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
1069
-
1070
- # Clean up resources
1071
- print(f"Cleaning up resources for research_id: {research_id}")
1072
- cleanup_research_resources(research_id)
1073
- print(f"Resources cleaned up for research_id: {research_id}")
1074
- except Exception as inner_e:
1075
- print(f"Error during quick summary generation: {str(inner_e)}")
1076
- print(traceback.format_exc())
1077
- raise Exception(f"Error generating quick summary: {str(inner_e)}")
1078
- else:
1079
- raise Exception("No research findings were generated. Please try again.")
1080
- else:
1081
- # Full Report
1082
- progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
1083
- report_generator = IntegratedReportGenerator()
1084
- final_report = report_generator.generate_report(results, query)
1085
- progress_callback("Report generation complete", 95, {"phase": "report_complete"})
1086
-
1087
- # Save as markdown file
1088
- output_dir = "research_outputs"
1089
- if not os.path.exists(output_dir):
1090
- os.makedirs(output_dir)
1091
-
1092
- safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
1093
- safe_query = safe_query.replace(" ", "_").lower()
1094
- report_path = os.path.join(output_dir, f"detailed_report_{safe_query}.md")
1095
-
1096
- with open(report_path, "w", encoding="utf-8") as f:
1097
- f.write(final_report['content'])
1098
-
1099
- # Update database
1100
- metadata = final_report['metadata']
1101
- metadata['iterations'] = results['iterations']
1102
-
1103
- # Calculate duration in seconds - using UTC consistently
1104
- now = datetime.utcnow()
1105
- completed_at = now.isoformat()
1106
-
1107
- # Get the start time from the database
1108
- conn = sqlite3.connect(DB_PATH)
1109
- cursor = conn.cursor()
1110
- cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
1111
- result = cursor.fetchone()
1112
-
1113
- # Use the helper function for consistent duration calculation
1114
-            duration_seconds = calculate_duration(result[0])
-
-        cursor.execute(
-            'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
-            ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
-        )
-        conn.commit()
-        conn.close()
-
-        progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
-
-        # Clean up - moved to a separate function for reuse
-        cleanup_research_resources(research_id)
-
-    except Exception as e:
-        # Handle error
-        error_message = f"Research failed: {str(e)}"
-        print(f"Research error: {error_message}")
-        try:
-            # Check for common Ollama error patterns in the exception and provide more user-friendly errors
-            user_friendly_error = str(e)
-            error_context = {}
-
-            if "Error type: ollama_unavailable" in user_friendly_error:
-                user_friendly_error = "Ollama AI service is unavailable. Please check that Ollama is running properly on your system."
-                error_context = {"solution": "Start Ollama with 'ollama serve' or check if it's installed correctly."}
-            elif "Error type: model_not_found" in user_friendly_error:
-                user_friendly_error = "Required Ollama model not found. Please pull the model first."
-                error_context = {"solution": "Run 'ollama pull mistral' to download the required model."}
-            elif "Error type: connection_error" in user_friendly_error:
-                user_friendly_error = "Connection error with LLM service. Please check that your AI service is running."
-                error_context = {"solution": "Ensure Ollama or your API service is running and accessible."}
-            elif "Error type: api_error" in user_friendly_error:
-                # Keep the original error message as it's already improved
-                error_context = {"solution": "Check API configuration and credentials."}
-
-            # Update metadata with more context about the error
-            metadata = {
-                "phase": "error",
-                "error": user_friendly_error
-            }
-            if error_context:
-                metadata.update(error_context)
-
-            progress_callback(user_friendly_error, None, metadata)
-
-            conn = sqlite3.connect(DB_PATH)
-            cursor = conn.cursor()
-
-            # If termination was requested, mark as suspended instead of failed
-            status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
-            message = "Research was terminated by user" if status == 'suspended' else user_friendly_error
-
-            # Calculate duration up to termination point - using UTC consistently
-            now = datetime.utcnow()
-            completed_at = now.isoformat()
-
-            # Get the start time from the database
-            duration_seconds = None
-            cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
-            result = cursor.fetchone()
-
-            # Use the helper function for consistent duration calculation
-            if result and result[0]:
-                duration_seconds = calculate_duration(result[0])
-
-            cursor.execute(
-                'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
-                (status, completed_at, duration_seconds, json.dumps(metadata), research_id)
-            )
-            conn.commit()
-            conn.close()
-
-            try:
-                socketio.emit(f'research_progress_{research_id}', {
-                    'status': status,
-                    'error': message
-                })
-
-                # Also notify specific subscribers
-                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
-                    for sid in socket_subscriptions[research_id]:
-                        try:
-                            socketio.emit(
-                                f'research_progress_{research_id}',
-                                {'status': status, 'error': message},
-                                room=sid
-                            )
-                        except Exception as sub_err:
-                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
-
-            except Exception as socket_error:
-                print(f"Failed to emit error via socket: {str(socket_error)}")
-        except Exception as inner_e:
-            print(f"Error in error handler: {str(inner_e)}")
-
-        # Clean up resources - moved to a separate function for reuse
-        cleanup_research_resources(research_id)
 
-@research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
-def terminate_research(research_id):
-    """Terminate an in-progress research process"""
-
-    # Check if the research exists and is in progress
-    conn = sqlite3.connect(DB_PATH)
-    cursor = conn.cursor()
-    cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
-    result = cursor.fetchone()
-
-    if not result:
-        conn.close()
-        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-
-    status = result[0]
-
-    # If it's not in progress, return an error
-    if status != 'in_progress':
-        conn.close()
-        return jsonify({'status': 'error', 'message': 'Research is not in progress'}), 400
-
-    # Check if it's in the active_research dict
-    if research_id not in active_research:
-        # Update the status in the database
-        cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
-        conn.commit()
-        conn.close()
-        return jsonify({'status': 'success', 'message': 'Research terminated'})
-
-    # Set the termination flag
-    termination_flags[research_id] = True
-
-    # Log the termination request - using UTC timestamp
-    timestamp = datetime.utcnow().isoformat()
-    termination_message = "Research termination requested by user"
-    current_progress = active_research[research_id]['progress']
-
-    # Create log entry
-    log_entry = {
-        "time": timestamp,
-        "message": termination_message,
-        "progress": current_progress,
-        "metadata": {"phase": "termination"}
-    }
-
-    # Add to in-memory log
-    active_research[research_id]['log'].append(log_entry)
-
-    # Add to database log
-    add_log_to_db(
-        research_id,
-        termination_message,
-        log_type='milestone',
-        progress=current_progress,
-        metadata={"phase": "termination"}
-    )
-
-    # Update the log in the database (old way for backward compatibility)
-    cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
-    log_result = cursor.fetchone()
-    if log_result:
-        try:
-            current_log = json.loads(log_result[0])
-        except:
-            current_log = []
-        current_log.append(log_entry)
-        cursor.execute(
-            'UPDATE research_history SET progress_log = ? WHERE id = ?',
-            (json.dumps(current_log), research_id)
+# Check if we need to run database migration
+def check_migration_needed():
+    """Check if database migration is needed, based on presence of legacy files and absence of new DB"""
+    if not os.path.exists(DB_PATH):
+        # The new database doesn't exist, check if legacy databases exist
+        legacy_files_exist = os.path.exists(LEGACY_DEEP_RESEARCH_DB) or os.path.exists(
+            LEGACY_RESEARCH_HISTORY_DB
         )
-
-    # IMMEDIATELY update the status to 'suspended' to avoid race conditions
-    cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
-    conn.commit()
-    conn.close()
-
-    # Emit a socket event for the termination request
-    try:
-        event_data = {
-            'status': 'suspended', # Changed from 'terminating' to 'suspended'
-            'message': 'Research was suspended by user request'
-        }
-
-        socketio.emit(f'research_progress_{research_id}', event_data)
-
-        if research_id in socket_subscriptions and socket_subscriptions[research_id]:
-            for sid in socket_subscriptions[research_id]:
-                try:
-                    socketio.emit(
-                        f'research_progress_{research_id}',
-                        event_data,
-                        room=sid
-                    )
-                except Exception as err:
-                    print(f"Error emitting to subscriber {sid}: {str(err)}")
-
-    except Exception as socket_error:
-        print(f"Socket emit error (non-critical): {str(socket_error)}")
-
-    return jsonify({'status': 'success', 'message': 'Research termination requested'})
-
-@research_bp.route('/api/research/<int:research_id>/delete', methods=['DELETE'])
-def delete_research(research_id):
-    """Delete a research record"""
-    conn = sqlite3.connect(DB_PATH)
-    cursor = conn.cursor()
-
-    # First check if the research exists and is not in progress
-    cursor.execute('SELECT status, report_path FROM research_history WHERE id = ?', (research_id,))
-    result = cursor.fetchone()
-
-    if not result:
-        conn.close()
-        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-
-    status, report_path = result
-
-    # Don't allow deleting research in progress
-    if status == 'in_progress' and research_id in active_research:
-        conn.close()
-        return jsonify({
-            'status': 'error',
-            'message': 'Cannot delete research that is in progress'
-        }), 400
-
-    # Delete report file if it exists
-    if report_path and os.path.exists(report_path):
-        try:
-            os.remove(report_path)
-        except Exception as e:
-            print(f"Error removing report file: {str(e)}")
-
-    # Delete the database record
-    cursor.execute('DELETE FROM research_history WHERE id = ?', (research_id,))
-    conn.commit()
-    conn.close()
-
-    return jsonify({'status': 'success'})
-@research_bp.route('/settings', methods=['GET'])
-def settings_page():
-    """Main settings dashboard with links to specialized config pages"""
-    return render_template('settings_dashboard.html')
-
-@research_bp.route('/settings/main', methods=['GET'])
-def main_config_page():
-    """Edit main configuration with search parameters"""
-    return render_template('main_config.html', main_file_path=MAIN_CONFIG_FILE)
-
-@research_bp.route('/settings/llm', methods=['GET'])
-def llm_config_page():
-    """Edit LLM configuration using raw file editor"""
-    return render_template('llm_config.html', llm_file_path=LLM_CONFIG_FILE)
-
-@research_bp.route('/settings/collections', methods=['GET'])
-def collections_config_page():
-    """Edit local collections configuration using raw file editor"""
-    return render_template('collections_config.html', collections_file_path=LOCAL_COLLECTIONS_FILE)
-
-@research_bp.route('/settings/api_keys', methods=['GET'])
-def api_keys_config_page():
-    """Edit API keys configuration"""
-    # Get the secrets file path
-    secrets_file = CONFIG_DIR / ".secrets.toml"
-
-    return render_template('api_keys_config.html', secrets_file_path=secrets_file)
-# Add to the imports section
-from local_deep_research.config import SEARCH_ENGINES_FILE
-
-# Add a new route for search engines configuration page
-@research_bp.route('/settings/search_engines', methods=['GET'])
-def search_engines_config_page():
-    """Edit search engines configuration using raw file editor"""
-    # Read the current config file
-    raw_config = ""
-    try:
-        with open(SEARCH_ENGINES_FILE, 'r') as f:
-            raw_config = f.read()
-    except Exception as e:
-        flash(f'Error reading search engines configuration: {str(e)}', 'error')
-        raw_config = "# Error reading configuration file"
-
-    # Get list of engine names for display
-    engine_names = []
-    try:
-        from local_deep_research.web_search_engines.search_engines_config import SEARCH_ENGINES
-        engine_names = list(SEARCH_ENGINES.keys())
-        engine_names.sort() # Alphabetical order
-    except Exception as e:
-        logger.error(f"Error getting engine names: {e}")
-
-    return render_template('search_engines_config.html',
-                           search_engines_file_path=SEARCH_ENGINES_FILE,
-                           raw_config=raw_config,
-                           engine_names=engine_names)
-
-# Add a route to save search engines configuration
-@research_bp.route('/api/save_search_engines_config', methods=['POST'])
-def save_search_engines_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate TOML syntax
-        try:
-            toml.loads(raw_config)
-        except toml.TomlDecodeError as e:
-            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(SEARCH_ENGINES_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{SEARCH_ENGINES_FILE}.bak"
-        if os.path.exists(SEARCH_ENGINES_FILE):
-            import shutil
-            shutil.copy2(SEARCH_ENGINES_FILE, backup_path)
-
-        # Write new config
-        with open(SEARCH_ENGINES_FILE, 'w') as f:
-            f.write(raw_config)
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-
-
-# API endpoint to save raw LLM config
-@research_bp.route('/api/save_llm_config', methods=['POST'])
-def save_llm_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate Python syntax
-        try:
-            compile(raw_config, '<string>', 'exec')
-        except SyntaxError as e:
-            return jsonify({'success': False, 'error': f'Syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(LLM_CONFIG_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{LLM_CONFIG_FILE}.bak"
-        if os.path.exists(LLM_CONFIG_FILE):
-            import shutil
-            shutil.copy2(LLM_CONFIG_FILE, backup_path)
-
-        # Write new config
-        with open(LLM_CONFIG_FILE, 'w') as f:
-            f.write(raw_config)
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-
-# API endpoint to save raw collections config
-@research_bp.route('/api/save_collections_config', methods=['POST'])
-def save_collections_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate TOML syntax
-        try:
-            toml.loads(raw_config)
-        except toml.TomlDecodeError as e:
-            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(LOCAL_COLLECTIONS_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{LOCAL_COLLECTIONS_FILE}.bak"
-        if os.path.exists(LOCAL_COLLECTIONS_FILE):
-            import shutil
-            shutil.copy2(LOCAL_COLLECTIONS_FILE, backup_path)
-
-        # Write new config
-        with open(LOCAL_COLLECTIONS_FILE, 'w') as f:
-            f.write(raw_config)
-
-        # Also trigger a reload in the collections system
-        try:
-            load_local_collections(reload=True)
-        except Exception as reload_error:
-            return jsonify({'success': True, 'warning': f'Config saved, but error reloading: {str(reload_error)}'})
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-
-# API endpoint to save raw main config
-@research_bp.route('/api/save_main_config', methods=['POST'])
-def save_raw_main_config():
-    try:
-        data = request.get_json()
-        raw_config = data.get('raw_config', '')
-
-        # Validate TOML syntax
-        try:
-            toml.loads(raw_config)
-        except toml.TomlDecodeError as e:
-            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
-
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(MAIN_CONFIG_FILE), exist_ok=True)
-
-        # Create a backup first
-        backup_path = f"{MAIN_CONFIG_FILE}.bak"
-        if os.path.exists(MAIN_CONFIG_FILE):
-            import shutil
-            shutil.copy2(MAIN_CONFIG_FILE, backup_path)
-
-        # Write new config
-        with open(MAIN_CONFIG_FILE, 'w') as f:
-            f.write(raw_config)
-
-        return jsonify({'success': True})
-    except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
-@research_bp.route('/raw_config')
-def get_raw_config():
-    """Return the raw configuration file content"""
-    try:
-        # Determine which config file to load based on a query parameter
-        config_type = request.args.get('type', 'main')
-
-        if config_type == 'main':
-            config_path = os.path.join(app.config['CONFIG_DIR'], 'config.toml')
-            with open(config_path, 'r') as f:
-                return f.read()
-        elif config_type == 'llm':
-            config_path = os.path.join(app.config['CONFIG_DIR'], 'llm_config.py')
-            with open(config_path, 'r') as f:
-                return f.read()
-        elif config_type == 'collections':
-            config_path = os.path.join(app.config['CONFIG_DIR'], 'collections.toml')
-            with open(config_path, 'r') as f:
-                return f.read()
-        else:
-            return "Unknown configuration type", 400
-    except Exception as e:
-        return str(e), 500
-import os
-import subprocess
-import platform
-
-@research_bp.route('/open_file_location', methods=['POST'])
-def open_file_location():
-    file_path = request.form.get('file_path')
-
-    if not file_path:
-        flash('No file path provided', 'error')
-        return redirect(url_for('research.settings_page'))
-
-    # Get the directory containing the file
-    dir_path = os.path.dirname(os.path.abspath(file_path))
-
-    # Open the directory in the file explorer
-    try:
-        if platform.system() == "Windows":
-            subprocess.Popen(f'explorer "{dir_path}"')
-        elif platform.system() == "Darwin": # macOS
-            subprocess.Popen(["open", dir_path])
-        else: # Linux
-            subprocess.Popen(["xdg-open", dir_path])
-
-        flash(f'Opening folder: {dir_path}', 'success')
-    except Exception as e:
-        flash(f'Error opening folder: {str(e)}', 'error')
-
-    # Redirect back to the settings page
-    if 'llm' in file_path:
-        return redirect(url_for('research.llm_config_page'))
-    elif 'collections' in file_path:
-        return redirect(url_for('research.collections_config_page'))
-    else:
-        return redirect(url_for('research.main_config_page'))
-
-@research_bp.route('/api/research/<int:research_id>/logs')
-def get_research_logs(research_id):
-    """Get logs for a specific research ID"""
-    # First check if the research exists
-    conn = sqlite3.connect(DB_PATH)
-    conn.row_factory = sqlite3.Row
-    cursor = conn.cursor()
-    cursor.execute('SELECT id FROM research_history WHERE id = ?', (research_id,))
-    result = cursor.fetchone()
-    conn.close()
-
-    if not result:
-        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
-
-    # Retrieve logs from the database
-    logs = get_logs_for_research(research_id)
-
-    # Add any current logs from memory if this is an active research
-    if research_id in active_research and active_research[research_id].get('log'):
-        # Use the logs from memory temporarily until they're saved to the database
-        memory_logs = active_research[research_id]['log']
-
-        # Filter out logs that are already in the database
-        # We'll compare timestamps to avoid duplicates
-        db_timestamps = {log['time'] for log in logs}
-        unique_memory_logs = [log for log in memory_logs if log['time'] not in db_timestamps]
-
-        # Add unique memory logs to our return list
-        logs.extend(unique_memory_logs)
-
-    # Sort logs by timestamp
-    logs.sort(key=lambda x: x['time'])
-
-    return jsonify({
-        'status': 'success',
-        'logs': logs
-    })
 
+        if legacy_files_exist:
+            logger.info(
+                "Legacy database files found, but ldr.db doesn't exist. Migration needed."
+            )
+            return True
 
+    return False
 
-# Register the blueprint
-app.register_blueprint(research_bp)
 
-# Also add the static route at the app level for compatibility
-@app.route('/static/<path:path>')
-def app_serve_static(path):
-    return send_from_directory(app.static_folder, path)
+# Create the Flask app and SocketIO instance
+app, socketio = create_app()
 
-# Add favicon route to prevent 404 errors
-@app.route('/favicon.ico')
-def favicon():
-    return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
-
-# Add this function to app.py
-def convert_debug_to_markdown(raw_text, query):
-    """
-    Convert the debug-formatted text to clean markdown.
-
-    Args:
-        raw_text: The raw formatted findings with debug symbols
-        query: Original research query
-
-    Returns:
-        Clean markdown formatted text
-    """
-    try:
-        print(f"Starting markdown conversion for query: {query}")
-        print(f"Raw text type: {type(raw_text)}")
-
-        # Handle None or empty input
-        if not raw_text:
-            print("WARNING: raw_text is empty or None")
-            return f"No detailed findings available for '{query}'."
-
-        # If there's a "DETAILED FINDINGS:" section, extract everything after it
-        if "DETAILED FINDINGS:" in raw_text:
-            print("Found DETAILED FINDINGS section")
-            detailed_index = raw_text.index("DETAILED FINDINGS:")
-            content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
-        else:
-            print("No DETAILED FINDINGS section found, using full text")
-            content = raw_text
-
-        # Remove divider lines with === symbols
-        lines_before = len(content.split("\n"))
-        content = "\n".join([line for line in content.split("\n")
-                             if not line.strip().startswith("===") and not line.strip() == "="*80])
-        lines_after = len(content.split("\n"))
-        print(f"Removed {lines_before - lines_after} divider lines")
-
-
-
-        # Remove SEARCH QUESTIONS BY ITERATION section
-        if "SEARCH QUESTIONS BY ITERATION:" in content:
-            print("Found SEARCH QUESTIONS BY ITERATION section")
-            search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
-            next_major_section = -1
-            for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
-                if marker in content[search_index:]:
-                    marker_pos = content.index(marker, search_index)
-                    if next_major_section == -1 or marker_pos < next_major_section:
-                        next_major_section = marker_pos
-
-            if next_major_section != -1:
-                print(f"Removing section from index {search_index} to {next_major_section}")
-                content = content[:search_index] + content[next_major_section:]
-            else:
-                # If no later section, just remove everything from SEARCH QUESTIONS onwards
-                print(f"Removing everything after index {search_index}")
-                content = content[:search_index].strip()
-
-        print(f"Final markdown length: {len(content.strip())}")
-        return content.strip()
-    except Exception as e:
-        print(f"Error in convert_debug_to_markdown: {str(e)}")
-        print(traceback.format_exc())
-        # Return a basic message with the original query as fallback
-        return f"# Research on {query}\n\nThere was an error formatting the research results."
 
 def main():
     """
     Entry point for the web application when run as a command.
     This function is needed for the package's entry point to work properly.
     """
-    # Import settings here to avoid circular imports
-    from local_deep_research.config import settings
+    # Check if migration is needed
+    if check_migration_needed():
+        logger.info(
+            "Database migration required. Run migrate_db.py before starting the application."
+        )
+        print("=" * 80)
+        print("DATABASE MIGRATION REQUIRED")
+        print(
+            "Legacy database files were found, but the new unified database doesn't exist."
+        )
+        print(
+            "Please run 'python -m src.local_deep_research.web.database.migrate_to_ldr_db' to migrate your data."
+        )
+        print(
+            "You can continue without migration, but your previous data won't be available."
+        )
+        print("=" * 80)
+
+        # If --auto-migrate flag is passed, run migration automatically
+        if "--auto-migrate" in sys.argv:
+            logger.info("Auto-migration flag detected, running migration...")
+            try:
+                from .database.migrate_to_ldr_db import migrate_to_ldr_db
+
+                success = migrate_to_ldr_db()
+                if success:
+                    logger.info("Database migration completed successfully.")
+                else:
+                    logger.warning("Database migration failed.")
+            except Exception as e:
+                logger.error(f"Error running database migration: {e}")
+                print(f"Error: {e}")
+                print("Please run migration manually.")
 
     # Get web server settings with defaults
-    port = settings.web.port
-    host = settings.web.host
-    debug = settings.web.debug
+    port = get_db_setting("web.port", settings.web.port)
+    host = get_db_setting("web.host", settings.web.host)
+    debug = get_db_setting("web.debug", settings.web.debug)
 
     # Check for OpenAI availability but don't import it unless necessary
     try:
-        import os
         api_key = os.environ.get("OPENAI_API_KEY")
         if api_key:
             try:
                 # Only try to import if we have an API key
                 import openai
+
                 openai.api_key = api_key
-                OPENAI_AVAILABLE = True
-                print("OpenAI integration is available")
+                logger.info("OpenAI integration is available")
             except ImportError:
-                print("OpenAI package not installed, integration disabled")
+                logger.info("OpenAI package not installed, integration disabled")
         else:
-            print("OPENAI_API_KEY not found in environment variables, OpenAI integration disabled")
+            logger.info(
+                "OPENAI_API_KEY not found in environment variables, OpenAI integration disabled"
+            )
     except Exception as e:
-        print(f"Error checking OpenAI availability: {e}")
-
+        logger.error(f"Error checking OpenAI availability: {e}")
+
+    logger.info(f"Starting web server on {host}:{port} (debug: {debug})")
     socketio.run(app, debug=debug, host=host, port=port, allow_unsafe_werkzeug=True)
-
-if __name__ == '__main__':
-    main()
+
+
+if __name__ == "__main__":
+    main()