local-deep-research 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
local_deep_research/web/app.py
@@ -0,0 +1,1209 @@
+import os
+import json
+import time
+import sqlite3
+import threading
+from datetime import datetime
+from flask import Flask, render_template, request, jsonify, send_from_directory, Response, make_response, current_app, Blueprint, redirect, url_for, flash
+from flask_socketio import SocketIO, emit
+from local_deep_research.search_system import AdvancedSearchSystem
+from local_deep_research.report_generator import IntegratedReportGenerator
+# Move this import up to ensure it's available globally
+from dateutil import parser
+import traceback
+import pkg_resources
+# Import the new configuration manager
+from local_deep_research.config import get_config_dir
+import logging
+logger = logging.getLogger(__name__)
+
+CONFIG_DIR = get_config_dir() / "config"
+MAIN_CONFIG_FILE = CONFIG_DIR / "settings.toml"
+LLM_CONFIG_FILE = CONFIG_DIR / "llm_config.py"
+LOCAL_COLLECTIONS_FILE = CONFIG_DIR / "local_collections.toml"
+import toml
+
+# Set flag for tracking OpenAI availability - we'll check it only when needed
+OPENAI_AVAILABLE = False
+
+# Initialize Flask app
+try:
+    import os
+    import logging
+    from local_deep_research.utilties.setup_utils import setup_user_directories
+
+    # Configure logging
+    logging.basicConfig(level=logging.INFO)
+    logger = logging.getLogger(__name__)
+
+    # Explicitly run setup
+    logger.info("Initializing configuration...")
+    setup_user_directories()
+
+    # Get directories based on package installation
+    PACKAGE_DIR = pkg_resources.resource_filename('local_deep_research', 'web')
+    STATIC_DIR = os.path.join(PACKAGE_DIR, 'static')
+    TEMPLATE_DIR = os.path.join(PACKAGE_DIR, 'templates')
+
+    # Setup logging
+    logging.basicConfig(level=logging.INFO)
+
+    # Create directories and default configs if needed
+    setup_user_directories()
+
+    # Initialize Flask app with package directories
+    app = Flask(__name__,
+                static_folder=STATIC_DIR,
+                template_folder=TEMPLATE_DIR)
+    print(f"Using package static path: {STATIC_DIR}")
+    print(f"Using package template path: {TEMPLATE_DIR}")
+except Exception as e:
+    # Fallback for development
+    print(f"Package directories not found, using fallback paths: {str(e)}")
+    app = Flask(__name__,
+                static_folder=os.path.abspath('static'),
+                template_folder=os.path.abspath('templates'))
+app.config['SECRET_KEY'] = 'deep-research-secret-key'
+
+# Create a Blueprint for the research application
+research_bp = Blueprint('research', __name__, url_prefix='/research')
+
+# Add improved Socket.IO configuration with better error handling
+socketio = SocketIO(
+    app,
+    cors_allowed_origins="*",
+    async_mode='threading',
+    path='/research/socket.io',
+    logger=True,
+    engineio_logger=True,
+    ping_timeout=20,
+    ping_interval=5
+)
+
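The Socket.IO server above is mounted at /research/socket.io and pushes per-research events on channels named research_progress_<id>. A minimal client sketch (not part of the package; it assumes the server is running on localhost:5000 and that a research with id 1 exists), using the python-socketio client:

import socketio  # pip install "python-socketio[client]"

sio = socketio.Client()
RESEARCH_ID = 1  # placeholder: use the id returned by /research/api/start_research

@sio.on(f"research_progress_{RESEARCH_ID}")
def on_progress(data):
    # Each event carries progress, message, status, and the latest log entry
    print(data.get("progress"), data.get("message"))

sio.connect("http://localhost:5000", socketio_path="/research/socket.io")
sio.emit("subscribe_to_research", {"research_id": RESEARCH_ID})
sio.wait()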
+# Active research processes and socket subscriptions
+active_research = {}
+socket_subscriptions = {}
+
+# Add termination flags dictionary
+termination_flags = {}
+
+# Database setup
+DB_PATH = 'research_history.db'
+
+# Add Content Security Policy headers to allow Socket.IO to function
+@app.after_request
+def add_security_headers(response):
+    # Define a permissive CSP for development that allows Socket.IO to function
+    csp = (
+        "default-src 'self'; "
+        "connect-src 'self' ws: wss: http: https:; "
+        "script-src 'self' 'unsafe-inline' 'unsafe-eval' cdnjs.cloudflare.com cdn.jsdelivr.net unpkg.com; "
+        "style-src 'self' 'unsafe-inline' cdnjs.cloudflare.com; "
+        "font-src 'self' cdnjs.cloudflare.com; "
+        "img-src 'self' data:; "
+        "worker-src blob:; "
+        "frame-src 'self';"
+    )
+
+    response.headers['Content-Security-Policy'] = csp
+    response.headers['X-Content-Security-Policy'] = csp
+
+    # Add CORS headers for API requests
+    if request.path.startswith('/api/'):
+        response.headers['Access-Control-Allow-Origin'] = '*'
+        response.headers['Access-Control-Allow-Methods'] = 'GET, POST, DELETE, OPTIONS'
+        response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
+
+    return response
+
+# Add a middleware layer to handle abrupt disconnections
+@app.before_request
+def handle_websocket_requests():
+    if request.path.startswith('/research/socket.io'):
+        try:
+            if not request.environ.get('werkzeug.socket'):
+                return
+        except Exception as e:
+            print(f"WebSocket preprocessing error: {e}")
+            # Return empty response to prevent further processing
+            return '', 200
+
+# Initialize the database
+def init_db():
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+
+    # Create the table if it doesn't exist
+    cursor.execute('''
+    CREATE TABLE IF NOT EXISTS research_history (
+        id INTEGER PRIMARY KEY AUTOINCREMENT,
+        query TEXT NOT NULL,
+        mode TEXT NOT NULL,
+        status TEXT NOT NULL,
+        created_at TEXT NOT NULL,
+        completed_at TEXT,
+        duration_seconds INTEGER,
+        report_path TEXT,
+        metadata TEXT,
+        progress_log TEXT
+    )
+    ''')
+
+    # Check if the duration_seconds column exists, add it if missing
+    cursor.execute('PRAGMA table_info(research_history)')
+    columns = [column[1] for column in cursor.fetchall()]
+
+    if 'duration_seconds' not in columns:
+        print("Adding missing 'duration_seconds' column to research_history table")
+        cursor.execute('ALTER TABLE research_history ADD COLUMN duration_seconds INTEGER')
+
+    conn.commit()
+    conn.close()
+
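For reference, a small sketch (not part of the diff) of how the resulting table can be read back; it assumes research_history.db exists in the working directory, as created by init_db() above, and mirrors the sqlite3.Row pattern the API routes below use:

import sqlite3

conn = sqlite3.connect("research_history.db")
conn.row_factory = sqlite3.Row  # rows become dict-like, keyed by column name
rows = conn.execute(
    "SELECT id, query, status, created_at, duration_seconds "
    "FROM research_history ORDER BY created_at DESC LIMIT 5"
).fetchall()
for row in rows:
    print(dict(row))
conn.close()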
+# Helper function to calculate duration between created_at and completed_at timestamps
+def calculate_duration(created_at_str):
+    """
+    Calculate duration in seconds between created_at timestamp and now.
+    Handles various timestamp formats and returns None if calculation fails.
+    """
+    if not created_at_str:
+        return None
+
+    now = datetime.utcnow()
+    duration_seconds = None
+
+    try:
+        # Proper parsing of ISO format
+        if 'T' in created_at_str:  # ISO format with T separator
+            start_time = datetime.fromisoformat(created_at_str)
+        else:  # Older format without T
+            # Try different formats
+            try:
+                start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S.%f')
+            except ValueError:
+                try:
+                    start_time = datetime.strptime(created_at_str, '%Y-%m-%d %H:%M:%S')
+                except ValueError:
+                    # Last resort fallback
+                    start_time = datetime.fromisoformat(created_at_str.replace(' ', 'T'))
+
+        # Ensure we're comparing UTC times
+        duration_seconds = int((now - start_time).total_seconds())
+    except Exception as e:
+        print(f"Error calculating duration: {str(e)}")
+        # Fallback method if parsing fails
+        try:
+            start_time_fallback = parser.parse(created_at_str)
+            duration_seconds = int((now - start_time_fallback).total_seconds())
+        except:
+            print(f"Fallback duration calculation also failed for timestamp: {created_at_str}")
+
+    return duration_seconds
+
+# Initialize the database on startup
+def initialize():
+    init_db()
+
+# Call initialize immediately when app is created
+initialize()
+
+# Route for index page - keep this at root level for easy access
+@app.route('/')
+def root_index():
+    return redirect(url_for('research.index'))
+
+# Update all routes with the research prefix
+@research_bp.route('/')
+def index():
+    return render_template('index.html')
+
+@research_bp.route('/static/<path:path>')
+def serve_static(path):
+    try:
+        print(f"Serving static file: {path}")
+        print(f"Static folder path: {app.static_folder}")
+        return send_from_directory(app.static_folder, path)
+    except Exception as e:
+        print(f"Error serving static file {path}: {str(e)}")
+        return f"Error serving file: {str(e)}", 404
+
+@research_bp.route('/api/history', methods=['GET'])
+def get_history():
+    """Get the research history"""
+    try:
+        conn = sqlite3.connect(DB_PATH)
+        conn.row_factory = sqlite3.Row
+        cursor = conn.cursor()
+
+        # Get all history records ordered by latest first
+        cursor.execute('SELECT * FROM research_history ORDER BY created_at DESC')
+        results = cursor.fetchall()
+        conn.close()
+
+        # Convert to list of dicts
+        history = []
+        for result in results:
+            item = dict(result)
+
+            # Ensure all keys exist with default values
+            if 'id' not in item:
+                item['id'] = None
+            if 'query' not in item:
+                item['query'] = 'Untitled Research'
+            if 'mode' not in item:
+                item['mode'] = 'quick'
+            if 'status' not in item:
+                item['status'] = 'unknown'
+            if 'created_at' not in item:
+                item['created_at'] = None
+            if 'completed_at' not in item:
+                item['completed_at'] = None
+            if 'duration_seconds' not in item:
+                item['duration_seconds'] = None
+            if 'report_path' not in item:
+                item['report_path'] = None
+            if 'metadata' not in item:
+                item['metadata'] = '{}'
+            if 'progress_log' not in item:
+                item['progress_log'] = '[]'
+
+            # Ensure timestamps are in ISO format
+            if item['created_at'] and 'T' not in item['created_at']:
+                try:
+                    # Convert to ISO format if it's not already
+                    dt = parser.parse(item['created_at'])
+                    item['created_at'] = dt.isoformat()
+                except:
+                    pass
+
+            if item['completed_at'] and 'T' not in item['completed_at']:
+                try:
+                    # Convert to ISO format if it's not already
+                    dt = parser.parse(item['completed_at'])
+                    item['completed_at'] = dt.isoformat()
+                except:
+                    pass
+
+            # Recalculate duration based on timestamps if it's null but both timestamps exist
+            if item['duration_seconds'] is None and item['created_at'] and item['completed_at']:
+                try:
+                    start_time = parser.parse(item['created_at'])
+                    end_time = parser.parse(item['completed_at'])
+                    item['duration_seconds'] = int((end_time - start_time).total_seconds())
+                except Exception as e:
+                    print(f"Error recalculating duration: {str(e)}")
+
+            history.append(item)
+
+        # Add CORS headers
+        response = make_response(jsonify(history))
+        response.headers.add('Access-Control-Allow-Origin', '*')
+        response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
+        response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
+        return response
+    except Exception as e:
+        print(f"Error getting history: {str(e)}")
+        print(traceback.format_exc())
+        # Return empty array with CORS headers
+        response = make_response(jsonify([]))
+        response.headers.add('Access-Control-Allow-Origin', '*')
+        response.headers.add('Access-Control-Allow-Headers', 'Content-Type,Authorization')
+        response.headers.add('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS')
+        return response
+
+@research_bp.route('/api/start_research', methods=['POST'])
+def start_research():
+    data = request.json
+    query = data.get('query')
+    mode = data.get('mode', 'quick')
+
+    if not query:
+        return jsonify({'status': 'error', 'message': 'Query is required'}), 400
+
+    # Check if there's any active research
+    if active_research:
+        return jsonify({
+            'status': 'error',
+            'message': 'Another research is already in progress. Please wait for it to complete.'
+        }), 409
+
+    # Create a record in the database with explicit UTC timestamp
+    created_at = datetime.utcnow().isoformat()
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute(
+        'INSERT INTO research_history (query, mode, status, created_at, progress_log) VALUES (?, ?, ?, ?, ?)',
+        (query, mode, 'in_progress', created_at, json.dumps([{"time": created_at, "message": "Research started", "progress": 0}]))
+    )
+    research_id = cursor.lastrowid
+    conn.commit()
+    conn.close()
+
+    # Start research process in a background thread
+    thread = threading.Thread(
+        target=run_research_process,
+        args=(research_id, query, mode)
+    )
+    thread.daemon = True
+    thread.start()
+
+    active_research[research_id] = {
+        'thread': thread,
+        'progress': 0,
+        'status': 'in_progress',
+        'log': [{"time": created_at, "message": "Research started", "progress": 0}]
+    }
+
+    return jsonify({
+        'status': 'success',
+        'research_id': research_id
+    })
+
+@research_bp.route('/api/research/<int:research_id>')
+def get_research_status(research_id):
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
+    result = dict(cursor.fetchone() or {})
+    conn.close()
+
+    if not result:
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    # Add progress information
+    if research_id in active_research:
+        result['progress'] = active_research[research_id]['progress']
+        result['log'] = active_research[research_id]['log']
+    elif result.get('status') == 'completed':
+        result['progress'] = 100
+        try:
+            result['log'] = json.loads(result.get('progress_log', '[]'))
+        except:
+            result['log'] = []
+    else:
+        result['progress'] = 0
+        try:
+            result['log'] = json.loads(result.get('progress_log', '[]'))
+        except:
+            result['log'] = []
+
+    return jsonify(result)
+
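Taken together, the two routes above form a start-then-poll workflow. A hedged client sketch (it assumes the app is served on localhost:5000 with the blueprint registered under /research, as done later in this file, and that the query is a placeholder):

import time
import requests

BASE = "http://localhost:5000/research"

# Kick off a quick-mode research run
resp = requests.post(f"{BASE}/api/start_research",
                     json={"query": "example topic", "mode": "quick"})
research_id = resp.json()["research_id"]

# Poll the status endpoint until the run leaves the in_progress state
while True:
    status = requests.get(f"{BASE}/api/research/{research_id}").json()
    print(status.get("progress"), status.get("status"))
    if status.get("status") != "in_progress":
        break
    time.sleep(2)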
+@research_bp.route('/api/research/<int:research_id>/details')
+def get_research_details(research_id):
+    """Get detailed progress log for a specific research"""
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
+    result = dict(cursor.fetchone() or {})
+    conn.close()
+
+    if not result:
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    try:
+        # Get the progress log
+        progress_log = json.loads(result.get('progress_log', '[]'))
+    except:
+        progress_log = []
+
+    # If this is an active research, get the latest log
+    if research_id in active_research:
+        progress_log = active_research[research_id]['log']
+
+    return jsonify({
+        'status': 'success',
+        'research_id': research_id,
+        'query': result.get('query'),
+        'mode': result.get('mode'),
+        'status': result.get('status'),
+        'progress': active_research.get(research_id, {}).get('progress', 100 if result.get('status') == 'completed' else 0),
+        'created_at': result.get('created_at'),
+        'completed_at': result.get('completed_at'),
+        'log': progress_log
+    })
+
+@research_bp.route('/api/report/<int:research_id>')
+def get_report(research_id):
+    conn = sqlite3.connect(DB_PATH)
+    conn.row_factory = sqlite3.Row
+    cursor = conn.cursor()
+    cursor.execute('SELECT * FROM research_history WHERE id = ?', (research_id,))
+    result = dict(cursor.fetchone() or {})
+    conn.close()
+
+    if not result or not result.get('report_path'):
+        return jsonify({'status': 'error', 'message': 'Report not found'}), 404
+
+    try:
+        with open(result['report_path'], 'r', encoding='utf-8') as f:
+            content = f.read()
+        return jsonify({
+            'status': 'success',
+            'content': content,
+            'metadata': json.loads(result.get('metadata', '{}'))
+        })
+    except Exception as e:
+        return jsonify({'status': 'error', 'message': str(e)}), 500
+
+@research_bp.route('/research/details/<int:research_id>')
+def research_details_page(research_id):
+    """Render the research details page"""
+    return render_template('index.html')
+
+@socketio.on('connect')
+def handle_connect():
+    print(f"Client connected: {request.sid}")
+
+@socketio.on('disconnect')
+def handle_disconnect():
+    try:
+        print(f"Client disconnected: {request.sid}")
+        # Clean up subscriptions for this client
+        for research_id, subscribers in list(socket_subscriptions.items()):
+            if request.sid in subscribers:
+                subscribers.remove(request.sid)
+                if not subscribers:
+                    socket_subscriptions.pop(research_id, None)
+    except Exception as e:
+        print(f"Error handling disconnect: {e}")
+
+@socketio.on('subscribe_to_research')
+def handle_subscribe(data):
+    research_id = data.get('research_id')
+    if research_id:
+        if research_id not in socket_subscriptions:
+            socket_subscriptions[research_id] = set()
+        socket_subscriptions[research_id].add(request.sid)
+        print(f"Client {request.sid} subscribed to research {research_id}")
+
+        # Send current status immediately if available
+        if research_id in active_research:
+            progress = active_research[research_id]['progress']
+            latest_log = active_research[research_id]['log'][-1] if active_research[research_id]['log'] else None
+
+            if latest_log:
+                emit(f'research_progress_{research_id}', {
+                    'progress': progress,
+                    'message': latest_log.get('message', 'Processing...'),
+                    'status': 'in_progress',
+                    'log_entry': latest_log
+                })
+
+@socketio.on_error
+def handle_socket_error(e):
+    print(f"Socket.IO error: {str(e)}")
+    # Don't propagate exceptions to avoid crashing the server
+    return False
+
+@socketio.on_error_default
+def handle_default_error(e):
+    print(f"Unhandled Socket.IO error: {str(e)}")
+    # Don't propagate exceptions to avoid crashing the server
+    return False
+
+def run_research_process(research_id, query, mode):
+    try:
+        system = AdvancedSearchSystem()
+
+        # Set up progress callback
+        def progress_callback(message, progress_percent, metadata):
+            timestamp = datetime.utcnow().isoformat()
+            log_entry = {
+                "time": timestamp,
+                "message": message,
+                "progress": progress_percent,
+                "metadata": metadata
+            }
+
+            # Check if termination was requested
+            if research_id in termination_flags and termination_flags[research_id]:
+                # Clean up and exit
+                raise Exception("Research was terminated by user")
+
+            # Update active research record
+            if research_id in active_research:
+                active_research[research_id]['log'].append(log_entry)
+                if progress_percent is not None:
+                    active_research[research_id]['progress'] = progress_percent
+
+            # Save to database (but not too frequently)
+            if progress_percent is None or progress_percent % 10 == 0 or metadata.get('phase') in ['complete', 'iteration_complete']:
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                cursor.execute(
+                    'SELECT progress_log FROM research_history WHERE id = ?',
+                    (research_id,)
+                )
+                result = cursor.fetchone()
+                if result:
+                    try:
+                        current_log = json.loads(result[0])
+                    except:
+                        current_log = []
+                    current_log.append(log_entry)
+                    cursor.execute(
+                        'UPDATE research_history SET progress_log = ? WHERE id = ?',
+                        (json.dumps(current_log), research_id)
+                    )
+                    conn.commit()
+                conn.close()
+
+            # Emit socket event with try/except block to handle connection issues
+            try:
+                event_data = {
+                    'progress': progress_percent,
+                    'message': message,
+                    'status': 'in_progress',
+                    'log_entry': log_entry
+                }
+
+                # Emit to the specific research channel
+                socketio.emit(f'research_progress_{research_id}', event_data)
+
+                # Also emit to specific subscribers if available
+                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+                    for sid in socket_subscriptions[research_id]:
+                        try:
+                            socketio.emit(
+                                f'research_progress_{research_id}',
+                                event_data,
+                                room=sid
+                            )
+                        except Exception as sub_err:
+                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+
+            except Exception as socket_error:
+                # Log socket error but continue with the research process
+                print(f"Socket emit error (non-critical): {str(socket_error)}")
+
+        # Set the progress callback in the system
+        system.set_progress_callback(progress_callback)
+
+        # Run the search
+        progress_callback("Starting research process", 5, {"phase": "init"})
+        results = system.analyze_topic(query)
+        progress_callback("Search complete, generating output", 80, {"phase": "output_generation"})
+
+        # Generate output based on mode
+        if mode == 'quick':
+            # Quick Summary
+            if results.get('findings'):
+                #initial_analysis = [finding['content'] for finding in results['findings']]
+                summary = ""
+                raw_formatted_findings = results['formatted_findings']
+
+                # ADDED CODE: Convert debug output to clean markdown
+                clean_markdown = convert_debug_to_markdown(raw_formatted_findings, query)
+
+                # Save as markdown file
+                output_dir = "research_outputs"
+                if not os.path.exists(output_dir):
+                    os.makedirs(output_dir)
+
+                safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
+                safe_query = safe_query.replace(" ", "_").lower()
+                report_path = os.path.join(output_dir, f"quick_summary_{safe_query}.md")
+
+                with open(report_path, "w", encoding="utf-8") as f:
+                    f.write("# Quick Research Summary\n\n")
+                    f.write(f"Query: {query}\n\n")
+                    f.write(clean_markdown)  # Use clean markdown instead of raw findings
+                    f.write("\n\n## Research Metrics\n")
+                    f.write(f"- Search Iterations: {results['iterations']}\n")
+                    f.write(f"- Generated at: {datetime.utcnow().isoformat()}\n")
+
+                # Update database
+                metadata = {
+                    'iterations': results['iterations'],
+                    'generated_at': datetime.utcnow().isoformat()
+                }
+
+                # Calculate duration in seconds - using UTC consistently
+                now = datetime.utcnow()
+                completed_at = now.isoformat()
+
+                # Get the start time from the database
+                conn = sqlite3.connect(DB_PATH)
+                cursor = conn.cursor()
+                cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+                result = cursor.fetchone()
+
+                # Use the helper function for consistent duration calculation
+                duration_seconds = calculate_duration(result[0])
+
+                # Update the record
+                cursor.execute(
+                    'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
+                    ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
+                )
+                conn.commit()
+                conn.close()
+
+                progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
+        else:
+            # Full Report
+            progress_callback("Generating detailed report...", 85, {"phase": "report_generation"})
+            report_generator = IntegratedReportGenerator()
+            final_report = report_generator.generate_report(results, query)
+            progress_callback("Report generation complete", 95, {"phase": "report_complete"})
+
+            # Save as markdown file
+            output_dir = "research_outputs"
+            if not os.path.exists(output_dir):
+                os.makedirs(output_dir)
+
+            safe_query = "".join(x for x in query if x.isalnum() or x in [" ", "-", "_"])[:50]
+            safe_query = safe_query.replace(" ", "_").lower()
+            report_path = os.path.join(output_dir, f"detailed_report_{safe_query}.md")
+
+            with open(report_path, "w", encoding="utf-8") as f:
+                f.write(final_report['content'])
+
+            # Update database
+            metadata = final_report['metadata']
+            metadata['iterations'] = results['iterations']
+
+            # Calculate duration in seconds - using UTC consistently
+            now = datetime.utcnow()
+            completed_at = now.isoformat()
+
+            # Get the start time from the database
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+            cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+            result = cursor.fetchone()
+
+            # Use the helper function for consistent duration calculation
+            duration_seconds = calculate_duration(result[0])
+
+            cursor.execute(
+                'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, report_path = ?, metadata = ? WHERE id = ?',
+                ('completed', completed_at, duration_seconds, report_path, json.dumps(metadata), research_id)
+            )
+            conn.commit()
+            conn.close()
+
+            progress_callback("Research completed successfully", 100, {"phase": "complete", "report_path": report_path})
+
+        # Clean up
+        if research_id in active_research:
+            del active_research[research_id]
+
+    except Exception as e:
+        # Handle error
+        error_message = f"Research failed: {str(e)}"
+        print(f"Research error: {error_message}")
+        try:
+            progress_callback(error_message, None, {"phase": "error", "error": str(e)})
+
+            conn = sqlite3.connect(DB_PATH)
+            cursor = conn.cursor()
+
+            # If termination was requested, mark as suspended instead of failed
+            status = 'suspended' if (research_id in termination_flags and termination_flags[research_id]) else 'failed'
+            message = "Research was terminated by user" if status == 'suspended' else str(e)
+
+            # Calculate duration up to termination point - using UTC consistently
+            now = datetime.utcnow()
+            completed_at = now.isoformat()
+
+            # Get the start time from the database
+            duration_seconds = None
+            cursor.execute('SELECT created_at FROM research_history WHERE id = ?', (research_id,))
+            result = cursor.fetchone()
+
+            # Use the helper function for consistent duration calculation
+            if result and result[0]:
+                duration_seconds = calculate_duration(result[0])
+
+            cursor.execute(
+                'UPDATE research_history SET status = ?, completed_at = ?, duration_seconds = ?, metadata = ? WHERE id = ?',
+                (status, completed_at, duration_seconds, json.dumps({'error': message}), research_id)
+            )
+            conn.commit()
+            conn.close()
+
+            try:
+                socketio.emit(f'research_progress_{research_id}', {
+                    'status': status,
+                    'error': message
+                })
+
+                # Also notify specific subscribers
+                if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+                    for sid in socket_subscriptions[research_id]:
+                        try:
+                            socketio.emit(
+                                f'research_progress_{research_id}',
+                                {'status': status, 'error': message},
+                                room=sid
+                            )
+                        except Exception as sub_err:
+                            print(f"Error emitting to subscriber {sid}: {str(sub_err)}")
+
+            except Exception as socket_error:
+                print(f"Failed to emit error via socket: {str(socket_error)}")
+        except Exception as inner_e:
+            print(f"Error in error handler: {str(inner_e)}")
+
+        # Clean up resources
+        if research_id in active_research:
+            del active_research[research_id]
+        if research_id in termination_flags:
+            del termination_flags[research_id]
+
+@research_bp.route('/api/research/<int:research_id>/terminate', methods=['POST'])
+def terminate_research(research_id):
+    """Terminate an in-progress research process"""
+
+    # Check if the research exists and is in progress
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+    cursor.execute('SELECT status FROM research_history WHERE id = ?', (research_id,))
+    result = cursor.fetchone()
+
+    if not result:
+        conn.close()
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    status = result[0]
+
+    # If it's not in progress, return an error
+    if status != 'in_progress':
+        conn.close()
+        return jsonify({'status': 'error', 'message': 'Research is not in progress'}), 400
+
+    # Check if it's in the active_research dict
+    if research_id not in active_research:
+        # Update the status in the database
+        cursor.execute('UPDATE research_history SET status = ? WHERE id = ?', ('suspended', research_id))
+        conn.commit()
+        conn.close()
+        return jsonify({'status': 'success', 'message': 'Research terminated'})
+
+    # Set the termination flag
+    termination_flags[research_id] = True
+
+    # Log the termination request - using UTC timestamp
+    timestamp = datetime.utcnow().isoformat()
+    log_entry = {
+        "time": timestamp,
+        "message": "Research termination requested by user",
+        "progress": active_research[research_id]['progress'],
+        "metadata": {"phase": "termination"}
+    }
+
+    active_research[research_id]['log'].append(log_entry)
+
+    # Update the log in the database
+    cursor.execute('SELECT progress_log FROM research_history WHERE id = ?', (research_id,))
+    log_result = cursor.fetchone()
+    if log_result:
+        try:
+            current_log = json.loads(log_result[0])
+        except:
+            current_log = []
+        current_log.append(log_entry)
+        cursor.execute(
+            'UPDATE research_history SET progress_log = ? WHERE id = ?',
+            (json.dumps(current_log), research_id)
+        )
+
+    conn.commit()
+    conn.close()
+
+    # Emit a socket event for the termination request
+    try:
+        event_data = {
+            'status': 'terminating',
+            'message': 'Research termination requested by user'
+        }
+
+        socketio.emit(f'research_progress_{research_id}', event_data)
+
+        if research_id in socket_subscriptions and socket_subscriptions[research_id]:
+            for sid in socket_subscriptions[research_id]:
+                try:
+                    socketio.emit(
+                        f'research_progress_{research_id}',
+                        event_data,
+                        room=sid
+                    )
+                except Exception as err:
+                    print(f"Error emitting to subscriber {sid}: {str(err)}")
+
+    except Exception as socket_error:
+        print(f"Socket emit error (non-critical): {str(socket_error)}")
+
+    return jsonify({'status': 'success', 'message': 'Research termination requested'})
+
+@research_bp.route('/api/research/<int:research_id>/delete', methods=['DELETE'])
+def delete_research(research_id):
+    """Delete a research record"""
+    conn = sqlite3.connect(DB_PATH)
+    cursor = conn.cursor()
+
+    # First check if the research exists and is not in progress
+    cursor.execute('SELECT status, report_path FROM research_history WHERE id = ?', (research_id,))
+    result = cursor.fetchone()
+
+    if not result:
+        conn.close()
+        return jsonify({'status': 'error', 'message': 'Research not found'}), 404
+
+    status, report_path = result
+
+    # Don't allow deleting research in progress
+    if status == 'in_progress' and research_id in active_research:
+        conn.close()
+        return jsonify({
+            'status': 'error',
+            'message': 'Cannot delete research that is in progress'
+        }), 400
+
+    # Delete report file if it exists
+    if report_path and os.path.exists(report_path):
+        try:
+            os.remove(report_path)
+        except Exception as e:
+            print(f"Error removing report file: {str(e)}")
+
+    # Delete the database record
+    cursor.execute('DELETE FROM research_history WHERE id = ?', (research_id,))
+    conn.commit()
+    conn.close()
+
+    return jsonify({'status': 'success'})
+
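A companion sketch for the lifecycle endpoints above, under the same base-URL assumption as the earlier polling example; the research id is a placeholder:

import requests

BASE = "http://localhost:5000/research"
RESEARCH_ID = 1  # placeholder

# Ask a running research to stop; the worker raises on its next progress callback
print(requests.post(f"{BASE}/api/research/{RESEARCH_ID}/terminate").json())

# Remove the record (and its report file) once it is no longer in progress
print(requests.delete(f"{BASE}/api/research/{RESEARCH_ID}/delete").json())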
+# Main settings page that links to specialized config pages
+@research_bp.route('/settings', methods=['GET'])
+def settings_page():
+    """Main settings dashboard with links to specialized config pages"""
+    return render_template('settings_dashboard.html')
+
+@research_bp.route('/settings/main', methods=['GET'])
+def main_config_page():
+    """Edit main configuration with search parameters"""
+    return render_template('main_config.html', main_file_path=MAIN_CONFIG_FILE)
+
+@research_bp.route('/settings/llm', methods=['GET'])
+def llm_config_page():
+    """Edit LLM configuration using raw file editor"""
+    return render_template('llm_config.html', llm_file_path=LLM_CONFIG_FILE)
+
+@research_bp.route('/settings/collections', methods=['GET'])
+def collections_config_page():
+    """Edit local collections configuration using raw file editor"""
+    return render_template('collections_config.html', collections_file_path=LOCAL_COLLECTIONS_FILE)
+
+@research_bp.route('/settings/api_keys', methods=['GET'])
+def api_keys_config_page():
+    """Edit API keys configuration"""
+    # Get the secrets file path
+    secrets_file = CONFIG_DIR / ".secrets.toml"
+
+    return render_template('api_keys_config.html', secrets_file_path=secrets_file)
+# Add to the imports section
+from local_deep_research.config import SEARCH_ENGINES_FILE
+
+# Add a new route for search engines configuration page
+@research_bp.route('/settings/search_engines', methods=['GET'])
+def search_engines_config_page():
+    """Edit search engines configuration using raw file editor"""
+    # Read the current config file
+    raw_config = ""
+    try:
+        with open(SEARCH_ENGINES_FILE, 'r') as f:
+            raw_config = f.read()
+    except Exception as e:
+        flash(f'Error reading search engines configuration: {str(e)}', 'error')
+        raw_config = "# Error reading configuration file"
+
+    # Get list of engine names for display
+    engine_names = []
+    try:
+        from local_deep_research.web_search_engines.search_engines_config import SEARCH_ENGINES
+        engine_names = list(SEARCH_ENGINES.keys())
+        engine_names.sort()  # Alphabetical order
+    except Exception as e:
+        logger.error(f"Error getting engine names: {e}")
+
+    return render_template('search_engines_config.html',
+                           search_engines_file_path=SEARCH_ENGINES_FILE,
+                           raw_config=raw_config,
+                           engine_names=engine_names)
+
+# Add a route to save search engines configuration
+@research_bp.route('/api/save_search_engines_config', methods=['POST'])
+def save_search_engines_config():
+    try:
+        data = request.get_json()
+        raw_config = data.get('raw_config', '')
+
+        # Validate TOML syntax
+        try:
+            toml.loads(raw_config)
+        except toml.TomlDecodeError as e:
+            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
+
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(SEARCH_ENGINES_FILE), exist_ok=True)
+
+        # Create a backup first
+        backup_path = f"{SEARCH_ENGINES_FILE}.bak"
+        if os.path.exists(SEARCH_ENGINES_FILE):
+            import shutil
+            shutil.copy2(SEARCH_ENGINES_FILE, backup_path)
+
+        # Write new config
+        with open(SEARCH_ENGINES_FILE, 'w') as f:
+            f.write(raw_config)
+
+        return jsonify({'success': True})
+    except Exception as e:
+        return jsonify({'success': False, 'error': str(e)})
+
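The save endpoint above validates the TOML before writing and keeps a .bak copy of the previous file. An illustrative call (the TOML payload here is a neutral placeholder, not the package's real engine schema; same server assumptions as the earlier examples):

import requests

payload = {"raw_config": "[example_engine]\nenabled = true\n"}
r = requests.post("http://localhost:5000/research/api/save_search_engines_config",
                  json=payload)
print(r.json())  # {'success': True} on success, or {'success': False, 'error': ...}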
+
+# API endpoint to save raw LLM config
+@research_bp.route('/api/save_llm_config', methods=['POST'])
+def save_llm_config():
+    try:
+        data = request.get_json()
+        raw_config = data.get('raw_config', '')
+
+        # Validate Python syntax
+        try:
+            compile(raw_config, '<string>', 'exec')
+        except SyntaxError as e:
+            return jsonify({'success': False, 'error': f'Syntax error: {str(e)}'})
+
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(LLM_CONFIG_FILE), exist_ok=True)
+
+        # Create a backup first
+        backup_path = f"{LLM_CONFIG_FILE}.bak"
+        if os.path.exists(LLM_CONFIG_FILE):
+            import shutil
+            shutil.copy2(LLM_CONFIG_FILE, backup_path)
+
+        # Write new config
+        with open(LLM_CONFIG_FILE, 'w') as f:
+            f.write(raw_config)
+
+        return jsonify({'success': True})
+    except Exception as e:
+        return jsonify({'success': False, 'error': str(e)})
+
+# API endpoint to save raw collections config
+@research_bp.route('/api/save_collections_config', methods=['POST'])
+def save_collections_config():
+    try:
+        data = request.get_json()
+        raw_config = data.get('raw_config', '')
+
+        # Validate TOML syntax
+        try:
+            toml.loads(raw_config)
+        except toml.TomlDecodeError as e:
+            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
+
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(LOCAL_COLLECTIONS_FILE), exist_ok=True)
+
+        # Create a backup first
+        backup_path = f"{LOCAL_COLLECTIONS_FILE}.bak"
+        if os.path.exists(LOCAL_COLLECTIONS_FILE):
+            import shutil
+            shutil.copy2(LOCAL_COLLECTIONS_FILE, backup_path)
+
+        # Write new config
+        with open(LOCAL_COLLECTIONS_FILE, 'w') as f:
+            f.write(raw_config)
+
+        # Also trigger a reload in the collections system
+        try:
+            load_local_collections(reload=True)
+        except Exception as reload_error:
+            return jsonify({'success': True, 'warning': f'Config saved, but error reloading: {str(reload_error)}'})
+
+        return jsonify({'success': True})
+    except Exception as e:
+        return jsonify({'success': False, 'error': str(e)})
+
+# API endpoint to save raw main config
+@research_bp.route('/api/save_main_config', methods=['POST'])
+def save_raw_main_config():
+    try:
+        data = request.get_json()
+        raw_config = data.get('raw_config', '')
+
+        # Validate TOML syntax
+        try:
+            toml.loads(raw_config)
+        except toml.TomlDecodeError as e:
+            return jsonify({'success': False, 'error': f'TOML syntax error: {str(e)}'})
+
+        # Ensure directory exists
+        os.makedirs(os.path.dirname(MAIN_CONFIG_FILE), exist_ok=True)
+
+        # Create a backup first
+        backup_path = f"{MAIN_CONFIG_FILE}.bak"
+        if os.path.exists(MAIN_CONFIG_FILE):
+            import shutil
+            shutil.copy2(MAIN_CONFIG_FILE, backup_path)
+
+        # Write new config
+        with open(MAIN_CONFIG_FILE, 'w') as f:
+            f.write(raw_config)
+
+        return jsonify({'success': True})
+    except Exception as e:
+        return jsonify({'success': False, 'error': str(e)})
+@research_bp.route('/raw_config')
+def get_raw_config():
+    """Return the raw configuration file content"""
+    try:
+        # Determine which config file to load based on a query parameter
+        config_type = request.args.get('type', 'main')
+
+        if config_type == 'main':
+            config_path = os.path.join(app.config['CONFIG_DIR'], 'config.toml')
+            with open(config_path, 'r') as f:
+                return f.read()
+        elif config_type == 'llm':
+            config_path = os.path.join(app.config['CONFIG_DIR'], 'llm_config.py')
+            with open(config_path, 'r') as f:
+                return f.read()
+        elif config_type == 'collections':
+            config_path = os.path.join(app.config['CONFIG_DIR'], 'collections.toml')
+            with open(config_path, 'r') as f:
+                return f.read()
+        else:
+            return "Unknown configuration type", 400
+    except Exception as e:
+        return str(e), 500
+import os
+import subprocess
+import platform
+
+@research_bp.route('/open_file_location', methods=['POST'])
+def open_file_location():
+    file_path = request.form.get('file_path')
+
+    if not file_path:
+        flash('No file path provided', 'error')
+        return redirect(url_for('research.settings_page'))
+
+    # Get the directory containing the file
+    dir_path = os.path.dirname(os.path.abspath(file_path))
+
+    # Open the directory in the file explorer
+    try:
+        if platform.system() == "Windows":
+            subprocess.Popen(f'explorer "{dir_path}"')
+        elif platform.system() == "Darwin":  # macOS
+            subprocess.Popen(["open", dir_path])
+        else:  # Linux
+            subprocess.Popen(["xdg-open", dir_path])
+
+        flash(f'Opening folder: {dir_path}', 'success')
+    except Exception as e:
+        flash(f'Error opening folder: {str(e)}', 'error')
+
+    # Redirect back to the settings page
+    if 'llm' in file_path:
+        return redirect(url_for('research.llm_config_page'))
+    elif 'collections' in file_path:
+        return redirect(url_for('research.collections_config_page'))
+    else:
+        return redirect(url_for('research.main_config_page'))
+# Register the blueprint
+app.register_blueprint(research_bp)
+
+# Also add the static route at the app level for compatibility
+@app.route('/static/<path:path>')
+def app_serve_static(path):
+    return send_from_directory(app.static_folder, path)
+
+# Add favicon route to prevent 404 errors
+@app.route('/favicon.ico')
+def favicon():
+    return send_from_directory(app.static_folder, 'favicon.ico', mimetype='image/x-icon')
+
+
+# Add this function to app.py
+def convert_debug_to_markdown(raw_text, query):
+    """
+    Convert the debug-formatted text to clean markdown.
+
+    Args:
+        raw_text: The raw formatted findings with debug symbols
+        query: Original research query
+
+    Returns:
+        Clean markdown formatted text
+    """
+    # If there's a "DETAILED FINDINGS:" section, extract everything after it
+    if "DETAILED FINDINGS:" in raw_text:
+        detailed_index = raw_text.index("DETAILED FINDINGS:")
+        content = raw_text[detailed_index + len("DETAILED FINDINGS:"):].strip()
+    else:
+        content = raw_text
+
+    # Remove divider lines with === symbols
+    content = "\n".join([line for line in content.split("\n")
+                         if not line.strip().startswith("===") and not line.strip() == "="*80])
+
+    # If COMPLETE RESEARCH OUTPUT exists, remove that section
+    if "COMPLETE RESEARCH OUTPUT" in content:
+        content = content.split("COMPLETE RESEARCH OUTPUT")[0].strip()
+
+    # Remove SEARCH QUESTIONS BY ITERATION section
+    if "SEARCH QUESTIONS BY ITERATION:" in content:
+        search_index = content.index("SEARCH QUESTIONS BY ITERATION:")
+        next_major_section = -1
+        for marker in ["DETAILED FINDINGS:", "COMPLETE RESEARCH:"]:
+            if marker in content[search_index:]:
+                marker_pos = content.index(marker, search_index)
+                if next_major_section == -1 or marker_pos < next_major_section:
+                    next_major_section = marker_pos
+
+        if next_major_section != -1:
+            content = content[:search_index] + content[next_major_section:]
+        else:
+            # If no later section, just remove everything from SEARCH QUESTIONS onwards
+            content = content[:search_index].strip()
+
+    return content.strip()
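A tiny illustration of the cleanup helper on a fabricated debug string (it assumes the module imports cleanly in your environment; note that importing it runs the Flask setup above, and the markers match the ones the function strips):

from local_deep_research.web.app import convert_debug_to_markdown

raw = (
    "SEARCH QUESTIONS BY ITERATION:\n"
    "1. What is X?\n"
    "DETAILED FINDINGS:\n"
    + "=" * 80 + "\n"
    "X is a placeholder topic.\n"
)
print(convert_debug_to_markdown(raw, "What is X?"))  # -> "X is a placeholder topic."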
+def main():
+    """
+    Entry point for the web application when run as a command.
+    This function is needed for the package's entry point to work properly.
+    """
+
+    # Check for OpenAI availability but don't import it unless necessary
+    try:
+        import os
+        api_key = os.environ.get("OPENAI_API_KEY")
+        if api_key:
+            try:
+                # Only try to import if we have an API key
+                import openai
+                openai.api_key = api_key
+                OPENAI_AVAILABLE = True
+                print("OpenAI integration is available")
+            except ImportError:
+                print("OpenAI package not installed, integration disabled")
+        else:
+            print("OPENAI_API_KEY not found in environment variables, OpenAI integration disabled")
+    except Exception as e:
+        print(f"Error checking OpenAI availability: {e}")
+
+
+    socketio.run(app, debug=True, host='0.0.0.0', port=5000, allow_unsafe_werkzeug=True)
+
+if __name__ == '__main__':
+    main()