local-deep-research 0.3.12__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registries.
- local_deep_research/__version__.py +1 -1
- local_deep_research/advanced_search_system/filters/base_filter.py +2 -3
- local_deep_research/advanced_search_system/filters/cross_engine_filter.py +4 -5
- local_deep_research/advanced_search_system/filters/journal_reputation_filter.py +298 -0
- local_deep_research/advanced_search_system/findings/repository.py +0 -3
- local_deep_research/advanced_search_system/strategies/base_strategy.py +1 -2
- local_deep_research/advanced_search_system/strategies/iterdrag_strategy.py +14 -18
- local_deep_research/advanced_search_system/strategies/parallel_search_strategy.py +4 -8
- local_deep_research/advanced_search_system/strategies/rapid_search_strategy.py +5 -6
- local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -2
- local_deep_research/advanced_search_system/strategies/standard_strategy.py +9 -7
- local_deep_research/api/benchmark_functions.py +288 -0
- local_deep_research/api/research_functions.py +8 -4
- local_deep_research/benchmarks/README.md +162 -0
- local_deep_research/benchmarks/__init__.py +51 -0
- local_deep_research/benchmarks/benchmark_functions.py +353 -0
- local_deep_research/benchmarks/cli/__init__.py +16 -0
- local_deep_research/benchmarks/cli/benchmark_commands.py +338 -0
- local_deep_research/benchmarks/cli.py +347 -0
- local_deep_research/benchmarks/comparison/__init__.py +12 -0
- local_deep_research/benchmarks/comparison/evaluator.py +768 -0
- local_deep_research/benchmarks/datasets/__init__.py +53 -0
- local_deep_research/benchmarks/datasets/base.py +295 -0
- local_deep_research/benchmarks/datasets/browsecomp.py +116 -0
- local_deep_research/benchmarks/datasets/custom_dataset_template.py +98 -0
- local_deep_research/benchmarks/datasets/simpleqa.py +74 -0
- local_deep_research/benchmarks/datasets/utils.py +116 -0
- local_deep_research/benchmarks/datasets.py +31 -0
- local_deep_research/benchmarks/efficiency/__init__.py +14 -0
- local_deep_research/benchmarks/efficiency/resource_monitor.py +367 -0
- local_deep_research/benchmarks/efficiency/speed_profiler.py +214 -0
- local_deep_research/benchmarks/evaluators/__init__.py +18 -0
- local_deep_research/benchmarks/evaluators/base.py +74 -0
- local_deep_research/benchmarks/evaluators/browsecomp.py +83 -0
- local_deep_research/benchmarks/evaluators/composite.py +121 -0
- local_deep_research/benchmarks/evaluators/simpleqa.py +271 -0
- local_deep_research/benchmarks/graders.py +410 -0
- local_deep_research/benchmarks/metrics/README.md +80 -0
- local_deep_research/benchmarks/metrics/__init__.py +24 -0
- local_deep_research/benchmarks/metrics/calculation.py +385 -0
- local_deep_research/benchmarks/metrics/reporting.py +155 -0
- local_deep_research/benchmarks/metrics/visualization.py +205 -0
- local_deep_research/benchmarks/metrics.py +11 -0
- local_deep_research/benchmarks/optimization/__init__.py +32 -0
- local_deep_research/benchmarks/optimization/api.py +274 -0
- local_deep_research/benchmarks/optimization/metrics.py +20 -0
- local_deep_research/benchmarks/optimization/optuna_optimizer.py +1163 -0
- local_deep_research/benchmarks/runners.py +434 -0
- local_deep_research/benchmarks/templates.py +65 -0
- local_deep_research/config/llm_config.py +26 -23
- local_deep_research/config/search_config.py +1 -5
- local_deep_research/defaults/default_settings.json +108 -7
- local_deep_research/search_system.py +16 -8
- local_deep_research/utilities/db_utils.py +3 -6
- local_deep_research/utilities/es_utils.py +441 -0
- local_deep_research/utilities/log_utils.py +36 -0
- local_deep_research/utilities/search_utilities.py +8 -9
- local_deep_research/web/app.py +7 -9
- local_deep_research/web/app_factory.py +9 -12
- local_deep_research/web/database/migrations.py +8 -5
- local_deep_research/web/database/models.py +20 -0
- local_deep_research/web/database/schema_upgrade.py +5 -8
- local_deep_research/web/models/database.py +15 -18
- local_deep_research/web/routes/benchmark_routes.py +427 -0
- local_deep_research/web/routes/research_routes.py +13 -17
- local_deep_research/web/routes/settings_routes.py +264 -67
- local_deep_research/web/services/research_service.py +47 -57
- local_deep_research/web/services/settings_manager.py +1 -4
- local_deep_research/web/services/settings_service.py +4 -6
- local_deep_research/web/static/css/styles.css +12 -0
- local_deep_research/web/static/js/components/logpanel.js +164 -155
- local_deep_research/web/static/js/components/research.js +44 -3
- local_deep_research/web/static/js/components/settings.js +27 -0
- local_deep_research/web/static/js/services/socket.js +47 -0
- local_deep_research/web_search_engines/default_search_engines.py +38 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +100 -33
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +31 -17
- local_deep_research/web_search_engines/engines/search_engine_brave.py +8 -3
- local_deep_research/web_search_engines/engines/search_engine_elasticsearch.py +343 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +14 -6
- local_deep_research/web_search_engines/engines/search_engine_local.py +19 -23
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +9 -12
- local_deep_research/web_search_engines/engines/search_engine_searxng.py +12 -17
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +8 -4
- local_deep_research/web_search_engines/search_engine_base.py +22 -5
- local_deep_research/web_search_engines/search_engine_factory.py +32 -11
- local_deep_research/web_search_engines/search_engines_config.py +14 -1
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/METADATA +10 -2
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/RECORD +92 -49
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/WHEEL +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/entry_points.txt +0 -0
- {local_deep_research-0.3.12.dist-info → local_deep_research-0.4.0.dist-info}/licenses/LICENSE +0 -0
local_deep_research/web/routes/benchmark_routes.py (new file, @@ -0,0 +1,427 @@):

```python
"""
Web routes for benchmarking.

This module provides Flask routes for the benchmark web interface.
"""

import logging
import os
import threading

from flask import Blueprint, jsonify, render_template, request

from ...api.benchmark_functions import (
    compare_configurations,
    evaluate_browsecomp,
    evaluate_simpleqa,
    get_available_benchmarks,
)

logger = logging.getLogger(__name__)

# Create blueprint
benchmark_bp = Blueprint("benchmark", __name__, url_prefix="/benchmark")

# Store running jobs
running_jobs = {}


def run_benchmark_task(job_id, benchmark_type, params, callback=None):
    """
    Run a benchmark task in a separate thread.

    Args:
        job_id: Unique job ID
        benchmark_type: Type of benchmark to run
        params: Parameters for the benchmark
        callback: Optional callback to run when job completes
    """
    try:
        # Update job status to running
        running_jobs[job_id]["status"] = "running"

        # Run the benchmark based on type
        if benchmark_type == "simpleqa":
            result = evaluate_simpleqa(**params)
        elif benchmark_type == "browsecomp":
            result = evaluate_browsecomp(**params)
        elif benchmark_type == "compare":
            result = compare_configurations(**params)
        else:
            result = {"error": f"Unknown benchmark type: {benchmark_type}"}

        # Update job with result
        running_jobs[job_id]["status"] = "completed"
        running_jobs[job_id]["result"] = result

        # Call callback if provided
        if callback:
            callback(job_id, result)

    except Exception as e:
        logger.error(f"Error running benchmark job {job_id}: {str(e)}")
        running_jobs[job_id]["status"] = "error"
        running_jobs[job_id]["error"] = str(e)


@benchmark_bp.route("/", methods=["GET"])
def benchmark_dashboard():
    """Render benchmark dashboard."""
    return render_template(
        "benchmark/dashboard.html", benchmarks=get_available_benchmarks()
    )


@benchmark_bp.route("/run", methods=["POST"])
def run_benchmark_endpoint():
    """Run benchmark with specified parameters."""
    data = request.json

    # Extract benchmark type
    benchmark_type = data.get("benchmark_type")
    if not benchmark_type:
        return jsonify({"error": "benchmark_type is required"}), 400

    # Generate job ID
    import uuid

    job_id = str(uuid.uuid4())

    # Extract parameters
    params = {
        "num_examples": data.get("num_examples", 100),
        "search_iterations": data.get("search_iterations", 3),
        "questions_per_iteration": data.get("questions_per_iteration", 3),
        "search_tool": data.get("search_tool", "searxng"),
        "human_evaluation": data.get("human_evaluation", False),
        "output_dir": os.path.join("benchmark_results", job_id),
    }

    # Add optional parameters if present
    if "evaluation_model" in data:
        params["evaluation_model"] = data["evaluation_model"]
    if "evaluation_provider" in data:
        params["evaluation_provider"] = data["evaluation_provider"]

    # Store job info
    running_jobs[job_id] = {
        "id": job_id,
        "benchmark_type": benchmark_type,
        "params": params,
        "status": "pending",
        "start_time": import_time().time(),
    }

    # Start job in background thread
    thread = threading.Thread(
        target=run_benchmark_task, args=(job_id, benchmark_type, params)
    )
    thread.daemon = True
    thread.start()

    return jsonify(
        {
            "status": "started",
            "job_id": job_id,
            "message": f"Benchmark job started: {benchmark_type}",
        }
    )


@benchmark_bp.route("/status/<job_id>", methods=["GET"])
def benchmark_status(job_id):
    """Get status of a benchmark job."""
    if job_id not in running_jobs:
        return jsonify({"error": "Job not found"}), 404

    job = running_jobs[job_id]

    # Calculate runtime if job is running
    if job["status"] == "running":
        job["runtime"] = import_time().time() - job["start_time"]

    return jsonify(job)


@benchmark_bp.route("/results/<job_id>", methods=["GET"])
def benchmark_results(job_id):
    """Get results of a completed benchmark job."""
    if job_id not in running_jobs:
        return jsonify({"error": "Job not found"}), 404

    job = running_jobs[job_id]

    if job["status"] != "completed":
        return jsonify({"error": f"Job is not completed: {job['status']}"}), 400

    # Return job result
    return jsonify(job["result"])


@benchmark_bp.route("/list", methods=["GET"])
def list_benchmarks():
    """List available benchmarks."""
    return jsonify(get_available_benchmarks())


@benchmark_bp.route("/jobs", methods=["GET"])
def list_jobs():
    """List all benchmark jobs."""
    return jsonify(running_jobs)


@benchmark_bp.route("/config", methods=["GET"])
def get_benchmark_config():
    """Get benchmark configuration options."""
    return jsonify(
        {
            "search_tools": [
                {"id": "searxng", "name": "SearXNG"},
                {"id": "wikipedia", "name": "Wikipedia"},
                {"id": "arxiv", "name": "ArXiv"},
                {"id": "pubmed", "name": "PubMed"},
                {"id": "auto", "name": "Auto (Multiple Engines)"},
            ],
            "evaluation_providers": [
                {"id": "openai_endpoint", "name": "Claude (via OpenRouter)"},
                {"id": "openai", "name": "OpenAI"},
                {"id": "anthropic", "name": "Anthropic"},
                {"id": "ollama", "name": "Ollama (Local)"},
            ],
            "evaluation_models": {
                "openai_endpoint": [
                    {"id": "anthropic/claude-3.7-sonnet", "name": "Claude 3.7 Sonnet"}
                ],
                "openai": [
                    {"id": "gpt-4o", "name": "GPT-4o"},
                    {"id": "gpt-4-turbo", "name": "GPT-4 Turbo"},
                    {"id": "gpt-3.5-turbo", "name": "GPT-3.5 Turbo"},
                ],
                "anthropic": [
                    {"id": "claude-3-opus", "name": "Claude 3 Opus"},
                    {"id": "claude-3-sonnet", "name": "Claude 3 Sonnet"},
                    {"id": "claude-3-haiku", "name": "Claude 3 Haiku"},
                ],
                "ollama": [
                    {"id": "llama3", "name": "Llama 3"},
                    {"id": "gemma:7b", "name": "Gemma 7B"},
                    {"id": "mistral", "name": "Mistral"},
                ],
            },
        }
    )


# Utility function for importing time dynamically to avoid circular imports
def import_time():
    """Import time module dynamically."""
    import time

    return time


# Function to register routes with the main app
def register_blueprint(app):
    """Register benchmark routes with the Flask app."""
    app.register_blueprint(benchmark_bp)

    # Create templates directory if it doesn't exist
    template_dir = os.path.join(
        os.path.dirname(app.root_path), "templates", "benchmark"
    )
    os.makedirs(template_dir, exist_ok=True)

    # Create dashboard template if it doesn't exist
    dashboard_template = os.path.join(template_dir, "dashboard.html")
    if not os.path.exists(dashboard_template):
        with open(dashboard_template, "w") as f:
            f.write(
                """
{% extends "base.html" %}
{% block title %}Benchmarks{% endblock %}
{% block content %}
<div class="container">
    <h1>LDR Benchmarks</h1>

    <div class="card mb-4">
        <div class="card-header">
            <h2>Run Benchmark</h2>
        </div>
        <div class="card-body">
            <form id="benchmarkForm">
                <div class="form-group">
                    <label for="benchmarkType">Benchmark</label>
                    <select class="form-control" id="benchmarkType" required>
                        {% for benchmark in benchmarks %}
                        <option value="{{ benchmark.id }}">{{ benchmark.name }} - {{ benchmark.description }}</option>
                        {% endfor %}
                    </select>
                </div>

                <div class="form-group">
                    <label for="numExamples">Number of Examples</label>
                    <input type="number" class="form-control" id="numExamples" value="10" min="1" max="1000">
                    <small class="form-text text-muted">Higher numbers give more accurate results but take longer</small>
                </div>

                <div class="form-group">
                    <label for="searchIterations">Search Iterations</label>
                    <input type="number" class="form-control" id="searchIterations" value="3" min="1" max="10">
                </div>

                <div class="form-group">
                    <label for="questionsPerIteration">Questions Per Iteration</label>
                    <input type="number" class="form-control" id="questionsPerIteration" value="3" min="1" max="10">
                </div>

                <div class="form-group">
                    <label for="searchTool">Search Tool</label>
                    <select class="form-control" id="searchTool">
                        <option value="searxng">SearXNG</option>
                        <option value="wikipedia">Wikipedia</option>
                        <option value="auto">Auto (Multiple Engines)</option>
                    </select>
                </div>

                <button type="submit" class="btn btn-primary">Start Benchmark</button>
            </form>
        </div>
    </div>

    <div class="card mb-4" id="benchmarkStatus" style="display: none;">
        <div class="card-header">
            <h2>Benchmark Status</h2>
        </div>
        <div class="card-body">
            <div class="progress mb-3">
                <div class="progress-bar" id="benchmarkProgress" role="progressbar" style="width: 0%"></div>
            </div>
            <p id="statusMessage">Initializing benchmark...</p>
            <button class="btn btn-secondary" id="viewResults" style="display: none;">View Results</button>
        </div>
    </div>

    <div class="card" id="benchmarkResults" style="display: none;">
        <div class="card-header">
            <h2>Benchmark Results</h2>
        </div>
        <div class="card-body" id="resultsContent">
        </div>
    </div>
</div>

<script>
let currentJobId = null;
let statusInterval = null;

document.getElementById('benchmarkForm').addEventListener('submit', function(e) {
    e.preventDefault();

    const benchmarkType = document.getElementById('benchmarkType').value;
    const numExamples = document.getElementById('numExamples').value;
    const searchIterations = document.getElementById('searchIterations').value;
    const questionsPerIteration = document.getElementById('questionsPerIteration').value;
    const searchTool = document.getElementById('searchTool').value;

    // Show status card
    document.getElementById('benchmarkStatus').style.display = 'block';
    document.getElementById('benchmarkResults').style.display = 'none';
    document.getElementById('viewResults').style.display = 'none';

    // Start benchmark
    fetch('/benchmark/run', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            benchmark_type: benchmarkType,
            num_examples: parseInt(numExamples),
            search_iterations: parseInt(searchIterations),
            questions_per_iteration: parseInt(questionsPerIteration),
            search_tool: searchTool
        })
    })
    .then(response => response.json())
    .then(data => {
        if (data.job_id) {
            currentJobId = data.job_id;
            document.getElementById('statusMessage').textContent = data.message;

            // Start polling for status updates
            statusInterval = setInterval(checkStatus, 2000);
        } else {
            document.getElementById('statusMessage').textContent = 'Error: ' + data.error;
        }
    })
    .catch(error => {
        document.getElementById('statusMessage').textContent = 'Error: ' + error;
    });
});

function checkStatus() {
    if (!currentJobId) return;

    fetch('/benchmark/status/' + currentJobId)
    .then(response => response.json())
    .then(data => {
        if (data.status === 'running') {
            const runtime = data.runtime || 0;
            document.getElementById('statusMessage').textContent = `Running benchmark... (${Math.round(runtime)}s elapsed)`;
            document.getElementById('benchmarkProgress').style.width = '50%';
        } else if (data.status === 'completed') {
            clearInterval(statusInterval);
            document.getElementById('statusMessage').textContent = 'Benchmark completed successfully!';
            document.getElementById('benchmarkProgress').style.width = '100%';
            document.getElementById('viewResults').style.display = 'inline-block';
            document.getElementById('viewResults').onclick = function() {
                showResults(currentJobId);
            };
        } else if (data.status === 'error') {
            clearInterval(statusInterval);
            document.getElementById('statusMessage').textContent = 'Error: ' + data.error;
            document.getElementById('benchmarkProgress').style.width = '100%';
            document.getElementById('benchmarkProgress').classList.add('bg-danger');
        }
    })
    .catch(error => {
        document.getElementById('statusMessage').textContent = 'Error checking status: ' + error;
    });
}

function showResults(jobId) {
    fetch('/benchmark/results/' + jobId)
    .then(response => response.json())
    .then(data => {
        document.getElementById('benchmarkResults').style.display = 'block';

        let html = '';

        if (data.metrics) {
            html += `<h3>Summary</h3>`;
            html += `<p><strong>Accuracy:</strong> ${(data.metrics.accuracy * 100).toFixed(1)}%</p>`;
            html += `<p><strong>Examples:</strong> ${data.metrics.total_examples}</p>`;
            html += `<p><strong>Correct:</strong> ${data.metrics.correct}</p>`;

            if (data.metrics.average_processing_time) {
                html += `<p><strong>Average Processing Time:</strong> ${data.metrics.average_processing_time.toFixed(2)}s</p>`;
            }

            html += `<p><a href="${data.report_path}" target="_blank" class="btn btn-info">View Full Report</a></p>`;
        } else {
            html += `<p>No metrics available. Check the results file for details.</p>`;
            html += `<p><a href="${data.results_path}" target="_blank" class="btn btn-info">View Results File</a></p>`;
        }

        document.getElementById('resultsContent').innerHTML = html;
    })
    .catch(error => {
        document.getElementById('resultsContent').innerHTML = `<p>Error loading results: ${error}</p>`;
    });
}
</script>
{% endblock %}
"""
            )

    logger.info("Benchmark routes registered")
```
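For orientation, the new blueprint can be exercised over plain HTTP once the web app is running. The sketch below is a minimal polling client based only on the request fields and response keys visible in the routes above; the base URL (host and port) is an assumption and should be adjusted to your deployment, and this is not an official client shipped with the package.

```python
# Hypothetical client for the /benchmark endpoints shown above (stdlib only).
import json
import time
import urllib.request

BASE_URL = "http://localhost:5000"  # assumption: adjust host/port to your deployment

# Start a benchmark job via POST /benchmark/run.
payload = json.dumps({
    "benchmark_type": "simpleqa",
    "num_examples": 10,
    "search_iterations": 3,
    "questions_per_iteration": 3,
    "search_tool": "searxng",
}).encode()

req = urllib.request.Request(
    f"{BASE_URL}/benchmark/run",
    data=payload,
    headers={"Content-Type": "application/json"},
    method="POST",
)
with urllib.request.urlopen(req) as resp:
    job = json.load(resp)
job_id = job["job_id"]

# Poll GET /benchmark/status/<job_id> until the job finishes or fails.
while True:
    with urllib.request.urlopen(f"{BASE_URL}/benchmark/status/{job_id}") as resp:
        status = json.load(resp)
    if status["status"] in ("completed", "error"):
        break
    time.sleep(2)

# Fetch the final metrics from GET /benchmark/results/<job_id>.
if status["status"] == "completed":
    with urllib.request.urlopen(f"{BASE_URL}/benchmark/results/{job_id}") as resp:
        print(json.load(resp))
```

This mirrors the flow of the bundled dashboard template, which POSTs to /benchmark/run and polls /benchmark/status every two seconds.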
local_deep_research/web/routes/research_routes.py:

```diff
@@ -1,5 +1,4 @@
 import json
-import logging
 import os
 import platform
 import subprocess
@@ -14,14 +13,12 @@ from flask import (
     send_from_directory,
     url_for,
 )
+from loguru import logger
 
 from ..models.database import add_log_to_db, calculate_duration, get_db_connection
 from ..services.research_service import run_research_process, start_research_process
 from ..utils.templates import render_template_with_defaults
 
-# Initialize logger
-logger = logging.getLogger(__name__)
-
 # Create a Blueprint for the research application
 research_bp = Blueprint("research", __name__, url_prefix="/research")
 
@@ -370,8 +367,8 @@ def terminate_research(research_id):
 
         emit_socket_event(f"research_progress_{research_id}", event_data)
 
-    except Exception
-
+    except Exception:
+        logger.exception("Socket emit error (non-critical)")
 
     return jsonify({"status": "success", "message": "Research termination requested"})
 
@@ -411,8 +408,8 @@ def delete_research(research_id):
     if report_path and os.path.exists(report_path):
         try:
             os.remove(report_path)
-        except Exception
-
+        except Exception:
+            logger.exception("Error removing report file")
 
     # Delete the database record
     cursor.execute("DELETE FROM research_history WHERE id = ?", (research_id,))
@@ -443,8 +440,8 @@ def clear_history():
         if report_path and os.path.exists(report_path):
             try:
                 os.remove(report_path)
-            except Exception
-
+            except Exception:
+                logger.exception("Error removing report file")
 
         # Delete records from the database, except active research
         placeholders = ", ".join(["?"] * len(active_research))
@@ -461,6 +458,7 @@ def clear_history():
 
         return jsonify({"status": "success"})
     except Exception as e:
+        logger.exception("Error clearing history")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
@@ -496,6 +494,7 @@ def open_file_location():
 
         return jsonify({"status": "success"})
     except Exception as e:
+        logger.exception("Error opening a file")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
@@ -523,6 +522,7 @@ def save_raw_config():
 
         return jsonify({"success": True})
     except Exception as e:
+        logger.exception("Error saving configuration file")
         return jsonify({"success": False, "error": str(e)}), 500
 
 
@@ -578,8 +578,8 @@ def get_history():
             if completed_at and created_at:
                 try:
                     duration_seconds = calculate_duration(created_at, completed_at)
-                except Exception
-
+                except Exception:
+                    logger.exception("Error calculating duration")
 
             # Create a history item
             item = {
@@ -602,11 +602,7 @@ def get_history():
         conn.close()
         return jsonify({"status": "success", "items": history_items})
     except Exception as e:
-
-        import traceback
-
-        print(f"Error getting history: {e}")
-        print(traceback.format_exc())
+        logger.exception("Error getting history")
         return jsonify({"status": "error", "message": str(e)}), 500
 
 
```
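The research_routes changes all follow the same logging pattern adopted in this release: the module-level logging.getLogger is replaced by loguru's shared logger, and error paths that previously printed tracebacks or silently swallowed exceptions now call logger.exception, which records the message together with the active traceback. A minimal illustration of that pattern (a sketch, not code from the package) is:

```python
# Hedged illustration of the loguru error-handling pattern used above.
import os

from loguru import logger


def remove_report(report_path: str) -> None:
    try:
        os.remove(report_path)
    except Exception:
        # logger.exception logs at ERROR level and appends the current traceback,
        # replacing the older print(traceback.format_exc()) style seen in 0.3.12.
        logger.exception("Error removing report file")
```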