local-deep-research 0.5.9__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. local_deep_research/__version__.py +1 -1
  2. local_deep_research/advanced_search_system/candidate_exploration/progressive_explorer.py +11 -1
  3. local_deep_research/advanced_search_system/questions/browsecomp_question.py +32 -6
  4. local_deep_research/advanced_search_system/strategies/focused_iteration_strategy.py +32 -8
  5. local_deep_research/advanced_search_system/strategies/source_based_strategy.py +2 -0
  6. local_deep_research/api/__init__.py +2 -0
  7. local_deep_research/api/research_functions.py +177 -3
  8. local_deep_research/benchmarks/graders.py +150 -5
  9. local_deep_research/benchmarks/models/__init__.py +19 -0
  10. local_deep_research/benchmarks/models/benchmark_models.py +283 -0
  11. local_deep_research/benchmarks/ui/__init__.py +1 -0
  12. local_deep_research/benchmarks/web_api/__init__.py +6 -0
  13. local_deep_research/benchmarks/web_api/benchmark_routes.py +862 -0
  14. local_deep_research/benchmarks/web_api/benchmark_service.py +920 -0
  15. local_deep_research/config/llm_config.py +106 -21
  16. local_deep_research/defaults/default_settings.json +447 -2
  17. local_deep_research/error_handling/report_generator.py +10 -0
  18. local_deep_research/llm/__init__.py +19 -0
  19. local_deep_research/llm/llm_registry.py +155 -0
  20. local_deep_research/metrics/db_models.py +3 -7
  21. local_deep_research/metrics/search_tracker.py +25 -11
  22. local_deep_research/search_system.py +12 -9
  23. local_deep_research/utilities/log_utils.py +23 -10
  24. local_deep_research/utilities/thread_context.py +99 -0
  25. local_deep_research/web/app_factory.py +32 -8
  26. local_deep_research/web/database/benchmark_schema.py +230 -0
  27. local_deep_research/web/database/convert_research_id_to_string.py +161 -0
  28. local_deep_research/web/database/models.py +55 -1
  29. local_deep_research/web/database/schema_upgrade.py +397 -2
  30. local_deep_research/web/database/uuid_migration.py +265 -0
  31. local_deep_research/web/routes/api_routes.py +62 -31
  32. local_deep_research/web/routes/history_routes.py +13 -6
  33. local_deep_research/web/routes/metrics_routes.py +264 -4
  34. local_deep_research/web/routes/research_routes.py +45 -18
  35. local_deep_research/web/routes/route_registry.py +352 -0
  36. local_deep_research/web/routes/settings_routes.py +382 -22
  37. local_deep_research/web/services/research_service.py +22 -29
  38. local_deep_research/web/services/settings_manager.py +53 -0
  39. local_deep_research/web/services/settings_service.py +2 -0
  40. local_deep_research/web/static/css/styles.css +8 -0
  41. local_deep_research/web/static/js/components/detail.js +7 -14
  42. local_deep_research/web/static/js/components/details.js +8 -10
  43. local_deep_research/web/static/js/components/fallback/ui.js +4 -4
  44. local_deep_research/web/static/js/components/history.js +6 -6
  45. local_deep_research/web/static/js/components/logpanel.js +14 -11
  46. local_deep_research/web/static/js/components/progress.js +51 -46
  47. local_deep_research/web/static/js/components/research.js +250 -89
  48. local_deep_research/web/static/js/components/results.js +5 -7
  49. local_deep_research/web/static/js/components/settings.js +32 -26
  50. local_deep_research/web/static/js/components/settings_sync.js +24 -23
  51. local_deep_research/web/static/js/config/urls.js +285 -0
  52. local_deep_research/web/static/js/main.js +8 -8
  53. local_deep_research/web/static/js/research_form.js +267 -12
  54. local_deep_research/web/static/js/services/api.js +18 -18
  55. local_deep_research/web/static/js/services/keyboard.js +8 -8
  56. local_deep_research/web/static/js/services/socket.js +53 -35
  57. local_deep_research/web/static/js/services/ui.js +1 -1
  58. local_deep_research/web/templates/base.html +4 -1
  59. local_deep_research/web/templates/components/custom_dropdown.html +5 -3
  60. local_deep_research/web/templates/components/mobile_nav.html +3 -3
  61. local_deep_research/web/templates/components/sidebar.html +9 -3
  62. local_deep_research/web/templates/pages/benchmark.html +2697 -0
  63. local_deep_research/web/templates/pages/benchmark_results.html +1274 -0
  64. local_deep_research/web/templates/pages/benchmark_simple.html +453 -0
  65. local_deep_research/web/templates/pages/cost_analytics.html +1 -1
  66. local_deep_research/web/templates/pages/metrics.html +212 -39
  67. local_deep_research/web/templates/pages/research.html +8 -6
  68. local_deep_research/web/templates/pages/star_reviews.html +1 -1
  69. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +14 -1
  70. local_deep_research/web_search_engines/engines/search_engine_brave.py +15 -1
  71. local_deep_research/web_search_engines/engines/search_engine_ddg.py +20 -1
  72. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +26 -2
  73. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +15 -1
  74. local_deep_research/web_search_engines/engines/search_engine_retriever.py +192 -0
  75. local_deep_research/web_search_engines/engines/search_engine_tavily.py +307 -0
  76. local_deep_research/web_search_engines/rate_limiting/__init__.py +14 -0
  77. local_deep_research/web_search_engines/rate_limiting/__main__.py +9 -0
  78. local_deep_research/web_search_engines/rate_limiting/cli.py +209 -0
  79. local_deep_research/web_search_engines/rate_limiting/exceptions.py +21 -0
  80. local_deep_research/web_search_engines/rate_limiting/tracker.py +506 -0
  81. local_deep_research/web_search_engines/retriever_registry.py +108 -0
  82. local_deep_research/web_search_engines/search_engine_base.py +161 -43
  83. local_deep_research/web_search_engines/search_engine_factory.py +14 -0
  84. local_deep_research/web_search_engines/search_engines_config.py +20 -0
  85. local_deep_research-0.6.1.dist-info/METADATA +374 -0
  86. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/RECORD +89 -64
  87. local_deep_research-0.5.9.dist-info/METADATA +0 -420
  88. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/WHEEL +0 -0
  89. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/entry_points.txt +0 -0
  90. {local_deep_research-0.5.9.dist-info → local_deep_research-0.6.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1274 @@
1
+ {% extends "base.html" %}
2
+
3
+ {% set active_page = 'benchmark-results' %}
4
+
5
+ {% block title %}Benchmark Results History - Deep Research System{% endblock %}
6
+
7
+ {% block extra_head %}
8
+ <meta name="csrf-token" content="{{ csrf_token() }}">
9
+ <meta name="app-version" content="{{ version }}">
10
+ <style>
11
+ .benchmark-results-card {
12
+ width: 100%;
13
+ margin: 0;
14
+ padding: 0;
15
+ background: transparent;
16
+ border: none;
17
+ box-shadow: none;
18
+ }
19
+
20
+ .card-content {
21
+ padding: 0;
22
+ }
23
+
24
+ .run-card {
25
+ background: #1a1a1a;
26
+ border: 1px solid #333;
27
+ border-radius: 8px;
28
+ padding: 20px;
29
+ margin-bottom: 20px;
30
+ cursor: pointer;
31
+ transition: border-color 0.2s, background-color 0.2s;
32
+ }
33
+
34
+ .run-card:hover {
35
+ border-color: var(--primary-color);
36
+ background: #1e1e1e;
37
+ }
38
+
39
+ .run-card.expanded {
40
+ border-color: var(--primary-color);
41
+ }
42
+
43
+ .run-header {
44
+ display: flex;
45
+ justify-content: space-between;
46
+ align-items: center;
47
+ margin-bottom: 10px;
48
+ }
49
+
50
+ .run-title {
51
+ font-size: 1.2rem;
52
+ font-weight: bold;
53
+ color: #e0e0e0;
54
+ }
55
+
56
+ .run-date {
57
+ color: #a0a0a0;
58
+ font-size: 0.9rem;
59
+ }
60
+
61
+ .run-summary {
62
+ display: grid;
63
+ grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
64
+ gap: 12px;
65
+ margin-bottom: 15px;
66
+ }
67
+
68
+ /* Responsive grid adjustments for more columns on wider screens */
69
+ @media (min-width: 768px) {
70
+ .run-summary {
71
+ grid-template-columns: repeat(auto-fit, minmax(130px, 1fr));
72
+ gap: 15px;
73
+ }
74
+ }
75
+
76
+ @media (min-width: 1200px) {
77
+ .run-summary {
78
+ grid-template-columns: repeat(auto-fit, minmax(140px, 1fr));
79
+ gap: 16px;
80
+ }
81
+ }
82
+
83
+ @media (min-width: 1600px) {
84
+ .run-summary {
85
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
86
+ gap: 18px;
87
+ }
88
+ }
89
+
90
+ @media (min-width: 1920px) {
91
+ .run-summary {
92
+ grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
93
+ gap: 20px;
94
+ }
95
+ }
96
+
97
+ .summary-item {
98
+ text-align: center;
99
+ padding: 10px;
100
+ background: #2a2a2a;
101
+ border-radius: 6px;
102
+ }
103
+
104
+ .summary-value {
105
+ font-size: 1.4rem;
106
+ font-weight: bold;
107
+ color: var(--primary-color);
108
+ }
109
+
110
+ .summary-label {
111
+ font-size: 0.85rem;
112
+ color: #a0a0a0;
113
+ margin-top: 5px;
114
+ }
115
+
116
+ .accuracy-indicator {
117
+ display: inline-block;
118
+ padding: 4px 8px;
119
+ border-radius: 12px;
120
+ font-size: 0.85rem;
121
+ font-weight: bold;
122
+ }
123
+
124
+ .accuracy-high {
125
+ background: #1e3a1e;
126
+ color: #4caf50;
127
+ }
128
+
129
+ .accuracy-medium {
130
+ background: #3a2a1e;
131
+ color: #ff9800;
132
+ }
133
+
134
+ .accuracy-low {
135
+ background: #3a1e1e;
136
+ color: #f44336;
137
+ }
138
+
139
+ .status-indicator {
140
+ display: inline-block;
141
+ padding: 4px 8px;
142
+ border-radius: 12px;
143
+ font-size: 0.85rem;
144
+ font-weight: bold;
145
+ }
146
+
147
+ .status-completed {
148
+ background: #1e3a1e;
149
+ color: #4caf50;
150
+ }
151
+
152
+ .status-in-progress {
153
+ background: #1e2a3a;
154
+ color: #2196f3;
155
+ }
156
+
157
+ .status-failed {
158
+ background: #3a1e1e;
159
+ color: #f44336;
160
+ }
161
+
162
+ .status-cancelled {
163
+ background: #2a2a2a;
164
+ color: #999;
165
+ }
166
+
167
+ .run-config {
168
+ display: grid;
169
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
170
+ gap: 10px;
171
+ margin-bottom: 15px;
172
+ padding: 15px;
173
+ background: #242424;
174
+ border-radius: 6px;
175
+ }
176
+
177
+ .config-item {
178
+ display: flex;
179
+ justify-content: space-between;
180
+ padding: 5px 0;
181
+ border-bottom: 1px solid #333;
182
+ }
183
+
184
+ .config-label {
185
+ color: #a0a0a0;
186
+ font-size: 0.9rem;
187
+ }
188
+
189
+ .config-value {
190
+ color: #e0e0e0;
191
+ font-weight: 500;
192
+ }
193
+
194
+ .results-section {
195
+ margin-top: 20px;
196
+ display: none;
197
+ }
198
+
199
+ .results-section.visible {
200
+ display: block;
201
+ }
202
+
203
+ .examples-grid {
204
+ display: grid;
205
+ gap: 15px;
206
+ margin-top: 15px;
207
+ }
208
+
209
+ .example-card {
210
+ background: #1a1a1a;
211
+ border: 1px solid #333;
212
+ border-radius: 6px;
213
+ padding: 15px;
214
+ }
215
+
216
+ .example-card.correct {
217
+ border-left: 4px solid #4caf50;
218
+ }
219
+
220
+ .example-card.incorrect {
221
+ border-left: 4px solid #f44336;
222
+ }
223
+
224
+ .example-header {
225
+ display: flex;
226
+ justify-content: space-between;
227
+ align-items: center;
228
+ margin-bottom: 10px;
229
+ }
230
+
231
+ .example-status {
232
+ display: flex;
233
+ align-items: center;
234
+ gap: 5px;
235
+ font-weight: 600;
236
+ }
237
+
238
+ .example-status.correct {
239
+ color: #4caf50;
240
+ }
241
+
242
+ .example-status.incorrect {
243
+ color: #f44336;
244
+ }
245
+
246
+ .example-question {
247
+ background: #2a2a2a;
248
+ padding: 12px;
249
+ border-radius: 4px;
250
+ border-left: 4px solid var(--primary-color);
251
+ margin-bottom: 12px;
252
+ color: #e0e0e0;
253
+ }
254
+
255
+ .example-answers {
256
+ display: grid;
257
+ grid-template-columns: 1fr 1fr;
258
+ gap: 12px;
259
+ }
260
+
261
+ .answer-section {
262
+ padding: 10px;
263
+ border-radius: 4px;
264
+ font-size: 0.9rem;
265
+ line-height: 1.4;
266
+ }
267
+
268
+ .model-answer-section {
269
+ background: #1e2a3a;
270
+ border-left: 3px solid #2196f3;
271
+ }
272
+
273
+ .correct-answer-section {
274
+ background: #1e3a1e;
275
+ border-left: 3px solid #4caf50;
276
+ }
277
+
278
+ .answer-label {
279
+ font-size: 0.75rem;
280
+ font-weight: 600;
281
+ color: #a0a0a0;
282
+ text-transform: uppercase;
283
+ margin-bottom: 6px;
284
+ }
285
+
286
+ .answer-text {
287
+ color: #e0e0e0;
288
+ }
289
+
290
+ .no-results {
291
+ text-align: center;
292
+ color: #a0a0a0;
293
+ padding: 40px;
294
+ font-style: italic;
295
+ }
296
+
297
+ .expand-indicator {
298
+ color: #a0a0a0;
299
+ font-size: 0.9rem;
300
+ margin-top: 10px;
301
+ text-align: center;
302
+ }
303
+
304
+ .dataset-breakdown {
305
+ display: grid;
306
+ grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
307
+ gap: 10px;
308
+ margin-top: 10px;
309
+ }
310
+
311
+ .dataset-item {
312
+ text-align: center;
313
+ padding: 8px;
314
+ background: #2a2a2a;
315
+ border-radius: 4px;
316
+ }
317
+
318
+ .dataset-name {
319
+ font-size: 0.8rem;
320
+ color: #a0a0a0;
321
+ margin-bottom: 4px;
322
+ }
323
+
324
+ .dataset-accuracy {
325
+ font-weight: bold;
326
+ color: var(--primary-color);
327
+ }
328
+
329
+ .loading {
330
+ text-align: center;
331
+ padding: 40px;
332
+ color: #a0a0a0;
333
+ }
334
+
335
+ .pagination {
336
+ display: flex;
337
+ justify-content: center;
338
+ align-items: center;
339
+ gap: 10px;
340
+ margin-top: 30px;
341
+ }
342
+
343
+ .pagination button {
344
+ padding: 8px 12px;
345
+ background: #2a2a2a;
346
+ border: 1px solid #333;
347
+ border-radius: 4px;
348
+ color: #e0e0e0;
349
+ cursor: pointer;
350
+ }
351
+
352
+ .pagination button:hover {
353
+ background: var(--primary-color);
354
+ }
355
+
356
+ .pagination button:disabled {
357
+ opacity: 0.5;
358
+ cursor: not-allowed;
359
+ }
360
+
361
+ .delete-btn {
362
+ background: #3a1e1e !important;
363
+ border-color: #f44336 !important;
364
+ color: #f44336 !important;
365
+ font-size: 0.8rem;
366
+ padding: 4px 8px;
367
+ transition: all 0.2s;
368
+ }
369
+
370
+ .delete-btn:hover:not(:disabled) {
371
+ background: #f44336 !important;
372
+ color: white !important;
373
+ }
374
+
375
+ .delete-btn:disabled {
376
+ background: #2a2a2a !important;
377
+ border-color: #555 !important;
378
+ color: #888 !important;
379
+ cursor: not-allowed;
380
+ }
381
+
382
+ .filters {
383
+ display: grid;
384
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
385
+ gap: 15px;
386
+ margin-bottom: 20px;
387
+ }
388
+
389
+ /* Responsive filter adjustments for full-width layout */
390
+ @media (max-width: 767px) {
391
+ .filters {
392
+ grid-template-columns: 1fr 1fr;
393
+ gap: 12px;
394
+ }
395
+ }
396
+
397
+ @media (min-width: 768px) {
398
+ .filters {
399
+ grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));
400
+ gap: 16px;
401
+ }
402
+ }
403
+
404
+ @media (min-width: 1200px) {
405
+ .filters {
406
+ grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
407
+ gap: 18px;
408
+ }
409
+ }
410
+
411
+ @media (min-width: 1600px) {
412
+ .filters {
413
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
414
+ gap: 20px;
415
+ }
416
+ }
417
+
418
+ .processing-time {
419
+ background: #2a3f2a;
420
+ color: #90ee90;
421
+ padding: 2px 6px;
422
+ border-radius: 3px;
423
+ font-size: 0.8rem;
424
+ font-weight: 500;
425
+ white-space: nowrap;
426
+ }
427
+
428
+ .filter-group {
429
+ display: flex;
430
+ flex-direction: column;
431
+ gap: 5px;
432
+ }
433
+
434
+ .filter-group label {
435
+ font-size: 0.85rem;
436
+ color: #a0a0a0;
437
+ }
438
+
439
+ .filter-group select,
440
+ .filter-group input {
441
+ padding: 6px 10px;
442
+ background: #2a2a2a;
443
+ border: 1px solid #333;
444
+ border-radius: 4px;
445
+ color: #e0e0e0;
446
+ }
447
+
448
+ .search-stats-section {
449
+ margin-bottom: 20px;
450
+ padding: 15px;
451
+ background: #1e1e1e;
452
+ border: 1px solid #333;
453
+ border-radius: 6px;
454
+ }
455
+
456
+ .section-title {
457
+ display: flex;
458
+ align-items: center;
459
+ gap: 8px;
460
+ margin-bottom: 15px;
461
+ color: #e0e0e0;
462
+ font-size: 1.1rem;
463
+ font-weight: 600;
464
+ }
465
+
466
+ .search-stats-grid {
467
+ display: grid;
468
+ grid-template-columns: repeat(auto-fit, minmax(120px, 1fr));
469
+ gap: 15px;
470
+ }
471
+
472
+ .stat-item {
473
+ text-align: center;
474
+ padding: 12px;
475
+ background: #2a2a2a;
476
+ border-radius: 6px;
477
+ }
478
+
479
+ .stat-value {
480
+ font-size: 1.4rem;
481
+ font-weight: bold;
482
+ color: var(--primary-color);
483
+ margin-bottom: 5px;
484
+ }
485
+
486
+ .stat-label {
487
+ font-size: 0.85rem;
488
+ color: #a0a0a0;
489
+ }
490
+
491
+ .results-divider {
492
+ height: 1px;
493
+ background: #333;
494
+ margin: 20px 0;
495
+ }
496
+
497
+ .examples-section {
498
+ margin-top: 15px;
499
+ }
500
+
501
+ .example-metrics {
502
+ display: flex;
503
+ gap: 10px;
504
+ align-items: center;
505
+ }
506
+
507
+ .search-results-count {
508
+ background: #2a3a3a;
509
+ color: #81c784;
510
+ padding: 2px 6px;
511
+ border-radius: 3px;
512
+ font-size: 0.8rem;
513
+ font-weight: 500;
514
+ white-space: nowrap;
515
+ }
516
+ </style>
517
+ {% endblock %}
518
+
519
+ {% block content %}
520
+ <div class="page active" id="benchmark-results">
521
+ <div class="page-header">
522
+ <h1>Benchmark Results History</h1>
523
+ <p class="page-subtitle">Compare accuracy across different models, search engines, and strategies</p>
524
+ </div>
525
+
526
+ <div class="card benchmark-results-card">
527
+ <div class="card-content">
528
+ <!-- Filters -->
529
+ <div class="filters">
530
+ <div class="filter-group">
531
+ <label for="accuracy-filter">Accuracy Range</label>
532
+ <select id="accuracy-filter">
533
+ <option value="">All</option>
534
+ <option value="high">90%+ (High)</option>
535
+ <option value="medium">70-90% (Medium)</option>
536
+ <option value="low">&lt;70% (Low)</option>
537
+ </select>
538
+ </div>
539
+ <div class="filter-group">
540
+ <label for="model-filter">Model</label>
541
+ <select id="model-filter">
542
+ <option value="">All Models</option>
543
+ </select>
544
+ </div>
545
+ <div class="filter-group">
546
+ <label for="strategy-filter">Strategy</label>
547
+ <select id="strategy-filter">
548
+ <option value="">All Strategies</option>
549
+ </select>
550
+ </div>
551
+ <div class="filter-group">
552
+ <label for="status-filter">Status</label>
553
+ <select id="status-filter">
554
+ <option value="">All Statuses</option>
555
+ <option value="completed">Completed</option>
556
+ <option value="in_progress">In Progress</option>
557
+ <option value="failed">Failed</option>
558
+ <option value="cancelled">Cancelled</option>
559
+ </select>
560
+ </div>
561
+ <div class="filter-group">
562
+ <label for="date-filter">Date Range</label>
563
+ <input type="date" id="date-from">
564
+ <input type="date" id="date-to">
565
+ </div>
566
+ </div>
567
+
568
+ <!-- Results List -->
569
+ <div id="results-container">
570
+ <div class="loading">
571
+ <i class="fas fa-spinner fa-spin"></i> Loading benchmark results...
572
+ </div>
573
+ </div>
574
+
575
+ <!-- Pagination -->
576
+ <div class="pagination" id="pagination" style="display: none;">
577
+ <button id="prev-page">← Previous</button>
578
+ <span id="page-info">Page 1 of 1</span>
579
+ <button id="next-page">Next →</button>
580
+ </div>
581
+ </div>
582
+ </div>
583
+ </div>
584
+
585
+ <script>
586
// Page-level state shared by the functions in this script.
let benchmarkRuns = [];     // every run returned by the history endpoint
let filteredRuns = [];      // the subset of benchmarkRuns matching the active filters
let currentPage = 1;        // 1-based index of the page currently rendered
const itemsPerPage = 20;    // number of run cards rendered per page

// Once the DOM is parsed, start the initial history load and wire the filter controls.
document.addEventListener('DOMContentLoaded', () => {
    loadBenchmarkHistory();
    setupFilters();
});
595
+
596
/**
 * Fetch the benchmark run history and render it.
 *
 * On a successful payload (`success` truthy) the runs are stored in
 * `benchmarkRuns`, copied into `filteredRuns`, the dynamic filter options are
 * populated and the first page is drawn. On an unsuccessful payload, or when
 * anything in that sequence throws, an error placeholder is written into
 * `#results-container` instead.
 */
async function loadBenchmarkHistory() {
    const renderLoadError = () => {
        document.getElementById('results-container').innerHTML =
            '<div class="no-results">Error loading benchmark results</div>';
    };

    try {
        const response = await fetch('/benchmark/api/history');
        const payload = await response.json();

        if (!payload.success) {
            renderLoadError();
            return;
        }

        benchmarkRuns = payload.runs;
        // Shallow copy so filtering never mutates the master list.
        filteredRuns = [...benchmarkRuns];
        populateFilters();
        displayResults();
    } catch (error) {
        console.error('Error loading benchmark history:', error);
        renderLoadError();
    }
}
616
+
617
/**
 * Fill the model and strategy <select> filters from the loaded runs.
 *
 * Each select keeps its first (static "All ...") option; every other option
 * is rebuilt from the distinct, truthy values found in
 * `benchmarkRuns[].search_config`. Rebuilding instead of blindly appending
 * makes the function safe to call more than once (e.g. after the history is
 * reloaded) without duplicating entries. A previously selected value is
 * restored when it is still among the available options.
 */
function populateFilters() {
    // Distinct truthy values of one search_config field, in first-seen order.
    const distinctConfigValues = (field) => [
        ...new Set(benchmarkRuns.map(run => run.search_config?.[field]).filter(Boolean)),
    ];

    // Replace all but the first option of a select with one option per value.
    const rebuildOptions = (selectId, values, labelFor) => {
        const select = document.getElementById(selectId);
        const previousValue = select.value;

        // Drop options left over from an earlier call; index 0 is the static "All" entry.
        while (select.options.length > 1) {
            select.remove(select.options.length - 1);
        }

        values.forEach(value => {
            const option = document.createElement('option');
            option.value = value;
            option.textContent = labelFor(value);
            select.appendChild(option);
        });

        // Keep the user's selection across a rebuild when it still exists.
        if (values.some(value => String(value) === previousValue)) {
            select.value = previousValue;
        }
    };

    // Model names are shown verbatim.
    rebuildOptions('model-filter', distinctConfigValues('model_name'), model => model);

    // Strategy identifiers are shown in Title Case with underscores as spaces.
    rebuildOptions(
        'strategy-filter',
        distinctConfigValues('search_strategy'),
        strategy => String(strategy).replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase())
    );
}
640
+
641
/**
 * Re-run the filters whenever any filter control changes.
 *
 * Attaches `applyFilters` as the `change` handler of every filter input.
 */
function setupFilters() {
    const filterControlIds = [
        'accuracy-filter',
        'model-filter',
        'strategy-filter',
        'status-filter',
        'date-from',
        'date-to',
    ];

    for (const controlId of filterControlIds) {
        document.getElementById(controlId).addEventListener('change', applyFilters);
    }
}
649
+
650
/**
 * Recompute `filteredRuns` from the current filter controls and redraw.
 *
 * A run is kept only when it satisfies every active filter:
 *   - accuracy bucket: high is >= 90, medium is 70 to below 90, low is < 70
 *     (a missing accuracy counts as 0);
 *   - exact model name and search strategy from `search_config`;
 *   - exact status;
 *   - creation day within the inclusive [date-from, date-to] range.
 *
 * The creation day is only computed when a date bound is set, so a run with a
 * missing or unparsable `created_at` no longer throws and breaks the other
 * filters; such a run is excluded while a date filter is active because it
 * cannot be placed in the range. Days are compared in local time, matching
 * the local date shown on each run card.
 *
 * Resets pagination to the first page before rendering.
 */
function applyFilters() {
    const controlValue = (id) => document.getElementById(id).value;

    const accuracyFilter = controlValue('accuracy-filter');
    const modelFilter = controlValue('model-filter');
    const strategyFilter = controlValue('strategy-filter');
    const statusFilter = controlValue('status-filter');
    const dateFrom = controlValue('date-from');
    const dateTo = controlValue('date-to');
    const dateFilterActive = Boolean(dateFrom || dateTo);

    // Local calendar day as YYYY-MM-DD (same shape as <input type="date"> values),
    // or null when the timestamp cannot be parsed.
    const toLocalDateKey = (timestamp) => {
        const parsed = new Date(timestamp);
        if (Number.isNaN(parsed.getTime())) return null;
        const year = String(parsed.getFullYear()).padStart(4, '0');
        const month = String(parsed.getMonth() + 1).padStart(2, '0');
        const day = String(parsed.getDate()).padStart(2, '0');
        return `${year}-${month}-${day}`;
    };

    filteredRuns = benchmarkRuns.filter(run => {
        // Accuracy filter
        if (accuracyFilter) {
            const accuracy = run.overall_accuracy || 0;
            if (accuracyFilter === 'high' && accuracy < 90) return false;
            if (accuracyFilter === 'medium' && (accuracy < 70 || accuracy >= 90)) return false;
            if (accuracyFilter === 'low' && accuracy >= 70) return false;
        }

        // Model filter
        if (modelFilter && run.search_config?.model_name !== modelFilter) return false;

        // Strategy filter
        if (strategyFilter && run.search_config?.search_strategy !== strategyFilter) return false;

        // Status filter
        if (statusFilter && run.status !== statusFilter) return false;

        // Date filters (lexicographic comparison is valid for YYYY-MM-DD strings)
        if (dateFilterActive) {
            const runDate = toLocalDateKey(run.created_at);
            if (runDate === null) return false;
            if (dateFrom && runDate < dateFrom) return false;
            if (dateTo && runDate > dateTo) return false;
        }

        return true;
    });

    currentPage = 1;
    displayResults();
}
687
+
688
/**
 * Render the current page of `filteredRuns` into `#results-container`.
 *
 * With no matching runs, a "no results" placeholder is shown and the
 * pagination bar is hidden. Otherwise the cards for the current page are
 * written and the pagination controls are refreshed.
 */
function displayResults() {
    const container = document.getElementById('results-container');
    const totalRuns = filteredRuns.length;

    if (totalRuns === 0) {
        container.innerHTML = '<div class="no-results">No benchmark results found</div>';
        document.getElementById('pagination').style.display = 'none';
        return;
    }

    // Half-open window [firstIndex, lastIndex) of runs belonging to this page.
    const firstIndex = (currentPage - 1) * itemsPerPage;
    const lastIndex = Math.min(firstIndex + itemsPerPage, totalRuns);

    const cards = [];
    for (const run of filteredRuns.slice(firstIndex, lastIndex)) {
        cards.push(createRunCard(run));
    }
    container.innerHTML = cards.join('');

    // Setup pagination
    setupPagination();
}
707
+
708
/**
 * Build the HTML markup for one benchmark run card.
 *
 * The card shows a header (name, local date and time), a summary grid (status,
 * progress, accuracy, model, strategy, search engine, averages, action
 * buttons) and a collapsed details section holding the run configuration
 * plus a placeholder that is replaced once the examples are loaded.
 *
 * Every value taken from `run` is HTML-escaped before interpolation because
 * the returned string is assigned to `innerHTML`; run names and configuration
 * strings must not be able to inject markup. The run id is passed to the
 * inline handlers as a JSON literal so that both numeric and string ids
 * produce a valid call.
 *
 * @param {object} run - One entry of the history payload.
 * @returns {string} HTML for the card.
 */
function createRunCard(run) {
    // Escape a value for use in HTML text or inside a double-quoted attribute.
    const esc = (value) => String(value).replace(/[&<>"']/g, ch => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    }[ch]));

    // 'N/A' only for genuinely absent values, so a legitimate 0 (e.g. temperature 0) is shown.
    const orNA = (value) =>
        (value === undefined || value === null || value === '') ? 'N/A' : value;

    const config = run.search_config;

    // Number() guards toFixed() below against an accuracy delivered as a string.
    const accuracy = Number(run.overall_accuracy) || 0;
    const accuracyClass = accuracy >= 90 ? 'accuracy-high' :
        accuracy >= 70 ? 'accuracy-medium' : 'accuracy-low';

    // Status handling
    const status = String(run.status || 'unknown');
    const statusClass = `status-${status.replace(/_/g, '-')}`;
    const formattedStatus = status.replace(/_/g, ' ').replace(/\b\w/g, l => l.toUpperCase());
    const showAccuracy = status === 'completed' && accuracy > 0;

    const createdAt = new Date(run.created_at);
    const date = createdAt.toLocaleDateString();
    const time = createdAt.toLocaleTimeString();

    // Progress calculation
    const progress = run.total_examples > 0 ?
        ((run.completed_examples / run.total_examples) * 100).toFixed(1) : 0;

    // Id as an attribute-safe JavaScript literal (for onclick) and as plain text (for element ids).
    const idArg = esc(JSON.stringify(run.id));
    const idAttr = esc(run.id);

    const downloadButton = status === 'completed' ?
        `<button class="btn btn-outline btn-sm" onclick="event.stopPropagation(); downloadBenchmarkYAML(${idArg})" style="background: #1e3a1e !important; border-color: #4caf50 !important; color: #4caf50 !important;">
                        <i class="fas fa-download"></i> YAML
                    </button>` : '';

    // A running benchmark must be cancelled before it can be removed.
    const removeButton = status === 'in_progress' ?
        `<button class="btn btn-outline btn-sm delete-btn" onclick="event.stopPropagation(); cancelAndDeleteBenchmarkRun(${idArg})" style="background: #3a1e1e !important; border-color: #f44336 !important; color: #f44336 !important;">
                        <i class="fas fa-stop"></i> Cancel &amp; Delete
                    </button>` :
        `<button class="btn btn-outline btn-sm delete-btn" onclick="event.stopPropagation(); deleteBenchmarkRun(${idArg})">
                        <i class="fas fa-trash"></i> Delete
                    </button>`;

    return `
        <div class="run-card" onclick="toggleRunDetails(${idArg})">
            <div class="run-header">
                <div class="run-title">${esc(run.run_name)}</div>
                <div class="run-date">${esc(date)} ${esc(time)}</div>
            </div>

            <div class="run-summary">
                <div class="summary-item">
                    <div class="summary-value ${esc(statusClass)} status-indicator">${esc(formattedStatus)}</div>
                    <div class="summary-label">Status</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(run.completed_examples)}/${esc(run.total_examples)}</div>
                    <div class="summary-label">Progress (${esc(progress)}%)</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value ${showAccuracy ? accuracyClass + ' accuracy-indicator' : ''}">${showAccuracy ? accuracy.toFixed(1) + '%' : 'N/A'}</div>
                    <div class="summary-label">Accuracy</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(config?.model_name || 'Unknown')}</div>
                    <div class="summary-label">Model</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(String(config?.search_strategy || 'unknown').replace(/_/g, ' '))}</div>
                    <div class="summary-label">Strategy</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(config?.search_tool || 'Unknown')}</div>
                    <div class="summary-label">Search Engine</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(formatAvgSearchResults(run))}</div>
                    <div class="summary-label">Avg Search Results</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(formatAvgSearchRequests(run))}</div>
                    <div class="summary-label">Avg Search Requests</div>
                </div>
                <div class="summary-item">
                    <div class="summary-value">${esc(formatAvgProcessingTime(run))}</div>
                    <div class="summary-label">Avg Time/Question</div>
                </div>
                <div class="summary-item">
                    <div style="display: flex; gap: 8px;">
                        ${downloadButton}
                        ${removeButton}
                    </div>
                    <div class="summary-label">Actions</div>
                </div>
            </div>

            <div class="expand-indicator">
                <i class="fas fa-chevron-down"></i> Click to view detailed results and examples
            </div>

            <div class="results-section" id="results-${idAttr}">
                <div class="run-config">
                    <div class="config-item">
                        <span class="config-label">Iterations:</span>
                        <span class="config-value">${esc(orNA(config?.iterations))}</span>
                    </div>
                    <div class="config-item">
                        <span class="config-label">Questions/Iteration:</span>
                        <span class="config-value">${esc(orNA(config?.questions_per_iteration))}</span>
                    </div>
                    <div class="config-item">
                        <span class="config-label">Temperature:</span>
                        <span class="config-value">${esc(orNA(config?.temperature))}</span>
                    </div>
                    <div class="config-item">
                        <span class="config-label">Provider:</span>
                        <span class="config-value">${esc(orNA(config?.provider))}</span>
                    </div>
                </div>

                <div id="examples-${idAttr}">
                    <div class="loading">Loading detailed results...</div>
                </div>
            </div>
        </div>
    `;
}
823
+
824
/**
 * Expand or collapse the details section of a run card.
 *
 * Collapsing only removes the `visible` / `expanded` classes. Expanding adds
 * them and, while the examples container still holds its loading
 * placeholder, fetches the detailed results for the run.
 *
 * @param {number|string} runId - Id used in the `results-<id>` / `examples-<id>` element ids.
 */
async function toggleRunDetails(runId) {
    const detailsSection = document.getElementById(`results-${runId}`);
    const card = detailsSection.closest('.run-card');
    const isOpen = detailsSection.classList.contains('visible');

    if (isOpen) {
        detailsSection.classList.remove('visible');
        card.classList.remove('expanded');
    } else {
        detailsSection.classList.add('visible');
        card.classList.add('expanded');

        // The placeholder text doubles as the "not loaded yet" marker.
        const examplesHost = document.getElementById(`examples-${runId}`);
        const stillPlaceholder = examplesHost.innerHTML.includes('Loading detailed results...');
        if (stillPlaceholder) {
            await loadExamples(runId);
        }
    }
}
843
+
844
/**
 * Fetch up to 50 per-question results for a run and render them.
 *
 * Writes a statistics panel (average / min / max `search_result_count` over
 * the returned results, plus the number of results returned) followed by one
 * example card per result into `#examples-<runId>`. Shows a placeholder when
 * the payload is unsuccessful or empty, and an error message when the request
 * or rendering throws.
 *
 * @param {number|string} runId - Id of the benchmark run to load.
 */
async function loadExamples(runId) {
    try {
        const response = await fetch(`/benchmark/api/results/${runId}?limit=50`);
        const data = await response.json();

        const examplesContainer = document.getElementById(`examples-${runId}`);

        const hasResults = data.success && data.results.length > 0;
        if (!hasResults) {
            examplesContainer.innerHTML = '<div class="no-results">No detailed results available</div>';
            return;
        }

        // Calculate search result statistics (a missing count is treated as 0)
        const counts = data.results.map(r => r.search_result_count || 0);
        const hasCounts = counts.length > 0;
        let avgSearchResults = 'N/A';
        let minSearchResults = 'N/A';
        let maxSearchResults = 'N/A';
        if (hasCounts) {
            const total = counts.reduce((sum, count) => sum + count, 0);
            avgSearchResults = (total / counts.length).toFixed(1);
            minSearchResults = Math.min(...counts);
            maxSearchResults = Math.max(...counts);
        }

        // One tile of the statistics grid.
        const statTile = (value, label) => `
                    <div class="stat-item">
                        <div class="stat-value">${value}</div>
                        <div class="stat-label">${label}</div>
                    </div>`;

        // Create search results statistics section
        const statsHtml = `
            <div class="search-stats-section">
                <h4 class="section-title">
                    <i class="fas fa-search"></i> Search Results Statistics
                </h4>
                <div class="search-stats-grid">${
                    statTile(avgSearchResults, 'Avg Search Results') +
                    statTile(minSearchResults, 'Min Results') +
                    statTile(maxSearchResults, 'Max Results') +
                    statTile(data.results.length, 'Total Queries')
                }
                </div>
            </div>
        `;

        // Create examples grid
        const examplesHtml = data.results.map(result => createExampleCard(result)).join('');

        examplesContainer.innerHTML = `
            ${statsHtml}
            <div class="results-divider"></div>
            <div class="examples-section">
                <h4 class="section-title">
                    <i class="fas fa-list"></i> Individual Query Results (${data.results.length} shown)
                </h4>
                <div class="examples-grid">${examplesHtml}</div>
            </div>
        `;
    } catch (error) {
        console.error('Error loading examples:', error);
        document.getElementById(`examples-${runId}`).innerHTML =
            '<div class="no-results">Error loading examples</div>';
    }
}
909
+
910
/**
 * Format a run's average number of search results for display.
 *
 * @param {Object} run - Benchmark run; reads `run.avg_search_results`.
 * @returns {string} The average rounded to the nearest integer, or 'N/A'
 *     when the value is missing, zero or negative.
 */
function formatAvgSearchResults(run) {
    const average = run.avg_search_results;
    return (!average || average <= 0)
        ? 'N/A'
        : Math.round(average).toString();
}
917
+
918
/**
 * Format a run's search request count for display.
 *
 * Despite the "Avg" in the name, the value read is
 * `run.total_search_requests`.
 *
 * @param {Object} run - Benchmark run; reads `run.total_search_requests`.
 * @returns {string} The value rounded to the nearest integer, or 'N/A'
 *     when it is missing, zero or negative.
 */
function formatAvgSearchRequests(run) {
    const requests = run.total_search_requests;
    return (!requests || requests <= 0)
        ? 'N/A'
        : Math.round(requests).toString();
}
925
+
926
/**
 * Format the average per-question processing time of a benchmark run.
 *
 * Uses `run.avg_processing_time` (seconds) when it is set; otherwise
 * averages the positive `processing_time` values found in `run.results`.
 *
 * Output shapes: "12.3s" below one minute, "4m 5s" / "4m" below one hour,
 * "2h 3m" / "2h" otherwise, and 'N/A' when no usable time exists.
 *
 * The value is rounded to the display unit *before* it is split into
 * parts, so a component can never overflow (e.g. 119.6 s is "2m", not
 * "1m 60s", and 7199 s is "2h", not "1h 60m").
 *
 * @param {Object} run - Benchmark run object.
 * @returns {string} Human readable duration or 'N/A'.
 */
function formatAvgProcessingTime(run) {
    if (!run.avg_processing_time && (!run.results || run.results.length === 0)) {
        return 'N/A';
    }

    // Prefer the precomputed average; fall back to the individual results.
    let avgTime = run.avg_processing_time;
    if (!avgTime && run.results) {
        const timesWithValues = run.results
            .filter(r => r.processing_time && r.processing_time > 0)
            .map(r => r.processing_time);

        if (timesWithValues.length === 0) return 'N/A';
        avgTime = timesWithValues.reduce((sum, time) => sum + time, 0) / timesWithValues.length;
    }

    if (!avgTime || avgTime <= 0) return 'N/A';

    // Sub-minute values keep one decimal, unless rounding to one decimal
    // would print "60.0s"; those fall through to the minute format.
    const tenths = avgTime.toFixed(1);
    if (avgTime < 60 && tenths !== '60.0') {
        return `${tenths}s`;
    }

    // Round to whole seconds first so the seconds part stays within 0-59.
    const totalSeconds = Math.round(avgTime);
    if (totalSeconds < 3600) {
        const minutes = Math.floor(totalSeconds / 60);
        const seconds = totalSeconds % 60;
        return seconds > 0 ? `${minutes}m ${seconds}s` : `${minutes}m`;
    }

    // Round to whole minutes first so the minutes part stays within 0-59.
    const totalMinutes = Math.round(avgTime / 60);
    const hours = Math.floor(totalMinutes / 60);
    const minutes = totalMinutes % 60;
    return minutes > 0 ? `${hours}h ${minutes}m` : `${hours}h`;
}
958
+
959
/**
 * Build the HTML for one benchmark result card.
 *
 * All text taken from the result (dataset type, question, model answer,
 * expected answer) is HTML-escaped before interpolation: these values
 * originate from datasets and model output, and the returned string is
 * assigned to `innerHTML` by the caller.
 *
 * @param {Object} result - A single benchmark result. Reads `is_correct`,
 *     `processing_time` (seconds), `search_result_count`, `dataset_type`,
 *     `question`, `model_answer` and `correct_answer`.
 * @returns {string} HTML markup for the card.
 */
function createExampleCard(result) {
    // Escape the five characters that are significant in HTML text and
    // attribute contexts.
    const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => ({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;'
    }[ch]));

    const statusClass = result.is_correct ? 'correct' : 'incorrect';
    const statusIcon = result.is_correct
        ? '<i class="fas fa-check-circle"></i>'
        : '<i class="fas fa-times-circle"></i>';
    const statusText = result.is_correct ? 'Correct' : 'Incorrect';

    // Format the processing time of this individual result.
    const rawTime = result.processing_time;
    let processingTime = 'N/A';
    if (rawTime && rawTime > 0) {
        const tenths = rawTime.toFixed(1);
        if (rawTime < 60 && tenths !== '60.0') {
            processingTime = `${tenths}s`;
        } else {
            // Round to whole seconds before splitting so the seconds part
            // can never read "60s".
            const totalSeconds = Math.round(rawTime);
            processingTime = `${Math.floor(totalSeconds / 60)}m ${totalSeconds % 60}s`;
        }
    }

    // A missing search result count is shown as 0.
    const searchResultCount = escapeHtml(result.search_result_count || 0);

    const datasetType = escapeHtml(result.dataset_type);
    const question = escapeHtml(result.question);
    const modelAnswer = escapeHtml(result.model_answer || 'No answer provided');
    const correctAnswer = escapeHtml(result.correct_answer || 'No expected answer');

    return `
        <div class="example-card ${statusClass}">
            <div class="example-header">
                <span class="dataset-badge">${datasetType}</span>
                <span class="example-status ${statusClass}">
                    ${statusIcon} ${statusText}
                </span>
                <div class="example-metrics">
                    <span class="processing-time">⏱️ ${processingTime}</span>
                    <span class="search-results-count">🔍 ${searchResultCount} results</span>
                </div>
            </div>

            <div class="example-question">
                <strong>Question:</strong> ${question}
            </div>

            <div class="example-answers">
                <div class="answer-section model-answer-section">
                    <div class="answer-label">Model Answer</div>
                    <div class="answer-text">${modelAnswer}</div>
                </div>
                <div class="answer-section correct-answer-section">
                    <div class="answer-label">Expected Answer</div>
                    <div class="answer-text">${correctAnswer}</div>
                </div>
            </div>
        </div>
    `;
}
1004
+
1005
/**
 * Refresh the pagination controls for the current filtered run list.
 *
 * Hides the `#pagination` element when there is at most one page.
 * Otherwise shows it, updates the "Page X of Y" label, enables or disables
 * the previous/next buttons and (re)binds their click handlers, which move
 * `currentPage` by one and call `displayResults()`.
 */
function setupPagination() {
    const pageCount = Math.ceil(filteredRuns.length / itemsPerPage);
    const pager = document.getElementById('pagination');

    // A single page (or none) needs no controls.
    if (pageCount <= 1) {
        pager.style.display = 'none';
        return;
    }

    pager.style.display = 'flex';

    const previousButton = document.getElementById('prev-page');
    const nextButton = document.getElementById('next-page');
    const label = document.getElementById('page-info');

    previousButton.disabled = currentPage === 1;
    nextButton.disabled = currentPage === pageCount;
    label.textContent = `Page ${currentPage} of ${pageCount}`;

    function goToPreviousPage() {
        if (currentPage <= 1) {
            return;
        }
        currentPage--;
        displayResults();
    }

    function goToNextPage() {
        if (currentPage >= pageCount) {
            return;
        }
        currentPage++;
        displayResults();
    }

    // Assigning `onclick` replaces any handler set by an earlier call.
    previousButton.onclick = () => {
        goToPreviousPage();
    };
    nextButton.onclick = () => {
        goToNextPage();
    };
}
1038
+
1039
/**
 * Cancel a running benchmark and then delete it, after user confirmation.
 *
 * Posts to `/benchmark/api/cancel/<runId>`; when the server reports
 * success, waits one second and delegates to `deleteBenchmarkRun`.
 * Failures are reported through `showAlert`.
 *
 * @param {string} runId - Identifier of the benchmark run.
 * @returns {Promise<void>}
 */
async function cancelAndDeleteBenchmarkRun(runId) {
    const confirmed = confirm('Are you sure you want to cancel and delete this running benchmark? This action cannot be undone.');
    if (!confirmed) {
        return;
    }

    try {
        // Step 1: ask the server to cancel the run.
        const requestOptions = {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            }
        };
        const reply = await fetch(`/benchmark/api/cancel/${runId}`, requestOptions);
        const outcome = await reply.json();

        if (!outcome.success) {
            showAlert('Error cancelling benchmark: ' + outcome.error, 'error');
            return;
        }

        showAlert('Benchmark cancelled successfully. Deleting...', 'info');

        // Step 2: give the cancellation a moment to take effect.
        await new Promise((done) => {
            setTimeout(done, 1000);
        });

        // Step 3: remove the run.
        await deleteBenchmarkRun(runId);
    } catch (error) {
        console.error('Error cancelling benchmark:', error);
        showAlert('Error cancelling benchmark: ' + error.message, 'error');
    }
}
1071
+
1072
/**
 * Generate a YAML summary of a benchmark run and download it as a file.
 *
 * The run is looked up in the in-memory `benchmarkRuns` list. LLM settings
 * are read from `/settings/api` at download time (they may differ from the
 * values used during the run) and fall back to 'Could not fetch' when the
 * request fails. The file is named `<model>_<YYYY-MM-DD>.yaml`.
 *
 * @param {string} runId - Identifier of the benchmark run.
 * @returns {Promise<void>}
 */
async function downloadBenchmarkYAML(runId) {
    try {
        // Find the run in our local data
        const run = benchmarkRuns.find(r => r.id === runId);
        if (!run) {
            showAlert('Benchmark run not found', 'error');
            return;
        }

        const config = run.search_config ?? {};

        // Current date (YYYY-MM-DD) for the file name and the report body
        const date = new Date().toISOString().split('T')[0];

        // App version is published through a <meta name="app-version"> tag
        const appVersion = document.querySelector('meta[name="app-version"]')?.content || 'Could not fetch version';

        // Model name, plus a file-system friendly variant of it
        const modelName = config.model_name || 'unknown-model';
        const cleanModelName = modelName.replace(/[^a-zA-Z0-9.-]/g, '-').toLowerCase();

        // LLM settings as currently stored; each entry is an object whose
        // `value` property holds the setting.
        // NOTE(review): the original also read `llm.context_window_size` and
        // `llm.supports_max_tokens` but never emitted them; `context_window`
        // below reports the local window size - confirm that is intended.
        const NOT_FETCHED = 'Could not fetch';
        let localContextWindow = NOT_FETCHED;
        let maxTokens = NOT_FETCHED;
        let contextWindowUnrestricted = NOT_FETCHED;

        try {
            const settingsResponse = await fetch('/settings/api');
            if (settingsResponse.ok) {
                const data = await settingsResponse.json();
                if (data.status === 'success' && data.settings) {
                    const settings = data.settings;
                    localContextWindow = settings['llm.local_context_window_size']?.value || NOT_FETCHED;
                    maxTokens = settings['llm.max_tokens']?.value || NOT_FETCHED;
                    const unrestricted = settings['llm.context_window_unrestricted']?.value;
                    if (unrestricted !== undefined) {
                        contextWindowUnrestricted = unrestricted ? 'Yes' : 'No';
                    }
                }
            }
        } catch (e) {
            // Best effort: the report is still useful without these values.
            console.error('Could not fetch current settings:', e);
        }

        // Average search results, or a placeholder the user can fill in
        const avgSearchResults = formatAvgSearchResults(run);
        const searchResultsNum = avgSearchResults !== 'N/A' ? avgSearchResults : '# Please fill in';

        // A missing accuracy is treated as 0 so the correct-answer count
        // does not render as NaN.
        const accuracy = Number(run.overall_accuracy) || 0;
        const accuracyText = accuracy ? accuracy.toFixed(1) : 0;
        const correctCount = Math.round(accuracy * run.total_examples / 100);

        const strategyKey = config.search_strategy === 'focused_iteration' ? 'focused_iteration' : 'source_based';

        // `??` rather than `||`: a temperature of 0 is a valid setting and
        // must not be reported as 'N/A'.
        const temperature = config.temperature ?? 'N/A';

        // The document is assembled line by line so that the YAML nesting
        // does not depend on how this source file is indented.
        const yamlContent = [
            '# Benchmark Result',
            `# Generated from Local Deep Research v${appVersion}`,
            `# Date: ${date}`,
            '',
            '# Model Information',
            `model: ${modelName}`,
            `model_provider: ${config.provider || 'unknown'}`,
            'quantization: # Please fill in if applicable',
            '',
            '# Search Engine (critical for benchmark reproducibility)',
            `search_engine: ${config.search_tool || 'unknown'}`,
            'search_provider_version: # if known, e.g., "latest", "2024.1.0"',
            `average_results_per_query: ${searchResultsNum}`,
            '',
            '# Hardware',
            'hardware:',
            '  gpu: # Please fill in',
            '  ram: # Please fill in',
            '  cpu: # Please fill in',
            '',
            '# Benchmark Results',
            'results:',
            '  dataset: SimpleQA',
            `  total_questions: ${run.total_examples}`,
            '',
            `  ${strategyKey}:`,
            `    accuracy: ${accuracyText}% (${correctCount}/${run.total_examples})`,
            `    iterations: ${config.iterations || 'N/A'}`,
            `    questions_per_iteration: ${config.questions_per_iteration || 'N/A'}`,
            `    avg_time_per_question: ${formatAvgProcessingTime(run)}`,
            '    total_tokens_used: # if available',
            '',
            '# Configuration',
            'configuration:',
            `  context_window: ${localContextWindow} # Current setting at download time - may differ from benchmark run`,
            `  temperature: ${temperature}`,
            `  max_tokens: ${maxTokens} # Current setting at download time`,
            `  local_provider_context_window_size: ${localContextWindow} # Current setting at download time`,
            `  context_window_unrestricted: ${contextWindowUnrestricted} # Current setting at download time`,
            '',
            '# Versions',
            'versions:',
            `  ldr_version: ${appVersion}`,
            '  ollama_version: # if applicable',
            '',
            '# Test Details',
            'test_details:',
            `  date_tested: ${date}`,
            '  rate_limiting_issues: # yes/no',
            '  search_failures: # number of failed searches, if any',
            '',
            '# Notes',
            'notes: |',
            '  # Add any observations, errors, or insights here',
            `  # Search strategy: ${config.search_strategy || 'unknown'}`,
            `  # Provider: ${config.provider || 'unknown'}`,
            '  # Note: Configuration values are from current settings at download time,',
            '  # not necessarily the values used during the benchmark run',
            ''
        ].join('\n');

        // Create a blob and trigger the download through a temporary link
        const blob = new Blob([yamlContent], { type: 'text/yaml' });
        const url = window.URL.createObjectURL(blob);
        const a = document.createElement('a');
        a.style.display = 'none';
        a.href = url;
        a.download = `${cleanModelName}_${date}.yaml`;
        document.body.appendChild(a);
        a.click();
        window.URL.revokeObjectURL(url);
        document.body.removeChild(a);

        showAlert('Benchmark YAML downloaded! Hardware details are optional but helpful for performance context.', 'success');
    } catch (error) {
        console.error('Error downloading YAML:', error);
        showAlert('Error downloading YAML: ' + error.message, 'error');
    }
}
1201
+
1202
/**
 * Delete a benchmark run on the server and drop it from the local list.
 *
 * Sends `DELETE /benchmark/api/delete/<runId>`. On success the run is
 * removed from `benchmarkRuns` and `applyFilters()` re-renders the list;
 * on failure the error is reported through `showAlert`.
 *
 * @param {string} runId - Identifier of the benchmark run.
 * @returns {Promise<void>}
 */
async function deleteBenchmarkRun(runId) {
    try {
        const requestOptions = {
            method: 'DELETE',
            headers: {
                'Content-Type': 'application/json',
            }
        };
        const reply = await fetch(`/benchmark/api/delete/${runId}`, requestOptions);
        const outcome = await reply.json();

        if (!outcome.success) {
            showAlert('Error deleting benchmark run: ' + outcome.error, 'error');
            return;
        }

        showAlert('Benchmark run deleted successfully!', 'success');

        // Forget the run locally, then rebuild the filtered view.
        benchmarkRuns = benchmarkRuns.filter((candidate) => candidate.id !== runId);
        applyFilters();
    } catch (error) {
        console.error('Error deleting benchmark run:', error);
        showAlert('Error deleting benchmark run: ' + error.message, 'error');
    }
}
1230
+
1231
/**
 * Show a floating notification in the top-right corner of the page.
 *
 * The alert can be dismissed with its close button and removes itself
 * after five seconds.
 *
 * The message is inserted with `textContent`, not `innerHTML`: callers
 * pass server error strings and exception messages, which must be
 * displayed as plain text rather than parsed as markup.
 *
 * @param {string} message - Text to display.
 * @param {string} type - One of 'success', 'error', 'warning', 'info';
 *     any other value falls back to the 'info' colour.
 */
function showAlert(message, type) {
    const colors = {
        success: '#4caf50',
        error: '#f44336',
        warning: '#ff9800',
        info: '#2196f3'
    };

    // Alert container
    const alertDiv = document.createElement('div');
    alertDiv.className = `alert alert-${type}`;
    alertDiv.style.cssText = `
        position: fixed;
        top: 20px;
        right: 20px;
        z-index: 1000;
        max-width: 400px;
        padding: 15px;
        border-radius: 6px;
        color: white;
        font-weight: 500;
        box-shadow: 0 4px 12px rgba(0,0,0,0.3);
    `;

    // Own-property check so names such as "constructor" cannot select a
    // non-colour value from the prototype chain.
    const hasColor = Object.prototype.hasOwnProperty.call(colors, type);
    alertDiv.style.backgroundColor = hasColor ? colors[type] : colors.info;

    // Row holding the message and the close button
    const row = document.createElement('div');
    row.style.cssText = 'display: flex; align-items: center; justify-content: space-between;';

    const text = document.createElement('span');
    text.textContent = message;

    const closeButton = document.createElement('button');
    closeButton.type = 'button';
    closeButton.textContent = '\u00d7';
    closeButton.style.cssText = 'background: none; border: none; color: white; font-size: 1.2rem; cursor: pointer; margin-left: 10px;';
    closeButton.addEventListener('click', () => alertDiv.remove());

    row.appendChild(text);
    row.appendChild(closeButton);
    alertDiv.appendChild(row);

    document.body.appendChild(alertDiv);

    // Auto-remove after 5 seconds (no-op if it was already dismissed)
    setTimeout(() => {
        if (alertDiv.parentElement) {
            alertDiv.remove();
        }
    }, 5000);
}
1273
+ </script>
1274
+ {% endblock %}