openadapt-ml 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openadapt_ml/benchmarks/__init__.py +8 -0
- openadapt_ml/benchmarks/agent.py +90 -11
- openadapt_ml/benchmarks/azure.py +35 -6
- openadapt_ml/benchmarks/cli.py +4449 -201
- openadapt_ml/benchmarks/live_tracker.py +180 -0
- openadapt_ml/benchmarks/runner.py +41 -4
- openadapt_ml/benchmarks/viewer.py +1219 -0
- openadapt_ml/benchmarks/vm_monitor.py +610 -0
- openadapt_ml/benchmarks/waa.py +61 -4
- openadapt_ml/benchmarks/waa_deploy/Dockerfile +222 -0
- openadapt_ml/benchmarks/waa_deploy/__init__.py +10 -0
- openadapt_ml/benchmarks/waa_deploy/api_agent.py +539 -0
- openadapt_ml/benchmarks/waa_deploy/start_waa_server.bat +53 -0
- openadapt_ml/benchmarks/waa_live.py +619 -0
- openadapt_ml/cloud/local.py +1555 -1
- openadapt_ml/cloud/ssh_tunnel.py +553 -0
- openadapt_ml/datasets/next_action.py +87 -68
- openadapt_ml/evals/grounding.py +26 -8
- openadapt_ml/evals/trajectory_matching.py +84 -36
- openadapt_ml/experiments/demo_prompt/__init__.py +19 -0
- openadapt_ml/experiments/demo_prompt/format_demo.py +226 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_20251231_002125.json +83 -0
- openadapt_ml/experiments/demo_prompt/results/experiment_n30_20251231_165958.json +1100 -0
- openadapt_ml/experiments/demo_prompt/results/multistep_20251231_025051.json +182 -0
- openadapt_ml/experiments/demo_prompt/run_experiment.py +531 -0
- openadapt_ml/experiments/waa_demo/__init__.py +10 -0
- openadapt_ml/experiments/waa_demo/demos.py +357 -0
- openadapt_ml/experiments/waa_demo/runner.py +717 -0
- openadapt_ml/experiments/waa_demo/tasks.py +151 -0
- openadapt_ml/export/__init__.py +9 -0
- openadapt_ml/export/__main__.py +6 -0
- openadapt_ml/export/cli.py +89 -0
- openadapt_ml/export/parquet.py +265 -0
- openadapt_ml/ingest/__init__.py +3 -4
- openadapt_ml/ingest/capture.py +89 -81
- openadapt_ml/ingest/loader.py +116 -68
- openadapt_ml/ingest/synthetic.py +221 -159
- openadapt_ml/retrieval/README.md +226 -0
- openadapt_ml/retrieval/USAGE.md +391 -0
- openadapt_ml/retrieval/__init__.py +91 -0
- openadapt_ml/retrieval/demo_retriever.py +817 -0
- openadapt_ml/retrieval/embeddings.py +629 -0
- openadapt_ml/retrieval/index.py +194 -0
- openadapt_ml/retrieval/retriever.py +160 -0
- openadapt_ml/runtime/policy.py +10 -10
- openadapt_ml/schema/__init__.py +104 -0
- openadapt_ml/schema/converters.py +541 -0
- openadapt_ml/schema/episode.py +457 -0
- openadapt_ml/scripts/compare.py +26 -16
- openadapt_ml/scripts/eval_policy.py +4 -5
- openadapt_ml/scripts/prepare_synthetic.py +14 -17
- openadapt_ml/scripts/train.py +81 -70
- openadapt_ml/training/benchmark_viewer.py +3225 -0
- openadapt_ml/training/trainer.py +120 -363
- openadapt_ml/training/trl_trainer.py +354 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/METADATA +102 -60
- openadapt_ml-0.2.0.dist-info/RECORD +86 -0
- openadapt_ml/schemas/__init__.py +0 -53
- openadapt_ml/schemas/sessions.py +0 -122
- openadapt_ml/schemas/validation.py +0 -252
- openadapt_ml-0.1.0.dist-info/RECORD +0 -55
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/WHEEL +0 -0
- {openadapt_ml-0.1.0.dist-info → openadapt_ml-0.2.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -10,6 +10,2960 @@ import json
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
def _get_background_tasks_panel_css() -> str:
|
|
14
|
+
"""Return CSS for background tasks panel."""
|
|
15
|
+
return '''
|
|
16
|
+
.tasks-panel {
|
|
17
|
+
background: linear-gradient(135deg, rgba(100, 100, 255, 0.1) 0%, rgba(100, 100, 255, 0.05) 100%);
|
|
18
|
+
border: 1px solid rgba(100, 100, 255, 0.3);
|
|
19
|
+
border-radius: 12px;
|
|
20
|
+
padding: 20px 24px;
|
|
21
|
+
margin-bottom: 24px;
|
|
22
|
+
}
|
|
23
|
+
.tasks-header {
|
|
24
|
+
display: flex;
|
|
25
|
+
align-items: center;
|
|
26
|
+
justify-content: space-between;
|
|
27
|
+
margin-bottom: 16px;
|
|
28
|
+
}
|
|
29
|
+
.tasks-title {
|
|
30
|
+
display: flex;
|
|
31
|
+
align-items: center;
|
|
32
|
+
gap: 10px;
|
|
33
|
+
font-size: 1rem;
|
|
34
|
+
font-weight: 600;
|
|
35
|
+
color: #6366f1;
|
|
36
|
+
}
|
|
37
|
+
.tasks-title svg {
|
|
38
|
+
width: 20px;
|
|
39
|
+
height: 20px;
|
|
40
|
+
}
|
|
41
|
+
.tasks-refresh {
|
|
42
|
+
font-size: 0.75rem;
|
|
43
|
+
color: var(--text-muted);
|
|
44
|
+
}
|
|
45
|
+
.task-card {
|
|
46
|
+
background: rgba(0, 0, 0, 0.3);
|
|
47
|
+
border: 1px solid var(--border-color);
|
|
48
|
+
border-radius: 8px;
|
|
49
|
+
padding: 16px;
|
|
50
|
+
margin-bottom: 12px;
|
|
51
|
+
}
|
|
52
|
+
.task-card:last-child {
|
|
53
|
+
margin-bottom: 0;
|
|
54
|
+
}
|
|
55
|
+
.task-card-header {
|
|
56
|
+
display: flex;
|
|
57
|
+
align-items: center;
|
|
58
|
+
gap: 12px;
|
|
59
|
+
margin-bottom: 8px;
|
|
60
|
+
}
|
|
61
|
+
.task-status-indicator {
|
|
62
|
+
width: 12px;
|
|
63
|
+
height: 12px;
|
|
64
|
+
border-radius: 50%;
|
|
65
|
+
flex-shrink: 0;
|
|
66
|
+
}
|
|
67
|
+
.task-status-indicator.running {
|
|
68
|
+
background: #3b82f6;
|
|
69
|
+
animation: pulse-task 2s infinite;
|
|
70
|
+
}
|
|
71
|
+
.task-status-indicator.completed {
|
|
72
|
+
background: #10b981;
|
|
73
|
+
}
|
|
74
|
+
.task-status-indicator.failed {
|
|
75
|
+
background: #ef4444;
|
|
76
|
+
}
|
|
77
|
+
.task-status-indicator.pending {
|
|
78
|
+
background: #f59e0b;
|
|
79
|
+
}
|
|
80
|
+
@keyframes pulse-task {
|
|
81
|
+
0%, 100% { opacity: 1; box-shadow: 0 0 0 0 rgba(59, 130, 246, 0.5); }
|
|
82
|
+
50% { opacity: 0.8; box-shadow: 0 0 0 8px rgba(59, 130, 246, 0); }
|
|
83
|
+
}
|
|
84
|
+
.task-title {
|
|
85
|
+
font-weight: 600;
|
|
86
|
+
font-size: 0.95rem;
|
|
87
|
+
color: var(--text-primary);
|
|
88
|
+
}
|
|
89
|
+
.task-description {
|
|
90
|
+
font-size: 0.85rem;
|
|
91
|
+
color: var(--text-secondary);
|
|
92
|
+
margin-bottom: 12px;
|
|
93
|
+
}
|
|
94
|
+
.task-progress-bar {
|
|
95
|
+
height: 8px;
|
|
96
|
+
background: rgba(255, 255, 255, 0.1);
|
|
97
|
+
border-radius: 4px;
|
|
98
|
+
overflow: hidden;
|
|
99
|
+
margin-bottom: 8px;
|
|
100
|
+
}
|
|
101
|
+
.task-progress-fill {
|
|
102
|
+
height: 100%;
|
|
103
|
+
background: linear-gradient(90deg, #3b82f6, #06b6d4);
|
|
104
|
+
border-radius: 4px;
|
|
105
|
+
transition: width 0.5s ease;
|
|
106
|
+
}
|
|
107
|
+
.task-progress-fill.completed {
|
|
108
|
+
background: linear-gradient(90deg, #10b981, #059669);
|
|
109
|
+
}
|
|
110
|
+
.task-meta {
|
|
111
|
+
display: flex;
|
|
112
|
+
justify-content: space-between;
|
|
113
|
+
font-size: 0.75rem;
|
|
114
|
+
color: var(--text-muted);
|
|
115
|
+
}
|
|
116
|
+
.task-link {
|
|
117
|
+
display: inline-flex;
|
|
118
|
+
align-items: center;
|
|
119
|
+
gap: 4px;
|
|
120
|
+
padding: 4px 8px;
|
|
121
|
+
background: rgba(99, 102, 241, 0.2);
|
|
122
|
+
border: 1px solid rgba(99, 102, 241, 0.4);
|
|
123
|
+
border-radius: 4px;
|
|
124
|
+
color: #818cf8;
|
|
125
|
+
text-decoration: none;
|
|
126
|
+
font-size: 0.75rem;
|
|
127
|
+
margin-top: 8px;
|
|
128
|
+
transition: all 0.2s;
|
|
129
|
+
}
|
|
130
|
+
.task-link:hover {
|
|
131
|
+
background: rgba(99, 102, 241, 0.3);
|
|
132
|
+
transform: translateY(-1px);
|
|
133
|
+
}
|
|
134
|
+
.task-credentials {
|
|
135
|
+
display: flex;
|
|
136
|
+
align-items: center;
|
|
137
|
+
gap: 8px;
|
|
138
|
+
padding: 8px 12px;
|
|
139
|
+
background: rgba(245, 158, 11, 0.15);
|
|
140
|
+
border: 1px solid rgba(245, 158, 11, 0.3);
|
|
141
|
+
border-radius: 6px;
|
|
142
|
+
margin: 8px 0;
|
|
143
|
+
font-size: 0.85rem;
|
|
144
|
+
}
|
|
145
|
+
.task-credentials .cred-label {
|
|
146
|
+
color: #fbbf24;
|
|
147
|
+
}
|
|
148
|
+
.task-credentials code {
|
|
149
|
+
background: rgba(0, 0, 0, 0.3);
|
|
150
|
+
padding: 2px 6px;
|
|
151
|
+
border-radius: 4px;
|
|
152
|
+
font-family: 'SF Mono', Monaco, monospace;
|
|
153
|
+
color: #fcd34d;
|
|
154
|
+
}
|
|
155
|
+
.no-tasks {
|
|
156
|
+
text-align: center;
|
|
157
|
+
padding: 20px;
|
|
158
|
+
color: var(--text-muted);
|
|
159
|
+
font-size: 0.9rem;
|
|
160
|
+
}
|
|
161
|
+
.task-phase-badge {
|
|
162
|
+
margin-left: auto;
|
|
163
|
+
padding: 2px 8px;
|
|
164
|
+
background: rgba(99, 102, 241, 0.2);
|
|
165
|
+
border-radius: 12px;
|
|
166
|
+
font-size: 0.75rem;
|
|
167
|
+
color: #a5b4fc;
|
|
168
|
+
}
|
|
169
|
+
.task-logs-details {
|
|
170
|
+
margin-top: 12px;
|
|
171
|
+
border-top: 1px solid var(--border-color);
|
|
172
|
+
padding-top: 8px;
|
|
173
|
+
}
|
|
174
|
+
.task-logs-summary {
|
|
175
|
+
cursor: pointer;
|
|
176
|
+
font-size: 0.75rem;
|
|
177
|
+
color: var(--text-muted);
|
|
178
|
+
user-select: none;
|
|
179
|
+
}
|
|
180
|
+
.task-logs-summary:hover {
|
|
181
|
+
color: var(--text-secondary);
|
|
182
|
+
}
|
|
183
|
+
.task-logs-content {
|
|
184
|
+
margin-top: 8px;
|
|
185
|
+
padding: 8px;
|
|
186
|
+
background: rgba(0, 0, 0, 0.4);
|
|
187
|
+
border-radius: 4px;
|
|
188
|
+
font-size: 0.7rem;
|
|
189
|
+
line-height: 1.4;
|
|
190
|
+
max-height: 150px;
|
|
191
|
+
overflow-y: auto;
|
|
192
|
+
white-space: pre-wrap;
|
|
193
|
+
word-break: break-all;
|
|
194
|
+
color: #10b981;
|
|
195
|
+
font-family: 'SF Mono', Monaco, 'Cascadia Code', monospace;
|
|
196
|
+
}
|
|
197
|
+
/* VM Details section - using native <details> element to preserve state across re-renders */
|
|
198
|
+
.vm-details-section {
|
|
199
|
+
margin-top: 12px;
|
|
200
|
+
border-top: 1px solid var(--border-color);
|
|
201
|
+
padding-top: 12px;
|
|
202
|
+
}
|
|
203
|
+
.vm-details-summary {
|
|
204
|
+
cursor: pointer;
|
|
205
|
+
font-size: 0.75rem;
|
|
206
|
+
color: var(--text-muted);
|
|
207
|
+
user-select: none;
|
|
208
|
+
display: flex;
|
|
209
|
+
align-items: center;
|
|
210
|
+
gap: 6px;
|
|
211
|
+
padding: 6px 0;
|
|
212
|
+
list-style: none;
|
|
213
|
+
}
|
|
214
|
+
.vm-details-summary::-webkit-details-marker {
|
|
215
|
+
display: none;
|
|
216
|
+
}
|
|
217
|
+
.vm-details-summary:hover {
|
|
218
|
+
color: var(--text-secondary);
|
|
219
|
+
}
|
|
220
|
+
.vm-details-icon {
|
|
221
|
+
transition: transform 0.2s;
|
|
222
|
+
}
|
|
223
|
+
details.vm-details[open] .vm-details-icon {
|
|
224
|
+
transform: rotate(90deg);
|
|
225
|
+
}
|
|
226
|
+
.vm-details-content {
|
|
227
|
+
margin-top: 8px;
|
|
228
|
+
padding: 12px;
|
|
229
|
+
background: rgba(0, 0, 0, 0.3);
|
|
230
|
+
border-radius: 6px;
|
|
231
|
+
font-size: 0.75rem;
|
|
232
|
+
}
|
|
233
|
+
.vm-detail-row {
|
|
234
|
+
display: flex;
|
|
235
|
+
justify-content: space-between;
|
|
236
|
+
align-items: center;
|
|
237
|
+
padding: 6px 0;
|
|
238
|
+
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
|
|
239
|
+
}
|
|
240
|
+
.vm-detail-row:last-child {
|
|
241
|
+
border-bottom: none;
|
|
242
|
+
}
|
|
243
|
+
.vm-detail-label {
|
|
244
|
+
color: var(--text-muted);
|
|
245
|
+
font-weight: 500;
|
|
246
|
+
}
|
|
247
|
+
.vm-detail-value {
|
|
248
|
+
color: var(--text-primary);
|
|
249
|
+
font-family: 'SF Mono', Monaco, monospace;
|
|
250
|
+
}
|
|
251
|
+
.vm-detail-value.success {
|
|
252
|
+
color: #10b981;
|
|
253
|
+
}
|
|
254
|
+
.vm-detail-value.warning {
|
|
255
|
+
color: #f59e0b;
|
|
256
|
+
}
|
|
257
|
+
.vm-detail-value.error {
|
|
258
|
+
color: #ef4444;
|
|
259
|
+
}
|
|
260
|
+
.vm-dependencies-list {
|
|
261
|
+
margin-top: 8px;
|
|
262
|
+
padding: 8px;
|
|
263
|
+
background: rgba(0, 0, 0, 0.2);
|
|
264
|
+
border-radius: 4px;
|
|
265
|
+
}
|
|
266
|
+
.vm-dependency-item {
|
|
267
|
+
display: flex;
|
|
268
|
+
align-items: center;
|
|
269
|
+
gap: 8px;
|
|
270
|
+
padding: 4px 0;
|
|
271
|
+
font-size: 0.7rem;
|
|
272
|
+
}
|
|
273
|
+
.vm-dependency-icon {
|
|
274
|
+
font-size: 1rem;
|
|
275
|
+
}
|
|
276
|
+
.vm-progress-bar {
|
|
277
|
+
width: 100%;
|
|
278
|
+
height: 6px;
|
|
279
|
+
background: rgba(255, 255, 255, 0.1);
|
|
280
|
+
border-radius: 3px;
|
|
281
|
+
overflow: hidden;
|
|
282
|
+
margin: 8px 0;
|
|
283
|
+
}
|
|
284
|
+
.vm-progress-fill {
|
|
285
|
+
height: 100%;
|
|
286
|
+
background: linear-gradient(90deg, #10b981, #059669);
|
|
287
|
+
border-radius: 3px;
|
|
288
|
+
transition: width 0.5s ease;
|
|
289
|
+
}
|
|
290
|
+
'''
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _get_background_tasks_panel_html() -> str:
|
|
294
|
+
"""Return HTML for background tasks panel with JS polling and improved styling."""
|
|
295
|
+
return '''
|
|
296
|
+
<div class="tasks-panel" id="tasks-panel">
|
|
297
|
+
<div class="tasks-header">
|
|
298
|
+
<div class="tasks-title">
|
|
299
|
+
<svg viewBox="0 0 24 24" fill="currentColor">
|
|
300
|
+
<path d="M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zm-5 14H7v-2h7v2zm3-4H7v-2h10v2zm0-4H7V7h10v2z"/>
|
|
301
|
+
</svg>
|
|
302
|
+
Background Tasks
|
|
303
|
+
</div>
|
|
304
|
+
<div style="display: flex; align-items: center; gap: 12px;">
|
|
305
|
+
<span class="tasks-refresh" id="tasks-refresh-time">Checking...</span>
|
|
306
|
+
<button class="refresh-btn" onclick="refreshBackgroundTasks()" title="Refresh tasks" id="tasks-refresh-btn" style="background: rgba(99, 102, 241, 0.2); border-color: rgba(99, 102, 241, 0.4);">
|
|
307
|
+
<span class="refresh-icon">↻</span>
|
|
308
|
+
<span class="spinner" style="border-top-color: #6366f1;"></span>
|
|
309
|
+
Refresh
|
|
310
|
+
</button>
|
|
311
|
+
</div>
|
|
312
|
+
</div>
|
|
313
|
+
|
|
314
|
+
<!-- API Error Banner -->
|
|
315
|
+
<div class="api-error-banner" id="tasks-api-error" style="display: none;">
|
|
316
|
+
<span class="error-icon">!</span>
|
|
317
|
+
<span class="error-message" id="tasks-error-msg">Failed to fetch tasks</span>
|
|
318
|
+
<button class="retry-btn" onclick="refreshBackgroundTasks()">Retry</button>
|
|
319
|
+
</div>
|
|
320
|
+
|
|
321
|
+
<!-- Loading state -->
|
|
322
|
+
<div id="tasks-loading" style="display: none; text-align: center; padding: 30px;">
|
|
323
|
+
<div style="display: inline-block; width: 24px; height: 24px; border: 3px solid rgba(99,102,241,0.3); border-top-color: #6366f1; border-radius: 50%; animation: spin 1s linear infinite;"></div>
|
|
324
|
+
<div style="margin-top: 12px; color: var(--text-muted); font-size: 0.85rem;">Loading tasks...</div>
|
|
325
|
+
</div>
|
|
326
|
+
|
|
327
|
+
<div id="tasks-list">
|
|
328
|
+
<div class="no-tasks">
|
|
329
|
+
<div style="font-size: 2rem; margin-bottom: 12px; opacity: 0.5;">📋</div>
|
|
330
|
+
Checking for active tasks...
|
|
331
|
+
</div>
|
|
332
|
+
</div>
|
|
333
|
+
</div>
|
|
334
|
+
|
|
335
|
+
<script>
|
|
336
|
+
let isTasksRefreshing = false;
|
|
337
|
+
let tasksErrorCount = 0;
|
|
338
|
+
|
|
339
|
+
function setTasksLoadingState(loading) {
|
|
340
|
+
const loadingEl = document.getElementById('tasks-loading');
|
|
341
|
+
const listEl = document.getElementById('tasks-list');
|
|
342
|
+
const btn = document.getElementById('tasks-refresh-btn');
|
|
343
|
+
|
|
344
|
+
if (loading) {
|
|
345
|
+
loadingEl.style.display = 'block';
|
|
346
|
+
listEl.style.display = 'none';
|
|
347
|
+
if (btn) btn.classList.add('loading');
|
|
348
|
+
} else {
|
|
349
|
+
loadingEl.style.display = 'none';
|
|
350
|
+
listEl.style.display = 'block';
|
|
351
|
+
if (btn) btn.classList.remove('loading');
|
|
352
|
+
}
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
function showTasksError(msg) {
|
|
356
|
+
const errorEl = document.getElementById('tasks-api-error');
|
|
357
|
+
const errorMsgEl = document.getElementById('tasks-error-msg');
|
|
358
|
+
if (errorEl && errorMsgEl) {
|
|
359
|
+
errorMsgEl.textContent = msg;
|
|
360
|
+
errorEl.style.display = 'flex';
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
function hideTasksError() {
|
|
365
|
+
const errorEl = document.getElementById('tasks-api-error');
|
|
366
|
+
if (errorEl) errorEl.style.display = 'none';
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
async function refreshBackgroundTasks() {
|
|
370
|
+
if (isTasksRefreshing) return;
|
|
371
|
+
isTasksRefreshing = true;
|
|
372
|
+
setTasksLoadingState(true);
|
|
373
|
+
hideTasksError();
|
|
374
|
+
|
|
375
|
+
try {
|
|
376
|
+
const response = await fetch('/api/tasks?' + Date.now());
|
|
377
|
+
if (!response.ok) throw new Error('HTTP ' + response.status);
|
|
378
|
+
const tasks = await response.json();
|
|
379
|
+
if (tasks.error) throw new Error(tasks.error);
|
|
380
|
+
|
|
381
|
+
renderBackgroundTasks(tasks);
|
|
382
|
+
tasksErrorCount = 0;
|
|
383
|
+
document.getElementById('tasks-refresh-time').textContent =
|
|
384
|
+
'Updated ' + new Date().toLocaleTimeString();
|
|
385
|
+
} catch (e) {
|
|
386
|
+
console.error('Tasks refresh failed:', e);
|
|
387
|
+
tasksErrorCount++;
|
|
388
|
+
showTasksError(e.message || 'Connection failed');
|
|
389
|
+
} finally {
|
|
390
|
+
isTasksRefreshing = false;
|
|
391
|
+
setTasksLoadingState(false);
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
async function fetchBackgroundTasks() {
|
|
396
|
+
if (isTasksRefreshing) return;
|
|
397
|
+
if (tasksErrorCount >= 3) {
|
|
398
|
+
document.getElementById('tasks-refresh-time').textContent = 'Polling paused';
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
try {
|
|
403
|
+
const response = await fetch('/api/tasks?' + Date.now());
|
|
404
|
+
if (response.ok) {
|
|
405
|
+
const tasks = await response.json();
|
|
406
|
+
if (!tasks.error) {
|
|
407
|
+
renderBackgroundTasks(tasks);
|
|
408
|
+
hideTasksError();
|
|
409
|
+
tasksErrorCount = 0;
|
|
410
|
+
document.getElementById('tasks-refresh-time').textContent =
|
|
411
|
+
'Updated ' + new Date().toLocaleTimeString();
|
|
412
|
+
}
|
|
413
|
+
}
|
|
414
|
+
} catch (e) {
|
|
415
|
+
console.log('Tasks API unavailable:', e);
|
|
416
|
+
tasksErrorCount++;
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
function renderVMDetails(metadata) {
|
|
421
|
+
if (!metadata) return '';
|
|
422
|
+
|
|
423
|
+
const statusClass = (value, type = 'default') => {
|
|
424
|
+
if (type === 'probe') {
|
|
425
|
+
return value && value !== 'Not responding' && value !== 'Connection failed' ? 'success' : 'error';
|
|
426
|
+
} else if (type === 'qmp') {
|
|
427
|
+
return value ? 'success' : 'warning';
|
|
428
|
+
}
|
|
429
|
+
return '';
|
|
430
|
+
};
|
|
431
|
+
|
|
432
|
+
const renderDependencies = (deps) => {
|
|
433
|
+
if (!deps || deps.length === 0) return '';
|
|
434
|
+
|
|
435
|
+
const statusIcons = {
|
|
436
|
+
'complete': '✓',
|
|
437
|
+
'installing': '⏳',
|
|
438
|
+
'pending': '○'
|
|
439
|
+
};
|
|
440
|
+
|
|
441
|
+
return `
|
|
442
|
+
<div class="vm-detail-row">
|
|
443
|
+
<div class="vm-detail-label">Dependencies</div>
|
|
444
|
+
</div>
|
|
445
|
+
<div class="vm-dependencies-list">
|
|
446
|
+
${deps.map(dep => `
|
|
447
|
+
<div class="vm-dependency-item">
|
|
448
|
+
<span class="vm-dependency-icon">${dep.icon || '📦'}</span>
|
|
449
|
+
<span>${statusIcons[dep.status] || '○'} ${dep.name}</span>
|
|
450
|
+
</div>
|
|
451
|
+
`).join('')}
|
|
452
|
+
</div>
|
|
453
|
+
`;
|
|
454
|
+
};
|
|
455
|
+
|
|
456
|
+
// Use native <details> element to preserve expanded state across SSE re-renders
|
|
457
|
+
return `
|
|
458
|
+
<div class="vm-details-section">
|
|
459
|
+
<details class="vm-details">
|
|
460
|
+
<summary class="vm-details-summary">
|
|
461
|
+
<span class="vm-details-icon">▶</span>
|
|
462
|
+
<span>VM Details</span>
|
|
463
|
+
</summary>
|
|
464
|
+
<div class="vm-details-content">
|
|
465
|
+
${metadata.setup_script_phase ? `
|
|
466
|
+
<div class="vm-detail-row">
|
|
467
|
+
<div class="vm-detail-label">Setup Phase</div>
|
|
468
|
+
<div class="vm-detail-value">${metadata.setup_script_phase}</div>
|
|
469
|
+
</div>
|
|
470
|
+
` : ''}
|
|
471
|
+
${metadata.disk_usage_gb ? `
|
|
472
|
+
<div class="vm-detail-row">
|
|
473
|
+
<div class="vm-detail-label">Disk Usage</div>
|
|
474
|
+
<div class="vm-detail-value">${metadata.disk_usage_gb}</div>
|
|
475
|
+
</div>
|
|
476
|
+
` : ''}
|
|
477
|
+
${metadata.memory_usage_mb ? `
|
|
478
|
+
<div class="vm-detail-row">
|
|
479
|
+
<div class="vm-detail-label">Memory Usage</div>
|
|
480
|
+
<div class="vm-detail-value">${metadata.memory_usage_mb}</div>
|
|
481
|
+
</div>
|
|
482
|
+
` : ''}
|
|
483
|
+
${metadata.probe_response !== undefined ? `
|
|
484
|
+
<div class="vm-detail-row">
|
|
485
|
+
<div class="vm-detail-label">WAA Server (/probe)</div>
|
|
486
|
+
<div class="vm-detail-value ${statusClass(metadata.probe_response, 'probe')}">
|
|
487
|
+
${metadata.probe_response}
|
|
488
|
+
</div>
|
|
489
|
+
</div>
|
|
490
|
+
` : ''}
|
|
491
|
+
${metadata.qmp_connected !== undefined ? `
|
|
492
|
+
<div class="vm-detail-row">
|
|
493
|
+
<div class="vm-detail-label">QMP (port 7200)</div>
|
|
494
|
+
<div class="vm-detail-value ${statusClass(metadata.qmp_connected, 'qmp')}">
|
|
495
|
+
${metadata.qmp_connected ? 'Connected ✓' : 'Not connected'}
|
|
496
|
+
</div>
|
|
497
|
+
</div>
|
|
498
|
+
` : ''}
|
|
499
|
+
${renderDependencies(metadata.dependencies)}
|
|
500
|
+
</div>
|
|
501
|
+
</details>
|
|
502
|
+
</div>
|
|
503
|
+
`;
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
// Track expanded states for VM Details and logs panels across page refreshes
|
|
507
|
+
// Uses localStorage to persist states across browser reloads
|
|
508
|
+
// Key: task_id, Value: { vmDetailsExpanded: bool, logsExpanded: bool }
|
|
509
|
+
const STORAGE_KEY = 'openadapt_task_expanded_states';
|
|
510
|
+
|
|
511
|
+
function getTaskExpandedStates() {
|
|
512
|
+
try {
|
|
513
|
+
const stored = localStorage.getItem(STORAGE_KEY);
|
|
514
|
+
return stored ? JSON.parse(stored) : {};
|
|
515
|
+
} catch (e) {
|
|
516
|
+
console.warn('Failed to load expanded states from localStorage:', e);
|
|
517
|
+
return {};
|
|
518
|
+
}
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
function saveTaskExpandedStates() {
|
|
522
|
+
const taskExpandedStates = getTaskExpandedStates();
|
|
523
|
+
|
|
524
|
+
// First, clear all expanded states (we'll re-add the currently expanded ones)
|
|
525
|
+
// This handles the case where a user collapses a panel
|
|
526
|
+
for (const key of Object.keys(taskExpandedStates)) {
|
|
527
|
+
taskExpandedStates[key].vmDetailsExpanded = false;
|
|
528
|
+
taskExpandedStates[key].logsExpanded = false;
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
// Save VM Details expanded states (using native <details> element)
|
|
532
|
+
document.querySelectorAll('details.vm-details[open]').forEach(details => {
|
|
533
|
+
const card = details.closest('.task-card');
|
|
534
|
+
if (card) {
|
|
535
|
+
const taskTitle = card.querySelector('.task-title')?.textContent || '';
|
|
536
|
+
if (taskTitle) {
|
|
537
|
+
if (!taskExpandedStates[taskTitle]) taskExpandedStates[taskTitle] = {};
|
|
538
|
+
taskExpandedStates[taskTitle].vmDetailsExpanded = true;
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
});
|
|
542
|
+
|
|
543
|
+
// Save logs details expanded states
|
|
544
|
+
document.querySelectorAll('.task-logs-details[open]').forEach(details => {
|
|
545
|
+
const card = details.closest('.task-card');
|
|
546
|
+
if (card) {
|
|
547
|
+
const taskTitle = card.querySelector('.task-title')?.textContent || '';
|
|
548
|
+
if (taskTitle) {
|
|
549
|
+
if (!taskExpandedStates[taskTitle]) taskExpandedStates[taskTitle] = {};
|
|
550
|
+
taskExpandedStates[taskTitle].logsExpanded = true;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
});
|
|
554
|
+
|
|
555
|
+
// Persist to localStorage
|
|
556
|
+
try {
|
|
557
|
+
localStorage.setItem(STORAGE_KEY, JSON.stringify(taskExpandedStates));
|
|
558
|
+
} catch (e) {
|
|
559
|
+
console.warn('Failed to save expanded states to localStorage:', e);
|
|
560
|
+
}
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
function restoreTaskExpandedStates() {
|
|
564
|
+
const taskExpandedStates = getTaskExpandedStates();
|
|
565
|
+
|
|
566
|
+
// Restore VM Details expanded states (using native <details> element)
|
|
567
|
+
document.querySelectorAll('.task-card').forEach(card => {
|
|
568
|
+
const taskTitle = card.querySelector('.task-title')?.textContent || '';
|
|
569
|
+
const state = taskExpandedStates[taskTitle];
|
|
570
|
+
if (state) {
|
|
571
|
+
if (state.vmDetailsExpanded) {
|
|
572
|
+
const details = card.querySelector('details.vm-details');
|
|
573
|
+
if (details) details.open = true;
|
|
574
|
+
}
|
|
575
|
+
if (state.logsExpanded) {
|
|
576
|
+
const details = card.querySelector('.task-logs-details');
|
|
577
|
+
if (details) details.open = true;
|
|
578
|
+
}
|
|
579
|
+
}
|
|
580
|
+
});
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
function renderBackgroundTasks(tasks) {
|
|
584
|
+
const container = document.getElementById('tasks-list');
|
|
585
|
+
|
|
586
|
+
// Debug: Log incoming tasks data
|
|
587
|
+
console.log('[SSE Debug] renderBackgroundTasks called with:', JSON.stringify(tasks, null, 2));
|
|
588
|
+
|
|
589
|
+
// Save expanded states before replacing DOM
|
|
590
|
+
saveTaskExpandedStates();
|
|
591
|
+
|
|
592
|
+
if (!tasks || tasks.length === 0) {
|
|
593
|
+
container.innerHTML = '<div class="no-tasks">No active background tasks</div>';
|
|
594
|
+
return;
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
const phaseLabels = {
|
|
598
|
+
'downloading': '⬇️ Downloading',
|
|
599
|
+
'extracting': '📦 Extracting',
|
|
600
|
+
'configuring': '⚙️ Configuring',
|
|
601
|
+
'building': '🔨 Building',
|
|
602
|
+
'booting': '🚀 Booting',
|
|
603
|
+
'oobe': '🪟 Windows Setup',
|
|
604
|
+
'ready': '✅ Ready',
|
|
605
|
+
'unknown': '⏳ Starting'
|
|
606
|
+
};
|
|
607
|
+
|
|
608
|
+
const html = tasks.map(task => {
|
|
609
|
+
const statusClass = task.status || 'pending';
|
|
610
|
+
const progressPercent = task.progress_percent || 0;
|
|
611
|
+
const progressClass = task.status === 'completed' ? 'completed' : '';
|
|
612
|
+
|
|
613
|
+
// Determine phase: use task.phase, fall back to metadata.phase,
|
|
614
|
+
// then if status is 'completed' use 'ready', otherwise 'unknown'
|
|
615
|
+
let phase = task.phase || task.metadata?.phase;
|
|
616
|
+
if (!phase) {
|
|
617
|
+
// If no phase specified, infer from status to prevent "Starting" + "completed" conflict
|
|
618
|
+
phase = (task.status === 'completed') ? 'ready' : 'unknown';
|
|
619
|
+
}
|
|
620
|
+
const phaseLabel = phaseLabels[phase] || phase;
|
|
621
|
+
|
|
622
|
+
// Debug: Log per-task phase/status mapping
|
|
623
|
+
console.log(`[SSE Debug] Task ${task.task_id}: status=${task.status}, phase=${task.phase}, resolvedPhase=${phase}, phaseLabel=${phaseLabel}`);
|
|
624
|
+
|
|
625
|
+
// Build link if VNC URL available
|
|
626
|
+
let linkHtml = '';
|
|
627
|
+
if (task.metadata && task.metadata.vnc_url) {
|
|
628
|
+
linkHtml = `<a href="${task.metadata.vnc_url}" target="_blank" class="task-link">
|
|
629
|
+
Open VNC →
|
|
630
|
+
</a>`;
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
// Show Windows credentials if available
|
|
634
|
+
let credentialsHtml = '';
|
|
635
|
+
if (task.metadata && task.metadata.windows_username) {
|
|
636
|
+
credentialsHtml = `
|
|
637
|
+
<div class="task-credentials">
|
|
638
|
+
<span class="cred-label">🔑 Login:</span>
|
|
639
|
+
<code>${task.metadata.windows_username}</code> /
|
|
640
|
+
<code>${task.metadata.windows_password || '(empty)'}</code>
|
|
641
|
+
</div>
|
|
642
|
+
`;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
// Add expandable logs if available
|
|
646
|
+
let logsHtml = '';
|
|
647
|
+
if (task.metadata && task.metadata.recent_logs) {
|
|
648
|
+
const taskId = task.task_id.replace(/[^a-z0-9]/gi, '_');
|
|
649
|
+
logsHtml = `
|
|
650
|
+
<details class="task-logs-details">
|
|
651
|
+
<summary class="task-logs-summary">Show recent logs</summary>
|
|
652
|
+
<pre class="task-logs-content">${task.metadata.recent_logs.replace(/</g, '<').replace(/>/g, '>')}</pre>
|
|
653
|
+
</details>
|
|
654
|
+
`;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
// Add VM Details expandable section for Windows containers
|
|
658
|
+
let vmDetailsHtml = '';
|
|
659
|
+
if (task.task_type === 'docker_container' && task.metadata) {
|
|
660
|
+
vmDetailsHtml = renderVMDetails(task.metadata);
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
// Progress label clarifies what % means
|
|
664
|
+
// Use a single unified status display to avoid showing conflicting states
|
|
665
|
+
let progressLabel;
|
|
666
|
+
if (task.status === 'completed' || phase === 'ready') {
|
|
667
|
+
progressLabel = 'Complete';
|
|
668
|
+
} else {
|
|
669
|
+
progressLabel = `Setup phase progress: ${progressPercent.toFixed(0)}%`;
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
return `
|
|
673
|
+
<div class="task-card">
|
|
674
|
+
<div class="task-card-header">
|
|
675
|
+
<div class="task-status-indicator ${statusClass}"></div>
|
|
676
|
+
<span class="task-title">${task.title || 'Unknown Task'}</span>
|
|
677
|
+
<span class="task-phase-badge">${phaseLabel}</span>
|
|
678
|
+
</div>
|
|
679
|
+
<div class="task-description">${task.description || ''}</div>
|
|
680
|
+
<div class="task-progress-bar">
|
|
681
|
+
<div class="task-progress-fill ${progressClass}" style="width: ${progressPercent}%"></div>
|
|
682
|
+
</div>
|
|
683
|
+
<div class="task-meta">
|
|
684
|
+
<span>${progressLabel}</span>
|
|
685
|
+
</div>
|
|
686
|
+
${credentialsHtml}
|
|
687
|
+
${linkHtml}
|
|
688
|
+
${vmDetailsHtml}
|
|
689
|
+
${logsHtml}
|
|
690
|
+
</div>
|
|
691
|
+
`;
|
|
692
|
+
}).join('');
|
|
693
|
+
|
|
694
|
+
container.innerHTML = html;
|
|
695
|
+
|
|
696
|
+
// Restore expanded states after DOM update
|
|
697
|
+
restoreTaskExpandedStates();
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
// Initial fetch and poll every 10 seconds
|
|
701
|
+
fetchBackgroundTasks();
|
|
702
|
+
setInterval(fetchBackgroundTasks, 10000);
|
|
703
|
+
</script>
|
|
704
|
+
'''
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _get_live_evaluation_panel_css() -> str:
    """Return the CSS rules for the live evaluation progress panel.

    The returned text is a bare stylesheet fragment (no ``<style>`` wrapper).
    It styles the ``.live-eval-*`` classes (panel, header, status box, step
    cards, result badge, idle placeholder, scrollable steps container) and
    the ``.sse-connection-*`` connection badge with its ``connected``,
    ``connecting``, ``disconnected`` and ``fallback`` variants. It also
    declares the ``pulse`` keyframes used by the badge dot while connecting.

    Several rules read CSS custom properties (``--text-muted``,
    ``--text-primary``, ``--text-secondary``, ``--bg-tertiary``,
    ``--border-color``, ``--accent``) that are not defined in this fragment;
    presumably the embedding page supplies them.

    Returns:
        The stylesheet fragment as a string.
    """
    return '''
    .live-eval-panel {
        background: linear-gradient(135deg, rgba(139, 92, 246, 0.15) 0%, rgba(139, 92, 246, 0.05) 100%);
        border: 1px solid rgba(139, 92, 246, 0.3);
        border-radius: 12px;
        padding: 20px 24px;
        margin-bottom: 24px;
    }
    .live-eval-header {
        display: flex;
        align-items: center;
        justify-content: space-between;
        margin-bottom: 16px;
    }
    .live-eval-title {
        display: flex;
        align-items: center;
        gap: 10px;
        font-size: 1rem;
        font-weight: 600;
        color: #8b5cf6;
    }
    .live-eval-title svg {
        width: 20px;
        height: 20px;
    }
    .live-eval-refresh {
        font-size: 0.75rem;
        color: var(--text-muted);
    }
    .live-eval-status {
        padding: 12px 16px;
        background: rgba(0, 0, 0, 0.3);
        border-radius: 8px;
        margin-bottom: 12px;
    }
    .live-eval-progress {
        font-size: 0.95rem;
        color: var(--text-primary);
        font-weight: 600;
        margin-bottom: 8px;
    }
    .live-eval-task-name {
        font-size: 0.85rem;
        color: var(--text-secondary);
        margin-bottom: 4px;
    }
    .live-eval-step {
        padding: 12px;
        background: var(--bg-tertiary);
        border: 1px solid var(--border-color);
        border-radius: 6px;
        margin-bottom: 8px;
    }
    .live-eval-step-header {
        display: flex;
        align-items: center;
        gap: 12px;
        margin-bottom: 8px;
    }
    .live-eval-step-number {
        font-weight: 600;
        color: var(--accent);
        min-width: 60px;
    }
    .live-eval-action {
        flex: 1;
        font-family: "SF Mono", Monaco, monospace;
        font-size: 0.85rem;
        color: var(--text-primary);
    }
    .live-eval-screenshot {
        max-width: 300px;
        border-radius: 4px;
        border: 1px solid var(--border-color);
        margin: 8px 0;
    }
    .live-eval-reasoning {
        font-size: 0.8rem;
        color: var(--text-secondary);
        font-style: italic;
        margin-top: 8px;
        padding: 8px;
        background: rgba(0, 0, 0, 0.2);
        border-radius: 4px;
    }
    .live-eval-result {
        display: inline-flex;
        align-items: center;
        gap: 6px;
        padding: 4px 10px;
        border-radius: 4px;
        font-size: 0.75rem;
        font-weight: 600;
    }
    .live-eval-result.success {
        background: rgba(16, 185, 129, 0.2);
        color: #10b981;
    }
    .live-eval-result.failure {
        background: rgba(239, 68, 68, 0.2);
        color: #ef4444;
    }
    .live-eval-idle {
        text-align: center;
        padding: 40px 20px;
        color: var(--text-muted);
        font-size: 0.9rem;
    }
    .live-eval-steps-container {
        max-height: 400px;
        overflow-y: auto;
    }
    /* SSE Connection Status Indicator */
    .sse-connection-status {
        display: inline-flex;
        align-items: center;
        gap: 6px;
        padding: 4px 10px;
        border-radius: 12px;
        font-size: 0.7rem;
        font-weight: 600;
        margin-left: 12px;
    }
    .sse-connection-status.connected {
        background: rgba(16, 185, 129, 0.2);
        color: #10b981;
    }
    .sse-connection-status.connecting {
        background: rgba(245, 158, 11, 0.2);
        color: #f59e0b;
    }
    .sse-connection-status.disconnected {
        background: rgba(239, 68, 68, 0.2);
        color: #ef4444;
    }
    .sse-connection-status.fallback {
        background: rgba(156, 163, 175, 0.2);
        color: #9ca3af;
    }
    .sse-connection-dot {
        width: 6px;
        height: 6px;
        border-radius: 50%;
        background: currentColor;
    }
    .sse-connection-status.connecting .sse-connection-dot {
        animation: pulse 1.5s ease-in-out infinite;
    }
    @keyframes pulse {
        0%, 100% { opacity: 1; }
        50% { opacity: 0.3; }
    }
    '''
|
|
863
|
+
|
|
864
|
+
|
|
865
|
+
def _get_live_evaluation_panel_html() -> str:
|
|
866
|
+
"""Return HTML for live evaluation panel with SSE and polling fallback."""
|
|
867
|
+
return '''
|
|
868
|
+
<div class="live-eval-panel" id="live-eval-panel">
|
|
869
|
+
<div class="live-eval-header">
|
|
870
|
+
<div class="live-eval-title">
|
|
871
|
+
<svg viewBox="0 0 24 24" fill="currentColor">
|
|
872
|
+
<path d="M9 5H7a2 2 0 00-2 2v12a2 2 0 002 2h10a2 2 0 002-2V7a2 2 0 00-2-2h-2M9 5a2 2 0 002 2h2a2 2 0 002-2M9 5a2 2 0 012-2h2a2 2 0 012 2m-6 9l2 2 4-4"/>
|
|
873
|
+
</svg>
|
|
874
|
+
Live Evaluation
|
|
875
|
+
<span class="sse-connection-status connecting" id="sse-status">
|
|
876
|
+
<span class="sse-connection-dot"></span>
|
|
877
|
+
<span id="sse-status-text">Connecting</span>
|
|
878
|
+
</span>
|
|
879
|
+
</div>
|
|
880
|
+
<div style="display: flex; align-items: center; gap: 12px;">
|
|
881
|
+
<span class="live-eval-refresh" id="live-eval-refresh-time">Checking...</span>
|
|
882
|
+
<button class="refresh-btn" onclick="if(window.sseManager) { window.sseManager.disconnect(); window.sseManager.connect(); }" title="Reconnect to live updates" style="background: rgba(245, 158, 11, 0.2); border-color: rgba(245, 158, 11, 0.4);">
|
|
883
|
+
<span class="refresh-icon">↻</span>
|
|
884
|
+
<span class="spinner" style="border-top-color: #f59e0b;"></span>
|
|
885
|
+
Reconnect
|
|
886
|
+
</button>
|
|
887
|
+
</div>
|
|
888
|
+
</div>
|
|
889
|
+
<div id="live-eval-content">
|
|
890
|
+
<div class="live-eval-idle">
|
|
891
|
+
<div style="font-size: 2rem; margin-bottom: 12px; opacity: 0.5;">⚡</div>
|
|
892
|
+
No evaluation running
|
|
893
|
+
<div style="font-size: 0.8rem; color: var(--text-muted); margin-top: 8px;">
|
|
894
|
+
Start an evaluation to see real-time progress
|
|
895
|
+
</div>
|
|
896
|
+
</div>
|
|
897
|
+
</div>
|
|
898
|
+
</div>
|
|
899
|
+
|
|
900
|
+
<script>
|
|
901
|
+
// SSE Manager for real-time benchmark updates
|
|
902
|
+
class BenchmarkSSEManager {
|
|
903
|
+
constructor() {
|
|
904
|
+
this.eventSource = null;
|
|
905
|
+
this.pollingInterval = null;
|
|
906
|
+
this.staleCheckInterval = null; // Track stale connection check interval
|
|
907
|
+
this.usePolling = false;
|
|
908
|
+
this.reconnectAttempts = 0;
|
|
909
|
+
this.maxReconnectAttempts = 5;
|
|
910
|
+
this.reconnectDelay = 2000;
|
|
911
|
+
this.lastHeartbeat = Date.now();
|
|
912
|
+
this.state = {
|
|
913
|
+
status: 'idle',
|
|
914
|
+
tasks_completed: 0,
|
|
915
|
+
total_tasks: 0,
|
|
916
|
+
current_task: null,
|
|
917
|
+
results: []
|
|
918
|
+
};
|
|
919
|
+
}
|
|
920
|
+
|
|
921
|
+
// Clear all intervals to prevent memory leaks
|
|
922
|
+
clearAllIntervals() {
|
|
923
|
+
if (this.pollingInterval) {
|
|
924
|
+
clearInterval(this.pollingInterval);
|
|
925
|
+
this.pollingInterval = null;
|
|
926
|
+
}
|
|
927
|
+
if (this.staleCheckInterval) {
|
|
928
|
+
clearInterval(this.staleCheckInterval);
|
|
929
|
+
this.staleCheckInterval = null;
|
|
930
|
+
}
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
connect() {
|
|
934
|
+
// Check if EventSource is supported
|
|
935
|
+
if (!window.EventSource) {
|
|
936
|
+
console.log('SSE not supported, falling back to polling');
|
|
937
|
+
this.startPolling();
|
|
938
|
+
return;
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
// Clear any existing intervals before reconnecting
|
|
942
|
+
this.clearAllIntervals();
|
|
943
|
+
|
|
944
|
+
this.updateConnectionStatus('connecting');
|
|
945
|
+
|
|
946
|
+
try {
|
|
947
|
+
this.eventSource = new EventSource('/api/benchmark-sse?interval=2');
|
|
948
|
+
|
|
949
|
+
this.eventSource.addEventListener('connected', (e) => {
|
|
950
|
+
console.log('SSE connected:', e.data);
|
|
951
|
+
this.reconnectAttempts = 0;
|
|
952
|
+
this.updateConnectionStatus('connected');
|
|
953
|
+
});
|
|
954
|
+
|
|
955
|
+
this.eventSource.addEventListener('status', (e) => {
|
|
956
|
+
const data = JSON.parse(e.data);
|
|
957
|
+
this.handleStatusEvent(data);
|
|
958
|
+
this.updateTimestamp();
|
|
959
|
+
});
|
|
960
|
+
|
|
961
|
+
this.eventSource.addEventListener('progress', (e) => {
|
|
962
|
+
const data = JSON.parse(e.data);
|
|
963
|
+
this.handleProgressEvent(data);
|
|
964
|
+
this.updateTimestamp();
|
|
965
|
+
});
|
|
966
|
+
|
|
967
|
+
this.eventSource.addEventListener('task_complete', (e) => {
|
|
968
|
+
const data = JSON.parse(e.data);
|
|
969
|
+
this.handleTaskCompleteEvent(data);
|
|
970
|
+
this.updateTimestamp();
|
|
971
|
+
});
|
|
972
|
+
|
|
973
|
+
this.eventSource.addEventListener('heartbeat', (e) => {
|
|
974
|
+
this.lastHeartbeat = Date.now();
|
|
975
|
+
// Heartbeats keep connection alive, no UI update needed
|
|
976
|
+
});
|
|
977
|
+
|
|
978
|
+
this.eventSource.addEventListener('error', (e) => {
|
|
979
|
+
const data = JSON.parse(e.data);
|
|
980
|
+
console.error('SSE error event:', data);
|
|
981
|
+
});
|
|
982
|
+
|
|
983
|
+
this.eventSource.onerror = (e) => {
|
|
984
|
+
console.error('SSE connection error:', e);
|
|
985
|
+
this.handleConnectionError();
|
|
986
|
+
};
|
|
987
|
+
|
|
988
|
+
// Check for stale connection (no heartbeat in 60 seconds)
|
|
989
|
+
// Store interval ID to clear on reconnect
|
|
990
|
+
this.staleCheckInterval = setInterval(() => {
|
|
991
|
+
if (this.eventSource && (Date.now() - this.lastHeartbeat > 60000)) {
|
|
992
|
+
console.log('SSE connection stale, reconnecting...');
|
|
993
|
+
this.reconnect();
|
|
994
|
+
}
|
|
995
|
+
}, 30000);
|
|
996
|
+
|
|
997
|
+
} catch (e) {
|
|
998
|
+
console.error('SSE connection failed:', e);
|
|
999
|
+
this.startPolling();
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
|
|
1003
|
+
handleStatusEvent(data) {
|
|
1004
|
+
console.log('[SSE Debug] handleStatusEvent:', JSON.stringify(data));
|
|
1005
|
+
// Clear previous vmStatus to prevent stale state accumulation
|
|
1006
|
+
this.state.vmStatus = data;
|
|
1007
|
+
if (data.waa_ready) {
|
|
1008
|
+
this.state.status = 'ready';
|
|
1009
|
+
}
|
|
1010
|
+
console.log('[SSE Debug] Updated state after status event:', JSON.stringify(this.state));
|
|
1011
|
+
this.render();
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
handleProgressEvent(data) {
|
|
1015
|
+
console.log('[SSE Debug] handleProgressEvent:', JSON.stringify(data));
|
|
1016
|
+
this.state.status = 'running';
|
|
1017
|
+
this.state.tasks_completed = data.tasks_completed;
|
|
1018
|
+
this.state.total_tasks = data.total_tasks;
|
|
1019
|
+
this.state.current_task = {
|
|
1020
|
+
task_id: data.current_task,
|
|
1021
|
+
instruction: `Task ${data.current_task}`,
|
|
1022
|
+
domain: 'waa'
|
|
1023
|
+
};
|
|
1024
|
+
console.log('[SSE Debug] Updated state after progress event:', JSON.stringify(this.state));
|
|
1025
|
+
this.render();
|
|
1026
|
+
}
|
|
1027
|
+
|
|
1028
|
+
handleTaskCompleteEvent(data) {
|
|
1029
|
+
this.state.results.push({
|
|
1030
|
+
task_id: data.task_id,
|
|
1031
|
+
success: data.success,
|
|
1032
|
+
score: data.score
|
|
1033
|
+
});
|
|
1034
|
+
this.render();
|
|
1035
|
+
}
|
|
1036
|
+
|
|
1037
|
+
handleConnectionError() {
|
|
1038
|
+
this.updateConnectionStatus('disconnected');
|
|
1039
|
+
|
|
1040
|
+
if (this.reconnectAttempts < this.maxReconnectAttempts) {
|
|
1041
|
+
this.reconnectAttempts++;
|
|
1042
|
+
console.log(`SSE reconnect attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts}`);
|
|
1043
|
+
setTimeout(() => this.reconnect(), this.reconnectDelay * this.reconnectAttempts);
|
|
1044
|
+
} else {
|
|
1045
|
+
console.log('Max SSE reconnect attempts reached, falling back to polling');
|
|
1046
|
+
this.startPolling();
|
|
1047
|
+
}
|
|
1048
|
+
}
|
|
1049
|
+
|
|
1050
|
+
reconnect() {
|
|
1051
|
+
if (this.eventSource) {
|
|
1052
|
+
this.eventSource.close();
|
|
1053
|
+
this.eventSource = null;
|
|
1054
|
+
}
|
|
1055
|
+
this.connect();
|
|
1056
|
+
}
|
|
1057
|
+
|
|
1058
|
+
startPolling() {
|
|
1059
|
+
this.usePolling = true;
|
|
1060
|
+
this.updateConnectionStatus('fallback');
|
|
1061
|
+
|
|
1062
|
+
if (this.eventSource) {
|
|
1063
|
+
this.eventSource.close();
|
|
1064
|
+
this.eventSource = null;
|
|
1065
|
+
}
|
|
1066
|
+
|
|
1067
|
+
// Clear any existing intervals before starting new polling
|
|
1068
|
+
this.clearAllIntervals();
|
|
1069
|
+
|
|
1070
|
+
// Use existing polling function
|
|
1071
|
+
fetchLiveEvaluationPolling();
|
|
1072
|
+
this.pollingInterval = setInterval(fetchLiveEvaluationPolling, 2000);
|
|
1073
|
+
}
|
|
1074
|
+
|
|
1075
|
+
updateConnectionStatus(status) {
|
|
1076
|
+
const el = document.getElementById('sse-status');
|
|
1077
|
+
const textEl = document.getElementById('sse-status-text');
|
|
1078
|
+
if (!el || !textEl) return;
|
|
1079
|
+
|
|
1080
|
+
el.className = 'sse-connection-status ' + status;
|
|
1081
|
+
const statusText = {
|
|
1082
|
+
'connected': 'Live',
|
|
1083
|
+
'connecting': 'Connecting',
|
|
1084
|
+
'disconnected': 'Disconnected',
|
|
1085
|
+
'fallback': 'Polling'
|
|
1086
|
+
};
|
|
1087
|
+
textEl.textContent = statusText[status] || status;
|
|
1088
|
+
}
|
|
1089
|
+
|
|
1090
|
+
updateTimestamp() {
|
|
1091
|
+
const el = document.getElementById('live-eval-refresh-time');
|
|
1092
|
+
if (el) {
|
|
1093
|
+
el.textContent = 'Updated ' + new Date().toLocaleTimeString();
|
|
1094
|
+
}
|
|
1095
|
+
}
|
|
1096
|
+
|
|
1097
|
+
render() {
|
|
1098
|
+
renderLiveEvaluation(this.state);
|
|
1099
|
+
}
|
|
1100
|
+
|
|
1101
|
+
disconnect() {
|
|
1102
|
+
if (this.eventSource) {
|
|
1103
|
+
this.eventSource.close();
|
|
1104
|
+
this.eventSource = null;
|
|
1105
|
+
}
|
|
1106
|
+
// Clear all intervals using centralized cleanup
|
|
1107
|
+
this.clearAllIntervals();
|
|
1108
|
+
}
|
|
1109
|
+
}
|
|
1110
|
+
|
|
1111
|
+
// Polling fallback function
|
|
1112
|
+
async function fetchLiveEvaluationPolling() {
|
|
1113
|
+
try {
|
|
1114
|
+
const response = await fetch('/api/benchmark-live?' + Date.now());
|
|
1115
|
+
if (response.ok) {
|
|
1116
|
+
const state = await response.json();
|
|
1117
|
+
console.log('[SSE Debug] Polling received state:', JSON.stringify(state));
|
|
1118
|
+
renderLiveEvaluation(state);
|
|
1119
|
+
document.getElementById('live-eval-refresh-time').textContent =
|
|
1120
|
+
'Updated ' + new Date().toLocaleTimeString();
|
|
1121
|
+
}
|
|
1122
|
+
} catch (e) {
|
|
1123
|
+
console.log('Live evaluation API unavailable:', e);
|
|
1124
|
+
document.getElementById('live-eval-content').innerHTML =
|
|
1125
|
+
'<div class="live-eval-idle">Live evaluation API not available</div>';
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
1128
|
+
|
|
1129
|
+
function renderLiveEvaluation(state) {
|
|
1130
|
+
const container = document.getElementById('live-eval-content');
|
|
1131
|
+
|
|
1132
|
+
if (!state || state.status === 'idle' || !state.current_task) {
|
|
1133
|
+
container.innerHTML = '<div class="live-eval-idle">No evaluation running</div>';
|
|
1134
|
+
return;
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
const task = state.current_task;
|
|
1138
|
+
const progress = `${state.tasks_completed || 0}/${state.total_tasks || 0}`;
|
|
1139
|
+
|
|
1140
|
+
// Build status section
|
|
1141
|
+
let statusHtml = `
|
|
1142
|
+
<div class="live-eval-status">
|
|
1143
|
+
<div class="live-eval-progress">Evaluating task ${progress}: ${task.task_id}</div>
|
|
1144
|
+
<div class="live-eval-task-name">${task.instruction || 'No instruction'}</div>
|
|
1145
|
+
<div class="live-eval-task-name">Domain: ${task.domain || 'unknown'}</div>
|
|
1146
|
+
</div>
|
|
1147
|
+
`;
|
|
1148
|
+
|
|
1149
|
+
// Build steps section
|
|
1150
|
+
let stepsHtml = '';
|
|
1151
|
+
if (task.steps && task.steps.length > 0) {
|
|
1152
|
+
stepsHtml = '<div class="live-eval-steps-container">';
|
|
1153
|
+
|
|
1154
|
+
// Show last 5 steps
|
|
1155
|
+
const recentSteps = task.steps.slice(-5);
|
|
1156
|
+
recentSteps.forEach(step => {
|
|
1157
|
+
const actionText = formatAction(step.action);
|
|
1158
|
+
const screenshotHtml = step.screenshot_url
|
|
1159
|
+
? `<img src="${step.screenshot_url}" class="live-eval-screenshot" alt="Step ${step.step_idx}" />`
|
|
1160
|
+
: '';
|
|
1161
|
+
const reasoningHtml = step.reasoning
|
|
1162
|
+
? `<div class="live-eval-reasoning">"${step.reasoning}"</div>`
|
|
1163
|
+
: '';
|
|
1164
|
+
|
|
1165
|
+
stepsHtml += `
|
|
1166
|
+
<div class="live-eval-step">
|
|
1167
|
+
<div class="live-eval-step-header">
|
|
1168
|
+
<div class="live-eval-step-number">Step ${step.step_idx}</div>
|
|
1169
|
+
<div class="live-eval-action">${actionText}</div>
|
|
1170
|
+
</div>
|
|
1171
|
+
${screenshotHtml}
|
|
1172
|
+
${reasoningHtml}
|
|
1173
|
+
</div>
|
|
1174
|
+
`;
|
|
1175
|
+
});
|
|
1176
|
+
|
|
1177
|
+
stepsHtml += '</div>';
|
|
1178
|
+
}
|
|
1179
|
+
|
|
1180
|
+
// Show result if task completed
|
|
1181
|
+
let resultHtml = '';
|
|
1182
|
+
if (task.result) {
|
|
1183
|
+
const resultClass = task.result.success ? 'success' : 'failure';
|
|
1184
|
+
const resultIcon = task.result.success ? '✓' : '✗';
|
|
1185
|
+
resultHtml = `
|
|
1186
|
+
<div class="live-eval-status">
|
|
1187
|
+
<div class="live-eval-result ${resultClass}">
|
|
1188
|
+
${resultIcon} ${task.result.success ? 'Success' : 'Failure'}
|
|
1189
|
+
(${task.result.num_steps} steps in ${task.result.total_time_seconds.toFixed(2)}s)
|
|
1190
|
+
</div>
|
|
1191
|
+
</div>
|
|
1192
|
+
`;
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
// Show recent results summary
|
|
1196
|
+
if (state.results && state.results.length > 0) {
|
|
1197
|
+
const successCount = state.results.filter(r => r.success).length;
|
|
1198
|
+
resultHtml += `
|
|
1199
|
+
<div class="live-eval-status" style="margin-top: 8px;">
|
|
1200
|
+
<small>Results: ${successCount}/${state.results.length} passed</small>
|
|
1201
|
+
</div>
|
|
1202
|
+
`;
|
|
1203
|
+
}
|
|
1204
|
+
|
|
1205
|
+
container.innerHTML = statusHtml + stepsHtml + resultHtml;
|
|
1206
|
+
}
|
|
1207
|
+
|
|
1208
|
+
function formatAction(action) {
|
|
1209
|
+
if (!action) return 'No action';
|
|
1210
|
+
|
|
1211
|
+
const type = action.type || 'unknown';
|
|
1212
|
+
const parts = [type.toUpperCase()];
|
|
1213
|
+
|
|
1214
|
+
if (action.x !== null && action.y !== null) {
|
|
1215
|
+
parts.push(`(x=${action.x.toFixed(3)}, y=${action.y.toFixed(3)})`);
|
|
1216
|
+
} else if (action.target_node_id) {
|
|
1217
|
+
parts.push(`[${action.target_node_id}]`);
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
if (action.text) {
|
|
1221
|
+
parts.push(`"${action.text}"`);
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
if (action.key) {
|
|
1225
|
+
parts.push(`key=${action.key}`);
|
|
1226
|
+
}
|
|
1227
|
+
|
|
1228
|
+
return parts.join(' ');
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
// Initialize SSE manager and store on window for reconnect button
|
|
1232
|
+
window.sseManager = new BenchmarkSSEManager();
|
|
1233
|
+
window.sseManager.connect();
|
|
1234
|
+
|
|
1235
|
+
// Cleanup on page unload
|
|
1236
|
+
window.addEventListener('beforeunload', () => {
|
|
1237
|
+
if (window.sseManager) window.sseManager.disconnect();
|
|
1238
|
+
});
|
|
1239
|
+
</script>
|
|
1240
|
+
'''
|
|
1241
|
+
|
|
1242
|
+
|
|
1243
|
+
def _get_azure_jobs_panel_css() -> str:
    """Return CSS for the Azure jobs status panel with color-coded status indicators.

    The returned text is a bare stylesheet fragment (no ``<style>`` wrapper)
    covering:

    * the collapsible ``.azure-jobs-panel`` container, header, body, title
      and controls (the body is hidden while the panel has ``collapsed``);
    * the ``.api-error-banner``, hidden unless it also has the ``show`` class;
    * ``.azure-job-item`` rows whose left border and gradient depend on a
      ``status-*`` class (running = amber, completed = green,
      failed/canceled = red, provisioning/preparing/queued/starting = blue);
    * matching ``.status-dot`` / ``.status-text`` colour variants;
    * the ``.no-jobs`` placeholder and the shared ``.refresh-btn`` with its
      ``loading`` spinner state;
    * the ``pulse-status`` and ``spin`` keyframes.

    Some rules read the CSS custom properties ``--text-muted``,
    ``--text-primary`` and ``--text-secondary``, which are not defined in
    this fragment; presumably the embedding page supplies them.

    Returns:
        The stylesheet fragment as a string.
    """
    return '''
    .azure-jobs-panel {
        background: linear-gradient(135deg, rgba(0, 120, 212, 0.15) 0%, rgba(0, 120, 212, 0.05) 100%);
        border: 1px solid rgba(0, 120, 212, 0.3);
        border-radius: 12px;
        margin-bottom: 24px;
        overflow: hidden;
    }
    .azure-jobs-panel.collapsed .azure-jobs-body {
        display: none;
    }
    .azure-jobs-panel.collapsed .azure-jobs-header {
        margin-bottom: 0;
    }
    .azure-jobs-header {
        display: flex;
        align-items: center;
        justify-content: space-between;
        padding: 16px 24px;
        cursor: pointer;
        transition: background 0.2s;
    }
    .azure-jobs-header:hover {
        background: rgba(0, 120, 212, 0.1);
    }
    .azure-jobs-body {
        padding: 0 24px 20px 24px;
    }
    .azure-jobs-title {
        display: flex;
        align-items: center;
        gap: 10px;
        font-size: 1rem;
        font-weight: 600;
        color: #0078d4;
    }
    .azure-jobs-title svg {
        width: 20px;
        height: 20px;
    }
    .azure-jobs-expand-icon {
        font-size: 0.75rem;
        transition: transform 0.2s;
        margin-left: 8px;
        color: var(--text-muted);
    }
    .azure-jobs-panel:not(.collapsed) .azure-jobs-expand-icon {
        transform: rotate(90deg);
    }
    .azure-jobs-tooltip {
        font-size: 0.7rem;
        color: var(--text-muted);
        font-weight: 400;
        margin-left: 8px;
    }
    .azure-jobs-controls {
        display: flex;
        align-items: center;
        gap: 12px;
    }
    .azure-jobs-refresh {
        font-size: 0.75rem;
        color: var(--text-muted);
        transition: color 0.2s;
    }
    .azure-jobs-refresh.error {
        color: #ef4444;
    }
    .azure-jobs-refresh.success {
        color: #10b981;
    }
    /* API Error Banner */
    .api-error-banner {
        background: linear-gradient(135deg, rgba(239, 68, 68, 0.2) 0%, rgba(239, 68, 68, 0.1) 100%);
        border: 1px solid rgba(239, 68, 68, 0.4);
        border-radius: 8px;
        padding: 12px 16px;
        margin-bottom: 16px;
        display: none;
        align-items: center;
        gap: 12px;
        font-size: 0.85rem;
        color: #fca5a5;
    }
    .api-error-banner.show {
        display: flex;
    }
    .api-error-banner .error-icon {
        font-size: 1.2rem;
        flex-shrink: 0;
    }
    .api-error-banner .error-message {
        flex: 1;
    }
    .api-error-banner .retry-btn {
        padding: 4px 10px;
        background: rgba(239, 68, 68, 0.3);
        border: 1px solid rgba(239, 68, 68, 0.5);
        border-radius: 4px;
        color: #fca5a5;
        cursor: pointer;
        font-size: 0.75rem;
        transition: background 0.2s;
    }
    .api-error-banner .retry-btn:hover {
        background: rgba(239, 68, 68, 0.4);
    }
    /* Job items with color-coded borders */
    .azure-job-item {
        display: flex;
        align-items: center;
        gap: 16px;
        padding: 14px 18px;
        background: rgba(0, 0, 0, 0.3);
        border-radius: 8px;
        margin-bottom: 10px;
        border-left: 4px solid transparent;
        transition: all 0.2s ease;
    }
    .azure-job-item:last-child {
        margin-bottom: 0;
    }
    .azure-job-item:hover {
        background: rgba(0, 0, 0, 0.4);
    }
    /* Color-coded left border based on status - Running=Yellow, Completed=Green, Failed=Red */
    .azure-job-item.status-running {
        border-left-color: #f59e0b;
        background: linear-gradient(90deg, rgba(245, 158, 11, 0.1) 0%, rgba(0, 0, 0, 0.3) 20%);
    }
    .azure-job-item.status-completed {
        border-left-color: #10b981;
        background: linear-gradient(90deg, rgba(16, 185, 129, 0.1) 0%, rgba(0, 0, 0, 0.3) 20%);
    }
    .azure-job-item.status-failed,
    .azure-job-item.status-canceled {
        border-left-color: #ef4444;
        background: linear-gradient(90deg, rgba(239, 68, 68, 0.1) 0%, rgba(0, 0, 0, 0.3) 20%);
    }
    .azure-job-item.status-provisioning,
    .azure-job-item.status-preparing,
    .azure-job-item.status-queued,
    .azure-job-item.status-starting {
        border-left-color: #3b82f6;
        background: linear-gradient(90deg, rgba(59, 130, 246, 0.1) 0%, rgba(0, 0, 0, 0.3) 20%);
    }
    .azure-job-status {
        display: flex;
        align-items: center;
        gap: 8px;
        min-width: 130px;
    }
    .status-dot {
        width: 10px;
        height: 10px;
        border-radius: 50%;
        flex-shrink: 0;
    }
    .status-dot.provisioning,
    .status-dot.preparing,
    .status-dot.queued,
    .status-dot.starting {
        background: #3b82f6;
        animation: pulse-status 2s infinite;
    }
    .status-dot.running {
        background: #f59e0b;
        animation: pulse-status 1.5s infinite;
    }
    .status-dot.completed {
        background: #10b981;
        animation: none;
    }
    .status-dot.failed,
    .status-dot.canceled {
        background: #ef4444;
        animation: none;
    }
    .status-dot.unknown {
        background: #6b7280;
        animation: none;
    }
    @keyframes pulse-status {
        0%, 100% { opacity: 1; transform: scale(1); box-shadow: 0 0 0 0 currentColor; }
        50% { opacity: 0.6; transform: scale(0.9); }
    }
    .status-text {
        font-weight: 600;
        font-size: 0.8rem;
        text-transform: uppercase;
        letter-spacing: 0.5px;
    }
    .status-text.running { color: #f59e0b; }
    .status-text.completed { color: #10b981; }
    .status-text.failed, .status-text.canceled { color: #ef4444; }
    .status-text.provisioning, .status-text.preparing, .status-text.queued, .status-text.starting { color: #3b82f6; }
    .azure-job-info {
        flex: 1;
        min-width: 0;
    }
    .azure-job-id {
        font-family: "SF Mono", Monaco, monospace;
        font-size: 0.85rem;
        color: var(--text-primary);
        font-weight: 500;
    }
    .azure-job-meta {
        font-size: 0.75rem;
        color: var(--text-secondary);
        margin-top: 4px;
        display: flex;
        flex-wrap: wrap;
        gap: 8px;
    }
    .azure-job-meta-item {
        display: inline-flex;
        align-items: center;
        gap: 4px;
    }
    .azure-job-link {
        display: inline-flex;
        align-items: center;
        gap: 6px;
        padding: 8px 14px;
        background: #0078d4;
        color: white;
        border-radius: 6px;
        text-decoration: none;
        font-size: 0.8rem;
        font-weight: 500;
        transition: all 0.2s;
    }
    .azure-job-link:hover {
        background: #106ebe;
        transform: translateY(-1px);
        box-shadow: 0 4px 12px rgba(0, 120, 212, 0.3);
    }
    .no-jobs {
        text-align: center;
        padding: 30px 20px;
        color: var(--text-muted);
        font-size: 0.9rem;
    }
    .no-jobs code {
        display: block;
        margin-top: 12px;
        padding: 10px 14px;
        background: rgba(0, 0, 0, 0.4);
        border-radius: 6px;
        font-family: "SF Mono", Monaco, monospace;
        font-size: 0.8rem;
        color: var(--text-secondary);
    }
    /* Refresh button with loading spinner */
    .refresh-btn {
        background: rgba(0, 120, 212, 0.2);
        border: 1px solid rgba(0, 120, 212, 0.4);
        border-radius: 6px;
        color: var(--text-primary);
        cursor: pointer;
        padding: 6px 12px;
        font-size: 0.8rem;
        display: flex;
        align-items: center;
        gap: 6px;
        transition: all 0.2s;
    }
    .refresh-btn:hover:not(:disabled) {
        background: rgba(0, 120, 212, 0.3);
        transform: translateY(-1px);
    }
    .refresh-btn:disabled {
        opacity: 0.6;
        cursor: not-allowed;
    }
    .refresh-btn .spinner {
        display: none;
        width: 14px;
        height: 14px;
        border: 2px solid rgba(255,255,255,0.3);
        border-top-color: #0078d4;
        border-radius: 50%;
        animation: spin 0.8s linear infinite;
    }
    .refresh-btn.loading .spinner {
        display: inline-block;
    }
    .refresh-btn.loading .refresh-icon {
        display: none;
    }
    @keyframes spin {
        to { transform: rotate(360deg); }
    }
    '''
|
|
1539
|
+
|
|
1540
|
+
|
|
1541
|
+
def _get_azure_jobs_panel_html() -> str:
|
|
1542
|
+
"""Return HTML for the Azure jobs status panel with JS polling, error handling, and loading states.
|
|
1543
|
+
|
|
1544
|
+
NOTE: This panel is now used in the Training tab (not Benchmarks) because Azure ML
|
|
1545
|
+
is used for training jobs, not for WAA benchmarks (which require nested virtualization
|
|
1546
|
+
that managed compute doesn't support).
|
|
1547
|
+
"""
|
|
1548
|
+
return '''
|
|
1549
|
+
<div class="azure-jobs-panel collapsed" id="azure-jobs-panel">
|
|
1550
|
+
<div class="azure-jobs-header" onclick="toggleAzureJobsPanel()" title="Azure ML training jobs">
|
|
1551
|
+
<div class="azure-jobs-title">
|
|
1552
|
+
<svg viewBox="0 0 24 24" fill="currentColor">
|
|
1553
|
+
<path d="M12 2C6.48 2 2 6.48 2 12s4.48 10 10 10 10-4.48 10-10S17.52 2 12 2zm-1 17.93c-3.95-.49-7-3.85-7-7.93 0-.62.08-1.21.21-1.79L9 15v1c0 1.1.9 2 2 2v1.93zm6.9-2.54c-.26-.81-1-1.39-1.9-1.39h-1v-3c0-.55-.45-1-1-1H8v-2h2c.55 0 1-.45 1-1V7h2c1.1 0 2-.9 2-2v-.41c2.93 1.19 5 4.06 5 7.41 0 2.08-.8 3.97-2.1 5.39z"/>
|
|
1554
|
+
</svg>
|
|
1555
|
+
Azure ML Jobs
|
|
1556
|
+
<span class="azure-jobs-expand-icon">▶</span>
|
|
1557
|
+
</div>
|
|
1558
|
+
<div class="azure-jobs-controls" onclick="event.stopPropagation()">
|
|
1559
|
+
<span class="azure-jobs-refresh" id="jobs-refresh-time">Checking...</span>
|
|
1560
|
+
<button id="azure-jobs-refresh-btn" class="refresh-btn" onclick="refreshAzureJobs()" title="Refresh job status from Azure">
|
|
1561
|
+
<span class="refresh-icon">↻</span>
|
|
1562
|
+
<span class="spinner"></span>
|
|
1563
|
+
Refresh
|
|
1564
|
+
</button>
|
|
1565
|
+
</div>
|
|
1566
|
+
</div>
|
|
1567
|
+
|
|
1568
|
+
<div class="azure-jobs-body">
|
|
1569
|
+
<!-- API Error Banner (hidden by default) -->
|
|
1570
|
+
<div class="api-error-banner" id="azure-jobs-error">
|
|
1571
|
+
<span class="error-icon">!</span>
|
|
1572
|
+
<span class="error-message" id="azure-jobs-error-msg">Failed to fetch Azure jobs</span>
|
|
1573
|
+
<button class="retry-btn" onclick="refreshAzureJobs()">Retry</button>
|
|
1574
|
+
</div>
|
|
1575
|
+
|
|
1576
|
+
<!-- Loading state -->
|
|
1577
|
+
<div id="azure-jobs-loading" style="display: none; text-align: center; padding: 30px;">
|
|
1578
|
+
<div style="display: inline-block; width: 24px; height: 24px; border: 3px solid rgba(0,120,212,0.3); border-top-color: #0078d4; border-radius: 50%; animation: spin 1s linear infinite;"></div>
|
|
1579
|
+
<div style="margin-top: 12px; color: var(--text-muted); font-size: 0.85rem;">Loading Azure jobs...</div>
|
|
1580
|
+
</div>
|
|
1581
|
+
|
|
1582
|
+
<div id="azure-jobs-list">
|
|
1583
|
+
<div class="no-jobs">
|
|
1584
|
+
<div style="font-size: 2rem; margin-bottom: 12px; opacity: 0.5;">☁</div>
|
|
1585
|
+
Checking Azure ML for jobs...
|
|
1586
|
+
</div>
|
|
1587
|
+
</div>
|
|
1588
|
+
|
|
1589
|
+
<button id="toggle-logs-btn" onclick="toggleLogs()" style="
|
|
1590
|
+
margin-top: 12px;
|
|
1591
|
+
padding: 8px 14px;
|
|
1592
|
+
background: rgba(0, 120, 212, 0.2);
|
|
1593
|
+
border: 1px solid rgba(0, 120, 212, 0.4);
|
|
1594
|
+
border-radius: 6px;
|
|
1595
|
+
color: var(--text-primary);
|
|
1596
|
+
cursor: pointer;
|
|
1597
|
+
font-size: 0.8rem;
|
|
1598
|
+
display: flex;
|
|
1599
|
+
align-items: center;
|
|
1600
|
+
gap: 6px;
|
|
1601
|
+
transition: all 0.2s;
|
|
1602
|
+
">
|
|
1603
|
+
<span id="logs-icon">▼</span>
|
|
1604
|
+
<span id="logs-btn-text">Show Logs</span>
|
|
1605
|
+
</button>
|
|
1606
|
+
<div id="job-logs-panel" style="display: none; margin-top: 12px;">
|
|
1607
|
+
<div id="log-job-status" style="font-size: 0.75rem; color: var(--text-muted); margin-bottom: 6px;"></div>
|
|
1608
|
+
<pre id="job-logs-content" style="
|
|
1609
|
+
background: #1a1a1a;
|
|
1610
|
+
color: #10b981;
|
|
1611
|
+
padding: 14px;
|
|
1612
|
+
border-radius: 6px;
|
|
1613
|
+
font-size: 0.75rem;
|
|
1614
|
+
max-height: 300px;
|
|
1615
|
+
overflow-y: auto;
|
|
1616
|
+
white-space: pre-wrap;
|
|
1617
|
+
word-wrap: break-word;
|
|
1618
|
+
font-family: 'SF Mono', Monaco, monospace;
|
|
1619
|
+
border: 1px solid rgba(255,255,255,0.1);
|
|
1620
|
+
">Loading logs...</pre>
|
|
1621
|
+
</div>
|
|
1622
|
+
</div>
|
|
1623
|
+
</div>
|
|
1624
|
+
|
|
1625
|
+
<script>
|
|
1626
|
+
// Track refresh state
|
|
1627
|
+
let isAzureJobsRefreshing = false;
|
|
1628
|
+
let azureJobsErrorCount = 0;
|
|
1629
|
+
let azureJobsPanelUserToggled = false; // Track if user manually toggled panel
|
|
1630
|
+
|
|
1631
|
+
// Toggle Azure jobs panel expand/collapse
|
|
1632
|
+
function toggleAzureJobsPanel() {
|
|
1633
|
+
const panel = document.getElementById('azure-jobs-panel');
|
|
1634
|
+
if (panel) {
|
|
1635
|
+
panel.classList.toggle('collapsed');
|
|
1636
|
+
azureJobsPanelUserToggled = true; // User manually toggled, respect their choice
|
|
1637
|
+
}
|
|
1638
|
+
}
|
|
1639
|
+
|
|
1640
|
+
// Check if panel should auto-expand based on jobs (only for running jobs)
|
|
1641
|
+
// NOTE: Panel is collapsed by default and only auto-expands if there are running jobs
|
|
1642
|
+
function shouldAutoExpandAzurePanel(jobs) {
|
|
1643
|
+
if (!jobs || jobs.length === 0) return false;
|
|
1644
|
+
|
|
1645
|
+
for (const job of jobs) {
|
|
1646
|
+
const status = (job.status || '').toLowerCase();
|
|
1647
|
+
// Auto-expand only for running/active jobs
|
|
1648
|
+
if (['running', 'provisioning', 'preparing', 'queued', 'starting'].includes(status)) {
|
|
1649
|
+
return true;
|
|
1650
|
+
}
|
|
1651
|
+
}
|
|
1652
|
+
return false;
|
|
1653
|
+
}
|
|
1654
|
+
|
|
1655
|
+
// Auto-expand panel if there are running/recent jobs (only if user hasn't manually toggled)
|
|
1656
|
+
function maybeAutoExpandAzurePanel(jobs) {
|
|
1657
|
+
if (azureJobsPanelUserToggled) return; // Respect user's manual choice
|
|
1658
|
+
|
|
1659
|
+
const panel = document.getElementById('azure-jobs-panel');
|
|
1660
|
+
if (!panel) return;
|
|
1661
|
+
|
|
1662
|
+
if (shouldAutoExpandAzurePanel(jobs)) {
|
|
1663
|
+
panel.classList.remove('collapsed');
|
|
1664
|
+
}
|
|
1665
|
+
}
|
|
1666
|
+
|
|
1667
|
+
// Show/hide loading state and error banner
|
|
1668
|
+
function setAzureJobsState(state, errorMsg = '') {
|
|
1669
|
+
const loadingEl = document.getElementById('azure-jobs-loading');
|
|
1670
|
+
const listEl = document.getElementById('azure-jobs-list');
|
|
1671
|
+
const errorEl = document.getElementById('azure-jobs-error');
|
|
1672
|
+
const errorMsgEl = document.getElementById('azure-jobs-error-msg');
|
|
1673
|
+
const refreshTimeEl = document.getElementById('jobs-refresh-time');
|
|
1674
|
+
const refreshBtn = document.getElementById('azure-jobs-refresh-btn');
|
|
1675
|
+
|
|
1676
|
+
// Reset states
|
|
1677
|
+
loadingEl.style.display = 'none';
|
|
1678
|
+
errorEl.classList.remove('show');
|
|
1679
|
+
|
|
1680
|
+
if (state === 'loading') {
|
|
1681
|
+
loadingEl.style.display = 'block';
|
|
1682
|
+
listEl.style.display = 'none';
|
|
1683
|
+
refreshBtn.classList.add('loading');
|
|
1684
|
+
refreshBtn.disabled = true;
|
|
1685
|
+
} else if (state === 'error') {
|
|
1686
|
+
listEl.style.display = 'block';
|
|
1687
|
+
errorEl.classList.add('show');
|
|
1688
|
+
errorMsgEl.textContent = errorMsg || 'Failed to fetch Azure jobs. Check Azure CLI login.';
|
|
1689
|
+
refreshTimeEl.textContent = 'Error';
|
|
1690
|
+
refreshTimeEl.classList.add('error');
|
|
1691
|
+
refreshTimeEl.classList.remove('success');
|
|
1692
|
+
refreshBtn.classList.remove('loading');
|
|
1693
|
+
refreshBtn.disabled = false;
|
|
1694
|
+
} else if (state === 'success') {
|
|
1695
|
+
listEl.style.display = 'block';
|
|
1696
|
+
refreshTimeEl.classList.remove('error');
|
|
1697
|
+
refreshTimeEl.classList.add('success');
|
|
1698
|
+
refreshBtn.classList.remove('loading');
|
|
1699
|
+
refreshBtn.disabled = false;
|
|
1700
|
+
azureJobsErrorCount = 0; // Reset error count on success
|
|
1701
|
+
} else {
|
|
1702
|
+
listEl.style.display = 'block';
|
|
1703
|
+
refreshBtn.classList.remove('loading');
|
|
1704
|
+
refreshBtn.disabled = false;
|
|
1705
|
+
}
|
|
1706
|
+
}
|
|
1707
|
+
|
|
1708
|
+
// Force refresh from Azure (bypasses cache)
|
|
1709
|
+
async function refreshAzureJobs() {
|
|
1710
|
+
if (isAzureJobsRefreshing) return;
|
|
1711
|
+
isAzureJobsRefreshing = true;
|
|
1712
|
+
setAzureJobsState('loading');
|
|
1713
|
+
document.getElementById('jobs-refresh-time').textContent = 'Refreshing...';
|
|
1714
|
+
|
|
1715
|
+
try {
|
|
1716
|
+
const response = await fetch('/api/azure-jobs?force=true&t=' + Date.now());
|
|
1717
|
+
if (!response.ok) {
|
|
1718
|
+
throw new Error(`HTTP ${response.status}`);
|
|
1719
|
+
}
|
|
1720
|
+
const jobs = await response.json();
|
|
1721
|
+
if (jobs.error) {
|
|
1722
|
+
throw new Error(jobs.error);
|
|
1723
|
+
}
|
|
1724
|
+
renderAzureJobs(jobs, true);
|
|
1725
|
+
setAzureJobsState('success');
|
|
1726
|
+
document.getElementById('jobs-refresh-time').textContent =
|
|
1727
|
+
'Live from Azure - ' + new Date().toLocaleTimeString();
|
|
1728
|
+
} catch (e) {
|
|
1729
|
+
console.error('Azure jobs refresh failed:', e);
|
|
1730
|
+
azureJobsErrorCount++;
|
|
1731
|
+
setAzureJobsState('error', e.message || 'Connection failed');
|
|
1732
|
+
} finally {
|
|
1733
|
+
isAzureJobsRefreshing = false;
|
|
1734
|
+
}
|
|
1735
|
+
}
|
|
1736
|
+
|
|
1737
|
+
// Fetch Azure job status from API (normal polling)
|
|
1738
|
+
async function fetchAzureJobs() {
|
|
1739
|
+
if (isAzureJobsRefreshing) return;
|
|
1740
|
+
|
|
1741
|
+
// If we've had multiple errors, slow down polling
|
|
1742
|
+
if (azureJobsErrorCount >= 3) {
|
|
1743
|
+
document.getElementById('jobs-refresh-time').textContent =
|
|
1744
|
+
'Polling paused (too many errors). Click Refresh.';
|
|
1745
|
+
return;
|
|
1746
|
+
}
|
|
1747
|
+
|
|
1748
|
+
try {
|
|
1749
|
+
const response = await fetch('/api/azure-jobs?t=' + Date.now());
|
|
1750
|
+
if (!response.ok) {
|
|
1751
|
+
throw new Error(`HTTP ${response.status}`);
|
|
1752
|
+
}
|
|
1753
|
+
const jobs = await response.json();
|
|
1754
|
+
if (jobs.error) {
|
|
1755
|
+
throw new Error(jobs.error);
|
|
1756
|
+
}
|
|
1757
|
+
renderAzureJobs(jobs, true);
|
|
1758
|
+
setAzureJobsState('success');
|
|
1759
|
+
document.getElementById('jobs-refresh-time').textContent =
|
|
1760
|
+
'Live - ' + new Date().toLocaleTimeString();
|
|
1761
|
+
} catch (e) {
|
|
1762
|
+
console.log('Azure API error:', e);
|
|
1763
|
+
azureJobsErrorCount++;
|
|
1764
|
+
|
|
1765
|
+
// Try cached fallback
|
|
1766
|
+
try {
|
|
1767
|
+
const fallbackResponse = await fetch('benchmark_results/azure_jobs.json?t=' + Date.now());
|
|
1768
|
+
if (fallbackResponse.ok) {
|
|
1769
|
+
const jobs = await fallbackResponse.json();
|
|
1770
|
+
renderAzureJobs(jobs, false);
|
|
1771
|
+
document.getElementById('jobs-refresh-time').textContent =
|
|
1772
|
+
'Cached - ' + new Date().toLocaleTimeString();
|
|
1773
|
+
document.getElementById('jobs-refresh-time').classList.remove('error');
|
|
1774
|
+
return;
|
|
1775
|
+
}
|
|
1776
|
+
} catch (fallbackError) {
|
|
1777
|
+
// Fallback also failed
|
|
1778
|
+
}
|
|
1779
|
+
|
|
1780
|
+
// Show empty state with guidance
|
|
1781
|
+
document.getElementById('azure-jobs-list').innerHTML =
|
|
1782
|
+
'<div class="no-jobs">' +
|
|
1783
|
+
'<div style="font-size: 2rem; margin-bottom: 12px; opacity: 0.5;">☁</div>' +
|
|
1784
|
+
'No Azure jobs found<code>uv run python -m openadapt_ml.benchmarks.cli run-azure</code>' +
|
|
1785
|
+
'</div>';
|
|
1786
|
+
}
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
function renderAzureJobs(jobs, isLive) {
|
|
1790
|
+
// Auto-expand panel if there are running/recent jobs
|
|
1791
|
+
maybeAutoExpandAzurePanel(jobs);
|
|
1792
|
+
|
|
1793
|
+
if (!jobs || jobs.length === 0) {
|
|
1794
|
+
document.getElementById('azure-jobs-list').innerHTML =
|
|
1795
|
+
'<div class="no-jobs">' +
|
|
1796
|
+
'<div style="font-size: 2rem; margin-bottom: 12px; opacity: 0.5;">☁</div>' +
|
|
1797
|
+
'No Azure jobs found<code>uv run python -m openadapt_ml.benchmarks.cli run-azure</code>' +
|
|
1798
|
+
'</div>';
|
|
1799
|
+
return;
|
|
1800
|
+
}
|
|
1801
|
+
|
|
1802
|
+
const html = jobs.slice(0, 5).map(job => {
|
|
1803
|
+
const status = (job.status || 'unknown').toLowerCase();
|
|
1804
|
+
const statusClass = status;
|
|
1805
|
+
let statusText = job.status ? job.status.charAt(0).toUpperCase() + job.status.slice(1) : 'Unknown';
|
|
1806
|
+
|
|
1807
|
+
// Show display_name if available (live data), otherwise job_id
|
|
1808
|
+
const displayName = job.display_name || job.job_id;
|
|
1809
|
+
|
|
1810
|
+
// Calculate elapsed time for running jobs
|
|
1811
|
+
let elapsedMins = 0;
|
|
1812
|
+
let elapsedText = '';
|
|
1813
|
+
let isStuck = false;
|
|
1814
|
+
if (job.started_at) {
|
|
1815
|
+
const start = new Date(job.started_at);
|
|
1816
|
+
elapsedMins = (Date.now() - start.getTime()) / 60000;
|
|
1817
|
+
if (status === 'running') {
|
|
1818
|
+
elapsedText = elapsedMins < 60
|
|
1819
|
+
? Math.round(elapsedMins) + 'm'
|
|
1820
|
+
: Math.round(elapsedMins / 60) + 'h ' + Math.round(elapsedMins % 60) + 'm';
|
|
1821
|
+
// Warn if running > 30 mins
|
|
1822
|
+
if (elapsedMins > 30) {
|
|
1823
|
+
isStuck = true;
|
|
1824
|
+
}
|
|
1825
|
+
}
|
|
1826
|
+
}
|
|
1827
|
+
|
|
1828
|
+
// Build metadata items
|
|
1829
|
+
const metaItems = [];
|
|
1830
|
+
if (elapsedText && status === 'running') {
|
|
1831
|
+
metaItems.push('<span class="azure-job-meta-item">🕑 ' + elapsedText + '</span>');
|
|
1832
|
+
}
|
|
1833
|
+
if (!isLive && job.num_tasks) {
|
|
1834
|
+
metaItems.push('<span class="azure-job-meta-item">~' + job.num_tasks + ' tasks</span>');
|
|
1835
|
+
}
|
|
1836
|
+
if (job.results?.success_rate !== undefined) {
|
|
1837
|
+
metaItems.push('<span class="azure-job-meta-item">' + (job.results.success_rate * 100).toFixed(1) + '% success</span>');
|
|
1838
|
+
}
|
|
1839
|
+
if (job.started_at && status !== 'running') {
|
|
1840
|
+
const date = new Date(job.started_at);
|
|
1841
|
+
metaItems.push('<span class="azure-job-meta-item">' + date.toLocaleString() + '</span>');
|
|
1842
|
+
}
|
|
1843
|
+
const metaHtml = metaItems.join('');
|
|
1844
|
+
|
|
1845
|
+
// Add warning for stuck jobs
|
|
1846
|
+
const stuckWarning = isStuck
|
|
1847
|
+
? '<div style="color: #ff9800; font-size: 0.7rem; margin-top: 6px; display: flex; align-items: center; gap: 4px;"><span>⚠</span> Running > 30min. May be stuck. Consider canceling.</div>'
|
|
1848
|
+
: '';
|
|
1849
|
+
|
|
1850
|
+
return '<div class="azure-job-item status-' + statusClass + '">' +
|
|
1851
|
+
'<div class="azure-job-status">' +
|
|
1852
|
+
'<span class="status-dot ' + statusClass + '"></span>' +
|
|
1853
|
+
'<span class="status-text ' + statusClass + '">' + statusText + '</span>' +
|
|
1854
|
+
'</div>' +
|
|
1855
|
+
'<div class="azure-job-info">' +
|
|
1856
|
+
'<div class="azure-job-id">' + displayName + '</div>' +
|
|
1857
|
+
'<div class="azure-job-meta">' + metaHtml + '</div>' +
|
|
1858
|
+
stuckWarning +
|
|
1859
|
+
'</div>' +
|
|
1860
|
+
'<a href="' + (job.azure_dashboard_url || '#') + '" target="_blank" class="azure-job-link">' +
|
|
1861
|
+
'Open in Azure →' +
|
|
1862
|
+
'</a>' +
|
|
1863
|
+
'</div>';
|
|
1864
|
+
}).join('');
|
|
1865
|
+
|
|
1866
|
+
document.getElementById('azure-jobs-list').innerHTML = html;
|
|
1867
|
+
}
|
|
1868
|
+
|
|
1869
|
+
// Log viewer state
|
|
1870
|
+
let showLogs = false;
|
|
1871
|
+
let currentLogJobId = null;
|
|
1872
|
+
|
|
1873
|
+
async function fetchJobLogs() {
|
|
1874
|
+
if (!showLogs) return;
|
|
1875
|
+
|
|
1876
|
+
const logEl = document.getElementById('job-logs-content');
|
|
1877
|
+
const statusEl = document.getElementById('log-job-status');
|
|
1878
|
+
|
|
1879
|
+
try {
|
|
1880
|
+
const url = currentLogJobId
|
|
1881
|
+
? '/api/azure-job-logs?job_id=' + currentLogJobId
|
|
1882
|
+
: '/api/azure-job-logs';
|
|
1883
|
+
const response = await fetch(url + '&t=' + Date.now());
|
|
1884
|
+
if (response.ok) {
|
|
1885
|
+
const data = await response.json();
|
|
1886
|
+
if (logEl) {
|
|
1887
|
+
logEl.textContent = data.logs || 'No logs available';
|
|
1888
|
+
if (data.command) {
|
|
1889
|
+
logEl.textContent = 'Command: ' + data.command + '\\n\\n' + (data.logs || '');
|
|
1890
|
+
}
|
|
1891
|
+
// Color code based on status
|
|
1892
|
+
logEl.style.color = data.status === 'running' ? '#f59e0b' :
|
|
1893
|
+
data.status === 'completed' ? '#10b981' :
|
|
1894
|
+
data.status === 'failed' ? '#ef4444' : '#10b981';
|
|
1895
|
+
}
|
|
1896
|
+
if (statusEl && data.job_id) {
|
|
1897
|
+
statusEl.textContent = 'Job: ' + data.job_id + ' (' + data.status + ')';
|
|
1898
|
+
}
|
|
1899
|
+
} else {
|
|
1900
|
+
if (logEl) logEl.textContent = 'Failed to fetch logs (HTTP ' + response.status + ')';
|
|
1901
|
+
}
|
|
1902
|
+
} catch (e) {
|
|
1903
|
+
console.log('Error fetching logs:', e);
|
|
1904
|
+
if (logEl) logEl.textContent = 'Error fetching logs: ' + e.message;
|
|
1905
|
+
}
|
|
1906
|
+
}
|
|
1907
|
+
|
|
1908
|
+
function toggleLogs() {
|
|
1909
|
+
showLogs = !showLogs;
|
|
1910
|
+
const panel = document.getElementById('job-logs-panel');
|
|
1911
|
+
const icon = document.getElementById('logs-icon');
|
|
1912
|
+
const btnText = document.getElementById('logs-btn-text');
|
|
1913
|
+
|
|
1914
|
+
if (panel) {
|
|
1915
|
+
panel.style.display = showLogs ? 'block' : 'none';
|
|
1916
|
+
}
|
|
1917
|
+
if (icon) {
|
|
1918
|
+
icon.innerHTML = showLogs ? '▲' : '▼';
|
|
1919
|
+
}
|
|
1920
|
+
if (btnText) {
|
|
1921
|
+
btnText.textContent = showLogs ? 'Hide Logs' : 'Show Logs';
|
|
1922
|
+
}
|
|
1923
|
+
if (showLogs) fetchJobLogs();
|
|
1924
|
+
}
|
|
1925
|
+
|
|
1926
|
+
// Initial fetch and poll every 30 seconds (use Refresh button for immediate updates)
|
|
1927
|
+
fetchAzureJobs();
|
|
1928
|
+
setInterval(fetchAzureJobs, 30000);
|
|
1929
|
+
setInterval(fetchJobLogs, 5000); // Poll logs every 5 seconds
|
|
1930
|
+
</script>
|
|
1931
|
+
'''
|
|
1932
|
+
|
|
1933
|
+
|
|
1934
|
+
def _get_vm_discovery_panel_css() -> str:
|
|
1935
|
+
"""Return CSS for VM Discovery panel with prominent VNC button."""
|
|
1936
|
+
return '''
|
|
1937
|
+
.vm-discovery-panel {
|
|
1938
|
+
background: linear-gradient(135deg, rgba(16, 185, 129, 0.15) 0%, rgba(5, 150, 105, 0.05) 100%);
|
|
1939
|
+
border: 1px solid rgba(16, 185, 129, 0.3);
|
|
1940
|
+
border-radius: 12px;
|
|
1941
|
+
padding: 20px 24px;
|
|
1942
|
+
margin-bottom: 24px;
|
|
1943
|
+
}
|
|
1944
|
+
.vm-discovery-header {
|
|
1945
|
+
display: flex;
|
|
1946
|
+
align-items: center;
|
|
1947
|
+
justify-content: space-between;
|
|
1948
|
+
margin-bottom: 16px;
|
|
1949
|
+
}
|
|
1950
|
+
.vm-discovery-title {
|
|
1951
|
+
display: flex;
|
|
1952
|
+
align-items: center;
|
|
1953
|
+
gap: 10px;
|
|
1954
|
+
font-size: 1rem;
|
|
1955
|
+
font-weight: 600;
|
|
1956
|
+
color: #10b981;
|
|
1957
|
+
}
|
|
1958
|
+
.vm-discovery-title svg {
|
|
1959
|
+
width: 20px;
|
|
1960
|
+
height: 20px;
|
|
1961
|
+
}
|
|
1962
|
+
.vm-discovery-controls {
|
|
1963
|
+
display: flex;
|
|
1964
|
+
align-items: center;
|
|
1965
|
+
gap: 12px;
|
|
1966
|
+
}
|
|
1967
|
+
.vm-discovery-refresh {
|
|
1968
|
+
font-size: 0.75rem;
|
|
1969
|
+
color: var(--text-muted);
|
|
1970
|
+
}
|
|
1971
|
+
.vm-item {
|
|
1972
|
+
background: rgba(0, 0, 0, 0.3);
|
|
1973
|
+
border: 1px solid var(--border-color);
|
|
1974
|
+
border-radius: 10px;
|
|
1975
|
+
padding: 18px;
|
|
1976
|
+
margin-bottom: 12px;
|
|
1977
|
+
transition: all 0.2s;
|
|
1978
|
+
}
|
|
1979
|
+
.vm-item:last-child {
|
|
1980
|
+
margin-bottom: 0;
|
|
1981
|
+
}
|
|
1982
|
+
.vm-item:hover {
|
|
1983
|
+
border-color: rgba(16, 185, 129, 0.5);
|
|
1984
|
+
}
|
|
1985
|
+
.vm-item-header {
|
|
1986
|
+
display: flex;
|
|
1987
|
+
align-items: center;
|
|
1988
|
+
justify-content: space-between;
|
|
1989
|
+
margin-bottom: 12px;
|
|
1990
|
+
}
|
|
1991
|
+
.vm-name {
|
|
1992
|
+
font-weight: 600;
|
|
1993
|
+
font-size: 1rem;
|
|
1994
|
+
color: var(--text-primary);
|
|
1995
|
+
}
|
|
1996
|
+
.vm-status-indicator {
|
|
1997
|
+
display: flex;
|
|
1998
|
+
align-items: center;
|
|
1999
|
+
gap: 6px;
|
|
2000
|
+
font-size: 0.8rem;
|
|
2001
|
+
padding: 4px 10px;
|
|
2002
|
+
border-radius: 12px;
|
|
2003
|
+
background: rgba(0, 0, 0, 0.2);
|
|
2004
|
+
}
|
|
2005
|
+
.vm-status-indicator.online {
|
|
2006
|
+
background: rgba(16, 185, 129, 0.2);
|
|
2007
|
+
color: #10b981;
|
|
2008
|
+
}
|
|
2009
|
+
.vm-status-indicator.offline {
|
|
2010
|
+
background: rgba(239, 68, 68, 0.2);
|
|
2011
|
+
color: #ef4444;
|
|
2012
|
+
}
|
|
2013
|
+
.vm-status-dot {
|
|
2014
|
+
width: 8px;
|
|
2015
|
+
height: 8px;
|
|
2016
|
+
border-radius: 50%;
|
|
2017
|
+
}
|
|
2018
|
+
.vm-status-dot.online {
|
|
2019
|
+
background: #10b981;
|
|
2020
|
+
box-shadow: 0 0 6px #10b981;
|
|
2021
|
+
}
|
|
2022
|
+
.vm-status-dot.offline {
|
|
2023
|
+
background: #ef4444;
|
|
2024
|
+
}
|
|
2025
|
+
.vm-status-dot.unknown {
|
|
2026
|
+
background: #6b7280;
|
|
2027
|
+
}
|
|
2028
|
+
/* IP Address display - prominent */
|
|
2029
|
+
.vm-ip-display {
|
|
2030
|
+
display: flex;
|
|
2031
|
+
align-items: center;
|
|
2032
|
+
gap: 8px;
|
|
2033
|
+
padding: 10px 14px;
|
|
2034
|
+
background: rgba(0, 0, 0, 0.4);
|
|
2035
|
+
border: 1px solid rgba(16, 185, 129, 0.3);
|
|
2036
|
+
border-radius: 8px;
|
|
2037
|
+
margin-bottom: 14px;
|
|
2038
|
+
}
|
|
2039
|
+
.vm-ip-label {
|
|
2040
|
+
font-size: 0.75rem;
|
|
2041
|
+
color: var(--text-muted);
|
|
2042
|
+
text-transform: uppercase;
|
|
2043
|
+
letter-spacing: 0.5px;
|
|
2044
|
+
}
|
|
2045
|
+
.vm-ip-value {
|
|
2046
|
+
font-family: 'SF Mono', Monaco, monospace;
|
|
2047
|
+
font-size: 1.1rem;
|
|
2048
|
+
font-weight: 600;
|
|
2049
|
+
color: #10b981;
|
|
2050
|
+
letter-spacing: 0.5px;
|
|
2051
|
+
}
|
|
2052
|
+
.vm-ip-copy {
|
|
2053
|
+
margin-left: auto;
|
|
2054
|
+
padding: 4px 8px;
|
|
2055
|
+
background: rgba(16, 185, 129, 0.2);
|
|
2056
|
+
border: 1px solid rgba(16, 185, 129, 0.3);
|
|
2057
|
+
border-radius: 4px;
|
|
2058
|
+
color: #10b981;
|
|
2059
|
+
cursor: pointer;
|
|
2060
|
+
font-size: 0.7rem;
|
|
2061
|
+
transition: all 0.2s;
|
|
2062
|
+
}
|
|
2063
|
+
.vm-ip-copy:hover {
|
|
2064
|
+
background: rgba(16, 185, 129, 0.3);
|
|
2065
|
+
}
|
|
2066
|
+
.vm-info {
|
|
2067
|
+
display: grid;
|
|
2068
|
+
grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
|
|
2069
|
+
gap: 10px;
|
|
2070
|
+
margin-bottom: 14px;
|
|
2071
|
+
font-size: 0.85rem;
|
|
2072
|
+
color: var(--text-secondary);
|
|
2073
|
+
}
|
|
2074
|
+
.vm-info-item {
|
|
2075
|
+
display: flex;
|
|
2076
|
+
gap: 6px;
|
|
2077
|
+
}
|
|
2078
|
+
.vm-info-label {
|
|
2079
|
+
color: var(--text-muted);
|
|
2080
|
+
}
|
|
2081
|
+
.vm-info-value {
|
|
2082
|
+
color: var(--text-primary);
|
|
2083
|
+
font-family: 'SF Mono', Monaco, monospace;
|
|
2084
|
+
}
|
|
2085
|
+
.vm-actions {
|
|
2086
|
+
display: flex;
|
|
2087
|
+
gap: 10px;
|
|
2088
|
+
align-items: center;
|
|
2089
|
+
flex-wrap: wrap;
|
|
2090
|
+
}
|
|
2091
|
+
/* VNC Button - Large and Prominent */
|
|
2092
|
+
.vm-vnc-link {
|
|
2093
|
+
display: inline-flex;
|
|
2094
|
+
align-items: center;
|
|
2095
|
+
gap: 8px;
|
|
2096
|
+
padding: 12px 20px;
|
|
2097
|
+
background: linear-gradient(135deg, #10b981 0%, #059669 100%);
|
|
2098
|
+
border: none;
|
|
2099
|
+
border-radius: 8px;
|
|
2100
|
+
color: white;
|
|
2101
|
+
text-decoration: none;
|
|
2102
|
+
font-size: 0.9rem;
|
|
2103
|
+
font-weight: 600;
|
|
2104
|
+
transition: all 0.2s;
|
|
2105
|
+
box-shadow: 0 4px 12px rgba(16, 185, 129, 0.3);
|
|
2106
|
+
}
|
|
2107
|
+
.vm-vnc-link:hover {
|
|
2108
|
+
background: linear-gradient(135deg, #059669 0%, #047857 100%);
|
|
2109
|
+
transform: translateY(-2px);
|
|
2110
|
+
box-shadow: 0 6px 16px rgba(16, 185, 129, 0.4);
|
|
2111
|
+
}
|
|
2112
|
+
.vm-vnc-link .vnc-icon {
|
|
2113
|
+
font-size: 1.1rem;
|
|
2114
|
+
}
|
|
2115
|
+
.vm-vnc-link .vnc-ip {
|
|
2116
|
+
font-family: 'SF Mono', Monaco, monospace;
|
|
2117
|
+
font-size: 0.8rem;
|
|
2118
|
+
opacity: 0.9;
|
|
2119
|
+
margin-left: 4px;
|
|
2120
|
+
}
|
|
2121
|
+
.vm-vnc-link .tunnel-badge {
|
|
2122
|
+
font-size: 0.7rem;
|
|
2123
|
+
padding: 2px 6px;
|
|
2124
|
+
border-radius: 4px;
|
|
2125
|
+
background: rgba(255, 255, 255, 0.2);
|
|
2126
|
+
margin-left: 6px;
|
|
2127
|
+
}
|
|
2128
|
+
.vm-vnc-link .tunnel-badge.tunnel-error {
|
|
2129
|
+
background: rgba(239, 68, 68, 0.3);
|
|
2130
|
+
color: #fca5a5;
|
|
2131
|
+
}
|
|
2132
|
+
.vm-vnc-link.tunnel-inactive {
|
|
2133
|
+
background: linear-gradient(135deg, #6b7280 0%, #4b5563 100%);
|
|
2134
|
+
opacity: 0.8;
|
|
2135
|
+
}
|
|
2136
|
+
.vm-vnc-link.tunnel-inactive:hover {
|
|
2137
|
+
background: linear-gradient(135deg, #4b5563 0%, #374151 100%);
|
|
2138
|
+
}
|
|
2139
|
+
.tunnel-mini {
|
|
2140
|
+
font-size: 0.7rem;
|
|
2141
|
+
color: #10b981;
|
|
2142
|
+
}
|
|
2143
|
+
.vm-waa-status {
|
|
2144
|
+
display: inline-flex;
|
|
2145
|
+
align-items: center;
|
|
2146
|
+
gap: 6px;
|
|
2147
|
+
padding: 8px 14px;
|
|
2148
|
+
background: rgba(0, 0, 0, 0.2);
|
|
2149
|
+
border-radius: 6px;
|
|
2150
|
+
font-size: 0.85rem;
|
|
2151
|
+
font-weight: 500;
|
|
2152
|
+
}
|
|
2153
|
+
.vm-waa-status.ready {
|
|
2154
|
+
color: #10b981;
|
|
2155
|
+
border: 1px solid rgba(16, 185, 129, 0.4);
|
|
2156
|
+
background: rgba(16, 185, 129, 0.1);
|
|
2157
|
+
}
|
|
2158
|
+
.vm-waa-status.not-ready {
|
|
2159
|
+
color: #ef4444;
|
|
2160
|
+
border: 1px solid rgba(239, 68, 68, 0.4);
|
|
2161
|
+
background: rgba(239, 68, 68, 0.1);
|
|
2162
|
+
}
|
|
2163
|
+
.vm-waa-status.checking {
|
|
2164
|
+
color: #f59e0b;
|
|
2165
|
+
border: 1px solid rgba(245, 158, 11, 0.4);
|
|
2166
|
+
background: rgba(245, 158, 11, 0.1);
|
|
2167
|
+
}
|
|
2168
|
+
.vm-last-checked {
|
|
2169
|
+
font-size: 0.7rem;
|
|
2170
|
+
color: var(--text-muted);
|
|
2171
|
+
margin-top: 10px;
|
|
2172
|
+
display: flex;
|
|
2173
|
+
align-items: center;
|
|
2174
|
+
gap: 6px;
|
|
2175
|
+
}
|
|
2176
|
+
.no-vms {
|
|
2177
|
+
text-align: center;
|
|
2178
|
+
padding: 30px 20px;
|
|
2179
|
+
color: var(--text-muted);
|
|
2180
|
+
font-size: 0.9rem;
|
|
2181
|
+
}
|
|
2182
|
+
.no-vms-icon {
|
|
2183
|
+
font-size: 2rem;
|
|
2184
|
+
margin-bottom: 12px;
|
|
2185
|
+
opacity: 0.5;
|
|
2186
|
+
}
|
|
2187
|
+
.vm-add-button {
|
|
2188
|
+
margin-top: 12px;
|
|
2189
|
+
padding: 10px 18px;
|
|
2190
|
+
background: rgba(16, 185, 129, 0.2);
|
|
2191
|
+
border: 1px solid rgba(16, 185, 129, 0.4);
|
|
2192
|
+
border-radius: 6px;
|
|
2193
|
+
color: #10b981;
|
|
2194
|
+
cursor: pointer;
|
|
2195
|
+
font-size: 0.85rem;
|
|
2196
|
+
font-weight: 500;
|
|
2197
|
+
transition: all 0.2s;
|
|
2198
|
+
display: flex;
|
|
2199
|
+
align-items: center;
|
|
2200
|
+
gap: 6px;
|
|
2201
|
+
}
|
|
2202
|
+
.vm-add-button:hover {
|
|
2203
|
+
background: rgba(16, 185, 129, 0.3);
|
|
2204
|
+
transform: translateY(-1px);
|
|
2205
|
+
}
|
|
2206
|
+
.vm-add-form {
|
|
2207
|
+
display: none;
|
|
2208
|
+
margin-top: 12px;
|
|
2209
|
+
padding: 18px;
|
|
2210
|
+
background: rgba(0, 0, 0, 0.3);
|
|
2211
|
+
border: 1px solid var(--border-color);
|
|
2212
|
+
border-radius: 10px;
|
|
2213
|
+
}
|
|
2214
|
+
.vm-add-form.show {
|
|
2215
|
+
display: block;
|
|
2216
|
+
}
|
|
2217
|
+
.vm-form-row {
|
|
2218
|
+
margin-bottom: 14px;
|
|
2219
|
+
}
|
|
2220
|
+
.vm-form-row label {
|
|
2221
|
+
display: block;
|
|
2222
|
+
font-size: 0.8rem;
|
|
2223
|
+
color: var(--text-secondary);
|
|
2224
|
+
margin-bottom: 6px;
|
|
2225
|
+
font-weight: 500;
|
|
2226
|
+
}
|
|
2227
|
+
.vm-form-row input {
|
|
2228
|
+
width: 100%;
|
|
2229
|
+
padding: 8px 12px;
|
|
2230
|
+
background: rgba(0, 0, 0, 0.4);
|
|
2231
|
+
border: 1px solid var(--border-color);
|
|
2232
|
+
border-radius: 6px;
|
|
2233
|
+
color: var(--text-primary);
|
|
2234
|
+
font-size: 0.85rem;
|
|
2235
|
+
transition: border-color 0.2s;
|
|
2236
|
+
}
|
|
2237
|
+
.vm-form-row input:focus {
|
|
2238
|
+
outline: none;
|
|
2239
|
+
border-color: #10b981;
|
|
2240
|
+
}
|
|
2241
|
+
.vm-form-actions {
|
|
2242
|
+
display: flex;
|
|
2243
|
+
gap: 10px;
|
|
2244
|
+
margin-top: 18px;
|
|
2245
|
+
}
|
|
2246
|
+
.vm-form-submit {
|
|
2247
|
+
padding: 10px 18px;
|
|
2248
|
+
background: #10b981;
|
|
2249
|
+
border: none;
|
|
2250
|
+
border-radius: 6px;
|
|
2251
|
+
color: white;
|
|
2252
|
+
cursor: pointer;
|
|
2253
|
+
font-size: 0.85rem;
|
|
2254
|
+
font-weight: 500;
|
|
2255
|
+
}
|
|
2256
|
+
.vm-form-cancel {
|
|
2257
|
+
padding: 8px 16px;
|
|
2258
|
+
background: rgba(255, 255, 255, 0.1);
|
|
2259
|
+
border: 1px solid var(--border-color);
|
|
2260
|
+
border-radius: 6px;
|
|
2261
|
+
color: var(--text-primary);
|
|
2262
|
+
cursor: pointer;
|
|
2263
|
+
font-size: 0.85rem;
|
|
2264
|
+
}
|
|
2265
|
+
'''
|
|
2266
|
+
|
|
2267
|
+
|
|
2268
|
+
def _get_vm_discovery_panel_html() -> str:
|
|
2269
|
+
"""Return HTML for VM Discovery panel with prominent VNC button and loading states."""
|
|
2270
|
+
return '''
|
|
2271
|
+
<div class="vm-discovery-panel" id="vm-discovery-panel">
|
|
2272
|
+
<div class="vm-discovery-header">
|
|
2273
|
+
<div class="vm-discovery-title">
|
|
2274
|
+
<svg viewBox="0 0 24 24" fill="currentColor">
|
|
2275
|
+
<path d="M3 3h18v4H3V3zm0 6h18v12H3V9zm2 2v8h14v-8H5zm2 2h4v4H7v-4z"/>
|
|
2276
|
+
</svg>
|
|
2277
|
+
Windows VMs
|
|
2278
|
+
</div>
|
|
2279
|
+
<div class="vm-discovery-controls">
|
|
2280
|
+
<span class="vm-discovery-refresh" id="vm-refresh-time">Checking...</span>
|
|
2281
|
+
<button class="refresh-btn" onclick="refreshVMs()" title="Refresh VM status" id="vm-refresh-btn">
|
|
2282
|
+
<span class="refresh-icon">↻</span>
|
|
2283
|
+
<span class="spinner"></span>
|
|
2284
|
+
Refresh
|
|
2285
|
+
</button>
|
|
2286
|
+
</div>
|
|
2287
|
+
</div>
|
|
2288
|
+
|
|
2289
|
+
<!-- API Error Banner -->
|
|
2290
|
+
<div class="api-error-banner" id="vm-api-error">
|
|
2291
|
+
<span class="error-icon">!</span>
|
|
2292
|
+
<span class="error-message" id="vm-error-msg">Failed to fetch VMs</span>
|
|
2293
|
+
<button class="retry-btn" onclick="refreshVMs()">Retry</button>
|
|
2294
|
+
</div>
|
|
2295
|
+
|
|
2296
|
+
<!-- Loading state -->
|
|
2297
|
+
<div id="vm-loading" style="display: none; text-align: center; padding: 30px;">
|
|
2298
|
+
<div style="display: inline-block; width: 24px; height: 24px; border: 3px solid rgba(16,185,129,0.3); border-top-color: #10b981; border-radius: 50%; animation: spin 1s linear infinite;"></div>
|
|
2299
|
+
<div style="margin-top: 12px; color: var(--text-muted); font-size: 0.85rem;">Checking VM status...</div>
|
|
2300
|
+
</div>
|
|
2301
|
+
|
|
2302
|
+
<div id="vm-list">
|
|
2303
|
+
<div class="no-vms">
|
|
2304
|
+
<div class="no-vms-icon">💻</div>
|
|
2305
|
+
Checking for registered VMs...
|
|
2306
|
+
</div>
|
|
2307
|
+
</div>
|
|
2308
|
+
<button id="vm-add-button" class="vm-add-button" onclick="toggleVMAddForm()">
|
|
2309
|
+
<span>+</span> Add VM
|
|
2310
|
+
</button>
|
|
2311
|
+
<div id="vm-add-form" class="vm-add-form">
|
|
2312
|
+
<div class="vm-form-row">
|
|
2313
|
+
<label>VM Name:</label>
|
|
2314
|
+
<input type="text" id="vm-name" placeholder="e.g., azure-waa-vm" />
|
|
2315
|
+
</div>
|
|
2316
|
+
<div class="vm-form-row">
|
|
2317
|
+
<label>SSH Host (IP):</label>
|
|
2318
|
+
<input type="text" id="vm-ssh-host" placeholder="e.g., 172.171.112.41" />
|
|
2319
|
+
</div>
|
|
2320
|
+
<div class="vm-form-row">
|
|
2321
|
+
<label>SSH User:</label>
|
|
2322
|
+
<input type="text" id="vm-ssh-user" value="azureuser" />
|
|
2323
|
+
</div>
|
|
2324
|
+
<div class="vm-form-row">
|
|
2325
|
+
<label>VNC Port:</label>
|
|
2326
|
+
<input type="number" id="vm-vnc-port" value="8006" />
|
|
2327
|
+
</div>
|
|
2328
|
+
<div class="vm-form-row">
|
|
2329
|
+
<label>WAA Port:</label>
|
|
2330
|
+
<input type="number" id="vm-waa-port" value="5000" />
|
|
2331
|
+
</div>
|
|
2332
|
+
<div class="vm-form-row">
|
|
2333
|
+
<label>Docker Container:</label>
|
|
2334
|
+
<input type="text" id="vm-docker-container" value="win11-waa" />
|
|
2335
|
+
</div>
|
|
2336
|
+
<div class="vm-form-row">
|
|
2337
|
+
<label>Internal IP:</label>
|
|
2338
|
+
<input type="text" id="vm-internal-ip" value="20.20.20.21" />
|
|
2339
|
+
</div>
|
|
2340
|
+
<div class="vm-form-actions">
|
|
2341
|
+
<button class="vm-form-submit" onclick="submitVMRegistration()">Register VM</button>
|
|
2342
|
+
<button class="vm-form-cancel" onclick="toggleVMAddForm()">Cancel</button>
|
|
2343
|
+
</div>
|
|
2344
|
+
</div>
|
|
2345
|
+
</div>
|
|
2346
|
+
|
|
2347
|
+
<script>
|
|
2348
|
+
let isVMRefreshing = false;
|
|
2349
|
+
let vmErrorCount = 0;
|
|
2350
|
+
|
|
2351
|
+
function setVMLoadingState(loading) {
|
|
2352
|
+
const loadingEl = document.getElementById('vm-loading');
|
|
2353
|
+
const listEl = document.getElementById('vm-list');
|
|
2354
|
+
const btn = document.getElementById('vm-refresh-btn');
|
|
2355
|
+
|
|
2356
|
+
if (loading) {
|
|
2357
|
+
loadingEl.style.display = 'block';
|
|
2358
|
+
listEl.style.display = 'none';
|
|
2359
|
+
if (btn) btn.classList.add('loading');
|
|
2360
|
+
} else {
|
|
2361
|
+
loadingEl.style.display = 'none';
|
|
2362
|
+
listEl.style.display = 'block';
|
|
2363
|
+
if (btn) btn.classList.remove('loading');
|
|
2364
|
+
}
|
|
2365
|
+
}
|
|
2366
|
+
|
|
2367
|
+
function showVMError(msg) {
|
|
2368
|
+
const errorEl = document.getElementById('vm-api-error');
|
|
2369
|
+
const errorMsgEl = document.getElementById('vm-error-msg');
|
|
2370
|
+
if (errorEl && errorMsgEl) {
|
|
2371
|
+
errorMsgEl.textContent = msg;
|
|
2372
|
+
errorEl.style.display = 'flex'; // Override any inline display:none
|
|
2373
|
+
errorEl.classList.add('show');
|
|
2374
|
+
}
|
|
2375
|
+
}
|
|
2376
|
+
|
|
2377
|
+
function hideVMError() {
|
|
2378
|
+
const errorEl = document.getElementById('vm-api-error');
|
|
2379
|
+
if (errorEl) {
|
|
2380
|
+
errorEl.classList.remove('show');
|
|
2381
|
+
errorEl.style.display = 'none'; // Explicit hide as backup
|
|
2382
|
+
}
|
|
2383
|
+
}
|
|
2384
|
+
|
|
2385
|
+
async function refreshVMs() {
|
|
2386
|
+
if (isVMRefreshing) return;
|
|
2387
|
+
isVMRefreshing = true;
|
|
2388
|
+
setVMLoadingState(true);
|
|
2389
|
+
hideVMError();
|
|
2390
|
+
|
|
2391
|
+
try {
|
|
2392
|
+
const response = await fetch('/api/vms?' + Date.now());
|
|
2393
|
+
if (!response.ok) throw new Error('HTTP ' + response.status);
|
|
2394
|
+
const vms = await response.json();
|
|
2395
|
+
if (vms.error) throw new Error(vms.error);
|
|
2396
|
+
|
|
2397
|
+
renderVMs(vms);
|
|
2398
|
+
hideVMError(); // Hide error again after successful render
|
|
2399
|
+
vmErrorCount = 0;
|
|
2400
|
+
document.getElementById('vm-refresh-time').textContent =
|
|
2401
|
+
'Updated ' + new Date().toLocaleTimeString();
|
|
2402
|
+
} catch (e) {
|
|
2403
|
+
console.error('VM refresh failed:', e);
|
|
2404
|
+
vmErrorCount++;
|
|
2405
|
+
showVMError(e.message || 'Connection failed');
|
|
2406
|
+
} finally {
|
|
2407
|
+
isVMRefreshing = false;
|
|
2408
|
+
setVMLoadingState(false);
|
|
2409
|
+
}
|
|
2410
|
+
}
|
|
2411
|
+
|
|
2412
|
+
async function fetchVMs() {
|
|
2413
|
+
if (isVMRefreshing) return;
|
|
2414
|
+
if (vmErrorCount >= 3) {
|
|
2415
|
+
document.getElementById('vm-refresh-time').textContent = 'Polling paused';
|
|
2416
|
+
return;
|
|
2417
|
+
}
|
|
2418
|
+
|
|
2419
|
+
try {
|
|
2420
|
+
const response = await fetch('/api/vms?' + Date.now());
|
|
2421
|
+
if (response.ok) {
|
|
2422
|
+
const vms = await response.json();
|
|
2423
|
+
if (!vms.error) {
|
|
2424
|
+
renderVMs(vms);
|
|
2425
|
+
hideVMError();
|
|
2426
|
+
vmErrorCount = 0;
|
|
2427
|
+
document.getElementById('vm-refresh-time').textContent =
|
|
2428
|
+
'Updated ' + new Date().toLocaleTimeString();
|
|
2429
|
+
}
|
|
2430
|
+
}
|
|
2431
|
+
} catch (e) {
|
|
2432
|
+
console.log('VM API unavailable:', e);
|
|
2433
|
+
vmErrorCount++;
|
|
2434
|
+
}
|
|
2435
|
+
}
|
|
2436
|
+
|
|
2437
|
+
function copyToClipboard(text, btn) {
|
|
2438
|
+
navigator.clipboard.writeText(text).then(() => {
|
|
2439
|
+
const originalText = btn.textContent;
|
|
2440
|
+
btn.textContent = 'Copied!';
|
|
2441
|
+
setTimeout(() => { btn.textContent = originalText; }, 1500);
|
|
2442
|
+
});
|
|
2443
|
+
}
|
|
2444
|
+
|
|
2445
|
+
function renderVMs(vms) {
|
|
2446
|
+
const container = document.getElementById('vm-list');
|
|
2447
|
+
|
|
2448
|
+
if (!vms || vms.length === 0) {
|
|
2449
|
+
container.innerHTML = '<div class="no-vms"><div class="no-vms-icon">💻</div>No VMs registered. Click "Add VM" to register one.</div>';
|
|
2450
|
+
return;
|
|
2451
|
+
}
|
|
2452
|
+
|
|
2453
|
+
const html = vms.map(vm => {
|
|
2454
|
+
const statusClass = vm.status || 'unknown';
|
|
2455
|
+
const statusText = statusClass.charAt(0).toUpperCase() + statusClass.slice(1);
|
|
2456
|
+
const waaStatusClass = vm.waa_probe_status === 'ready' ? 'ready' :
|
|
2457
|
+
vm.waa_probe_status === 'checking' ? 'checking' : 'not-ready';
|
|
2458
|
+
const waaStatusIcon = vm.waa_probe_status === 'ready' ? '✓' :
|
|
2459
|
+
vm.waa_probe_status === 'checking' ? '⌛' : '✗';
|
|
2460
|
+
const waaStatusText = vm.waa_probe_status === 'ready' ? 'WAA Server Ready' :
|
|
2461
|
+
vm.waa_probe_status === 'not responding' ? 'WAA Not Responding' :
|
|
2462
|
+
vm.waa_probe_status === 'checking' ? 'Checking...' :
|
|
2463
|
+
vm.waa_probe_status === 'ssh failed' ? 'SSH Failed' : 'Unknown';
|
|
2464
|
+
|
|
2465
|
+
// Use localhost for VNC (requires SSH tunnel: ssh -fN -L 8006:localhost:8006 user@vm-ip)
|
|
2466
|
+
const vncPort = vm.vnc_port || 8006;
|
|
2467
|
+
const vncUrl = 'http://localhost:' + vncPort;
|
|
2468
|
+
const vmIp = vm.ssh_host;
|
|
2469
|
+
|
|
2470
|
+
return '<div class="vm-item">' +
|
|
2471
|
+
'<div class="vm-item-header">' +
|
|
2472
|
+
'<span class="vm-name">' + (vm.name || 'Unnamed VM') + '</span>' +
|
|
2473
|
+
'<div class="vm-status-indicator ' + statusClass + '">' +
|
|
2474
|
+
'<div class="vm-status-dot ' + statusClass + '"></div>' +
|
|
2475
|
+
'<span>' + statusText + '</span>' +
|
|
2476
|
+
'</div>' +
|
|
2477
|
+
'</div>' +
|
|
2478
|
+
|
|
2479
|
+
// Prominent IP display
|
|
2480
|
+
'<div class="vm-ip-display">' +
|
|
2481
|
+
'<span class="vm-ip-label">IP Address:</span>' +
|
|
2482
|
+
'<span class="vm-ip-value">' + vmIp + '</span>' +
|
|
2483
|
+
'<button class="vm-ip-copy" onclick="copyToClipboard(\\\'' + vmIp + '\\\', this)">Copy</button>' +
|
|
2484
|
+
'</div>' +
|
|
2485
|
+
|
|
2486
|
+
'<div class="vm-info">' +
|
|
2487
|
+
'<div class="vm-info-item">' +
|
|
2488
|
+
'<span class="vm-info-label">SSH:</span>' +
|
|
2489
|
+
'<span class="vm-info-value">' + (vm.ssh_user || 'azureuser') + '@' + vmIp + '</span>' +
|
|
2490
|
+
'</div>' +
|
|
2491
|
+
'<div class="vm-info-item">' +
|
|
2492
|
+
'<span class="vm-info-label">Container:</span>' +
|
|
2493
|
+
'<span class="vm-info-value">' + (vm.docker_container || 'win11-waa') + '</span>' +
|
|
2494
|
+
'</div>' +
|
|
2495
|
+
'</div>' +
|
|
2496
|
+
|
|
2497
|
+
'<div class="vm-actions">' +
|
|
2498
|
+
// Large prominent VNC button - uses localhost (SSH tunnel)
|
|
2499
|
+
'<a href="' + vncUrl + '" target="_blank" class="vm-vnc-link' + (vm.tunnels && vm.tunnels.vnc && vm.tunnels.vnc.active ? ' tunnel-active' : ' tunnel-inactive') + '">' +
|
|
2500
|
+
'<span class="vnc-icon">🖨</span>' +
|
|
2501
|
+
'Open VNC' +
|
|
2502
|
+
'<span class="vnc-ip">localhost:' + vncPort + '</span>' +
|
|
2503
|
+
(vm.tunnels && vm.tunnels.vnc && vm.tunnels.vnc.active ? '<span class="tunnel-badge">✓ tunnel</span>' : '<span class="tunnel-badge tunnel-error">✗ no tunnel</span>') +
|
|
2504
|
+
'</a>' +
|
|
2505
|
+
'<div class="vm-waa-status ' + waaStatusClass + '">' +
|
|
2506
|
+
waaStatusIcon + ' ' + waaStatusText +
|
|
2507
|
+
(vm.tunnels && vm.tunnels.waa && vm.tunnels.waa.active ? ' <span class="tunnel-mini">✓</span>' : '') +
|
|
2508
|
+
'</div>' +
|
|
2509
|
+
'</div>' +
|
|
2510
|
+
|
|
2511
|
+
'<div class="vm-last-checked">' +
|
|
2512
|
+
'<span>🕑</span> Last checked: ' + (vm.last_checked ? new Date(vm.last_checked).toLocaleString() : 'Never') +
|
|
2513
|
+
'</div>' +
|
|
2514
|
+
'</div>';
|
|
2515
|
+
}).join('');
|
|
2516
|
+
|
|
2517
|
+
container.innerHTML = html;
|
|
2518
|
+
}
|
|
2519
|
+
|
|
2520
|
+
function toggleVMAddForm() {
|
|
2521
|
+
const form = document.getElementById('vm-add-form');
|
|
2522
|
+
form.classList.toggle('show');
|
|
2523
|
+
}
|
|
2524
|
+
|
|
2525
|
+
async function submitVMRegistration() {
|
|
2526
|
+
const vmData = {
|
|
2527
|
+
name: document.getElementById('vm-name').value,
|
|
2528
|
+
ssh_host: document.getElementById('vm-ssh-host').value,
|
|
2529
|
+
ssh_user: document.getElementById('vm-ssh-user').value,
|
|
2530
|
+
vnc_port: parseInt(document.getElementById('vm-vnc-port').value),
|
|
2531
|
+
waa_port: parseInt(document.getElementById('vm-waa-port').value),
|
|
2532
|
+
docker_container: document.getElementById('vm-docker-container').value,
|
|
2533
|
+
internal_ip: document.getElementById('vm-internal-ip').value
|
|
2534
|
+
};
|
|
2535
|
+
|
|
2536
|
+
// Basic validation
|
|
2537
|
+
if (!vmData.name || !vmData.ssh_host) {
|
|
2538
|
+
alert('Please fill in VM Name and SSH Host');
|
|
2539
|
+
return;
|
|
2540
|
+
}
|
|
2541
|
+
|
|
2542
|
+
try {
|
|
2543
|
+
const response = await fetch('/api/vms/register', {
|
|
2544
|
+
method: 'POST',
|
|
2545
|
+
headers: {
|
|
2546
|
+
'Content-Type': 'application/json'
|
|
2547
|
+
},
|
|
2548
|
+
body: JSON.stringify(vmData)
|
|
2549
|
+
});
|
|
2550
|
+
|
|
2551
|
+
if (response.ok) {
|
|
2552
|
+
const result = await response.json();
|
|
2553
|
+
if (result.status === 'success') {
|
|
2554
|
+
toggleVMAddForm();
|
|
2555
|
+
fetchVMs();
|
|
2556
|
+
// Clear form
|
|
2557
|
+
document.getElementById('vm-name').value = '';
|
|
2558
|
+
document.getElementById('vm-ssh-host').value = '';
|
|
2559
|
+
} else {
|
|
2560
|
+
alert('Failed to register VM: ' + (result.message || 'Unknown error'));
|
|
2561
|
+
}
|
|
2562
|
+
} else {
|
|
2563
|
+
alert('Failed to register VM: Server error (HTTP ' + response.status + ')');
|
|
2564
|
+
}
|
|
2565
|
+
} catch (e) {
|
|
2566
|
+
alert('Failed to register VM: ' + e.message);
|
|
2567
|
+
}
|
|
2568
|
+
}
|
|
2569
|
+
|
|
2570
|
+
// Initial fetch and poll every 10 seconds
|
|
2571
|
+
fetchVMs();
|
|
2572
|
+
setInterval(fetchVMs, 10000);
|
|
2573
|
+
</script>
|
|
2574
|
+
'''
|
|
2575
|
+
|
|
2576
|
+
|
|
2577
|
+
def _get_run_benchmark_panel_css() -> str:
|
|
2578
|
+
"""Return CSS for the Run Benchmark configuration panel."""
|
|
2579
|
+
return '''
|
|
2580
|
+
.run-benchmark-panel {
|
|
2581
|
+
background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(16, 185, 129, 0.05) 100%);
|
|
2582
|
+
border: 1px solid rgba(16, 185, 129, 0.3);
|
|
2583
|
+
border-radius: 12px;
|
|
2584
|
+
padding: 20px 24px;
|
|
2585
|
+
margin-bottom: 24px;
|
|
2586
|
+
}
|
|
2587
|
+
.run-benchmark-header {
|
|
2588
|
+
display: flex;
|
|
2589
|
+
align-items: center;
|
|
2590
|
+
justify-content: space-between;
|
|
2591
|
+
margin-bottom: 16px;
|
|
2592
|
+
}
|
|
2593
|
+
.run-benchmark-title {
|
|
2594
|
+
display: flex;
|
|
2595
|
+
align-items: center;
|
|
2596
|
+
gap: 10px;
|
|
2597
|
+
font-size: 1rem;
|
|
2598
|
+
font-weight: 600;
|
|
2599
|
+
color: #10b981;
|
|
2600
|
+
}
|
|
2601
|
+
.run-benchmark-title svg {
|
|
2602
|
+
width: 20px;
|
|
2603
|
+
height: 20px;
|
|
2604
|
+
}
|
|
2605
|
+
.run-benchmark-form {
|
|
2606
|
+
display: grid;
|
|
2607
|
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
|
2608
|
+
gap: 16px;
|
|
2609
|
+
margin-bottom: 16px;
|
|
2610
|
+
}
|
|
2611
|
+
.form-group {
|
|
2612
|
+
display: flex;
|
|
2613
|
+
flex-direction: column;
|
|
2614
|
+
gap: 6px;
|
|
2615
|
+
}
|
|
2616
|
+
.form-group label {
|
|
2617
|
+
font-size: 0.8rem;
|
|
2618
|
+
color: var(--text-secondary);
|
|
2619
|
+
font-weight: 500;
|
|
2620
|
+
}
|
|
2621
|
+
.form-group select,
|
|
2622
|
+
.form-group input[type="text"],
|
|
2623
|
+
.form-group input[type="number"] {
|
|
2624
|
+
padding: 8px 12px;
|
|
2625
|
+
background: rgba(0, 0, 0, 0.3);
|
|
2626
|
+
border: 1px solid var(--border-color);
|
|
2627
|
+
border-radius: 6px;
|
|
2628
|
+
color: var(--text-primary);
|
|
2629
|
+
font-size: 0.9rem;
|
|
2630
|
+
}
|
|
2631
|
+
.form-group select:focus,
|
|
2632
|
+
.form-group input:focus {
|
|
2633
|
+
outline: none;
|
|
2634
|
+
border-color: #10b981;
|
|
2635
|
+
}
|
|
2636
|
+
.task-selection-group {
|
|
2637
|
+
grid-column: 1 / -1;
|
|
2638
|
+
display: flex;
|
|
2639
|
+
flex-direction: column;
|
|
2640
|
+
gap: 10px;
|
|
2641
|
+
padding: 12px 16px;
|
|
2642
|
+
background: rgba(0, 0, 0, 0.2);
|
|
2643
|
+
border-radius: 8px;
|
|
2644
|
+
}
|
|
2645
|
+
.task-selection-group-label {
|
|
2646
|
+
font-size: 0.8rem;
|
|
2647
|
+
color: var(--text-secondary);
|
|
2648
|
+
font-weight: 500;
|
|
2649
|
+
margin-bottom: 4px;
|
|
2650
|
+
}
|
|
2651
|
+
.task-selection-option {
|
|
2652
|
+
display: flex;
|
|
2653
|
+
align-items: center;
|
|
2654
|
+
gap: 10px;
|
|
2655
|
+
}
|
|
2656
|
+
.task-selection-option input[type="radio"] {
|
|
2657
|
+
accent-color: #10b981;
|
|
2658
|
+
}
|
|
2659
|
+
.task-selection-option label {
|
|
2660
|
+
font-size: 0.85rem;
|
|
2661
|
+
color: var(--text-primary);
|
|
2662
|
+
cursor: pointer;
|
|
2663
|
+
}
|
|
2664
|
+
.task-selection-option select,
|
|
2665
|
+
.task-selection-option input[type="text"] {
|
|
2666
|
+
padding: 6px 10px;
|
|
2667
|
+
background: rgba(0, 0, 0, 0.3);
|
|
2668
|
+
border: 1px solid var(--border-color);
|
|
2669
|
+
border-radius: 4px;
|
|
2670
|
+
color: var(--text-primary);
|
|
2671
|
+
font-size: 0.85rem;
|
|
2672
|
+
flex: 1;
|
|
2673
|
+
max-width: 200px;
|
|
2674
|
+
}
|
|
2675
|
+
.task-selection-option select:disabled,
|
|
2676
|
+
.task-selection-option input:disabled {
|
|
2677
|
+
opacity: 0.5;
|
|
2678
|
+
cursor: not-allowed;
|
|
2679
|
+
}
|
|
2680
|
+
.custom-model-input {
|
|
2681
|
+
display: none;
|
|
2682
|
+
margin-top: 8px;
|
|
2683
|
+
}
|
|
2684
|
+
.custom-model-input.show {
|
|
2685
|
+
display: block;
|
|
2686
|
+
}
|
|
2687
|
+
.start-btn {
|
|
2688
|
+
padding: 10px 20px;
|
|
2689
|
+
background: linear-gradient(135deg, #10b981, #059669);
|
|
2690
|
+
border: none;
|
|
2691
|
+
border-radius: 8px;
|
|
2692
|
+
color: white;
|
|
2693
|
+
font-weight: 600;
|
|
2694
|
+
cursor: pointer;
|
|
2695
|
+
transition: all 0.2s;
|
|
2696
|
+
display: flex;
|
|
2697
|
+
align-items: center;
|
|
2698
|
+
gap: 8px;
|
|
2699
|
+
}
|
|
2700
|
+
.start-btn:hover:not(:disabled) {
|
|
2701
|
+
transform: translateY(-2px);
|
|
2702
|
+
box-shadow: 0 4px 12px rgba(16, 185, 129, 0.3);
|
|
2703
|
+
}
|
|
2704
|
+
.start-btn:disabled {
|
|
2705
|
+
opacity: 0.5;
|
|
2706
|
+
cursor: not-allowed;
|
|
2707
|
+
}
|
|
2708
|
+
.start-btn .spinner {
|
|
2709
|
+
display: none;
|
|
2710
|
+
width: 14px;
|
|
2711
|
+
height: 14px;
|
|
2712
|
+
border: 2px solid rgba(255,255,255,0.3);
|
|
2713
|
+
border-top-color: white;
|
|
2714
|
+
border-radius: 50%;
|
|
2715
|
+
animation: spin 0.8s linear infinite;
|
|
2716
|
+
}
|
|
2717
|
+
.start-btn.loading .spinner {
|
|
2718
|
+
display: inline-block;
|
|
2719
|
+
}
|
|
2720
|
+
.start-btn.loading .start-icon {
|
|
2721
|
+
display: none;
|
|
2722
|
+
}
|
|
2723
|
+
.run-benchmark-status {
|
|
2724
|
+
margin-top: 12px;
|
|
2725
|
+
padding: 10px 14px;
|
|
2726
|
+
background: rgba(0, 0, 0, 0.2);
|
|
2727
|
+
border-radius: 6px;
|
|
2728
|
+
font-size: 0.85rem;
|
|
2729
|
+
color: var(--text-secondary);
|
|
2730
|
+
display: none;
|
|
2731
|
+
}
|
|
2732
|
+
.run-benchmark-status.show {
|
|
2733
|
+
display: block;
|
|
2734
|
+
}
|
|
2735
|
+
.run-benchmark-status.error {
|
|
2736
|
+
background: rgba(239, 68, 68, 0.15);
|
|
2737
|
+
color: #fca5a5;
|
|
2738
|
+
border: 1px solid rgba(239, 68, 68, 0.3);
|
|
2739
|
+
}
|
|
2740
|
+
.run-benchmark-status.success {
|
|
2741
|
+
background: rgba(16, 185, 129, 0.15);
|
|
2742
|
+
color: #6ee7b7;
|
|
2743
|
+
border: 1px solid rgba(16, 185, 129, 0.3);
|
|
2744
|
+
}
|
|
2745
|
+
'''
|
|
2746
|
+
|
|
2747
|
+
|
|
2748
|
+
def _get_run_benchmark_panel_html() -> str:
|
|
2749
|
+
"""Return HTML for the Run Benchmark configuration panel."""
|
|
2750
|
+
return '''
|
|
2751
|
+
<div class="run-benchmark-panel" id="run-benchmark-panel">
|
|
2752
|
+
<div class="run-benchmark-header">
|
|
2753
|
+
<div class="run-benchmark-title">
|
|
2754
|
+
<svg viewBox="0 0 24 24" fill="currentColor">
|
|
2755
|
+
<path d="M8 5v14l11-7z"/>
|
|
2756
|
+
</svg>
|
|
2757
|
+
Run Benchmark
|
|
2758
|
+
</div>
|
|
2759
|
+
<button class="start-btn" id="start-benchmark-btn" onclick="startBenchmarkRun()">
|
|
2760
|
+
<span class="start-icon">▶</span>
|
|
2761
|
+
<span class="spinner"></span>
|
|
2762
|
+
Start Run
|
|
2763
|
+
</button>
|
|
2764
|
+
</div>
|
|
2765
|
+
|
|
2766
|
+
<div class="run-benchmark-form">
|
|
2767
|
+
<div class="form-group">
|
|
2768
|
+
<label for="benchmark-model">Model</label>
|
|
2769
|
+
<select id="benchmark-model" onchange="handleModelChange()">
|
|
2770
|
+
<option value="gpt-4o">GPT-4o</option>
|
|
2771
|
+
<option value="gpt-4o-mini">GPT-4o-mini</option>
|
|
2772
|
+
<option value="claude-sonnet-4-5-20250929">Claude Sonnet 4.5</option>
|
|
2773
|
+
<option value="claude-opus-4-5-20251101">Claude Opus 4.5</option>
|
|
2774
|
+
<option value="custom">Custom...</option>
|
|
2775
|
+
</select>
|
|
2776
|
+
<div class="custom-model-input" id="custom-model-container">
|
|
2777
|
+
<input type="text" id="custom-model-id" placeholder="Enter model ID (e.g., gpt-4-turbo)">
|
|
2778
|
+
</div>
|
|
2779
|
+
</div>
|
|
2780
|
+
|
|
2781
|
+
<div class="form-group">
|
|
2782
|
+
<label for="benchmark-tasks">Number of Tasks</label>
|
|
2783
|
+
<input type="number" id="benchmark-tasks" value="5" min="1" max="154">
|
|
2784
|
+
</div>
|
|
2785
|
+
|
|
2786
|
+
<div class="form-group">
|
|
2787
|
+
<label for="benchmark-agent">Agent</label>
|
|
2788
|
+
<select id="benchmark-agent">
|
|
2789
|
+
<option value="navi">Navi (default)</option>
|
|
2790
|
+
<option value="som">Set-of-Marks</option>
|
|
2791
|
+
<option value="random">Random (baseline)</option>
|
|
2792
|
+
</select>
|
|
2793
|
+
</div>
|
|
2794
|
+
|
|
2795
|
+
<div class="task-selection-group">
|
|
2796
|
+
<div class="task-selection-group-label">Task Selection</div>
|
|
2797
|
+
|
|
2798
|
+
<div class="task-selection-option">
|
|
2799
|
+
<input type="radio" id="task-selection-all" name="task-selection" value="all" checked onchange="updateTaskSelectionState()">
|
|
2800
|
+
<label for="task-selection-all">All tasks (154 total, random selection)</label>
|
|
2801
|
+
</div>
|
|
2802
|
+
|
|
2803
|
+
<div class="task-selection-option">
|
|
2804
|
+
<input type="radio" id="task-selection-domain" name="task-selection" value="domain" onchange="updateTaskSelectionState()">
|
|
2805
|
+
<label for="task-selection-domain">Domain:</label>
|
|
2806
|
+
<select id="benchmark-domain" disabled>
|
|
2807
|
+
<option value="general">General</option>
|
|
2808
|
+
<option value="office">Office</option>
|
|
2809
|
+
<option value="web">Web</option>
|
|
2810
|
+
<option value="coding">Coding</option>
|
|
2811
|
+
<option value="system">System</option>
|
|
2812
|
+
<option value="creative">Creative</option>
|
|
2813
|
+
<option value="data">Data</option>
|
|
2814
|
+
<option value="communication">Communication</option>
|
|
2815
|
+
<option value="media">Media</option>
|
|
2816
|
+
<option value="gaming">Gaming</option>
|
|
2817
|
+
<option value="utility">Utility</option>
|
|
2818
|
+
</select>
|
|
2819
|
+
</div>
|
|
2820
|
+
|
|
2821
|
+
<div class="task-selection-option">
|
|
2822
|
+
<input type="radio" id="task-selection-ids" name="task-selection" value="task_ids" onchange="updateTaskSelectionState()">
|
|
2823
|
+
<label for="task-selection-ids">Task IDs:</label>
|
|
2824
|
+
<input type="text" id="benchmark-task-ids" placeholder="e.g., task_001, task_015, task_042" disabled>
|
|
2825
|
+
</div>
|
|
2826
|
+
</div>
|
|
2827
|
+
</div>
|
|
2828
|
+
|
|
2829
|
+
<div class="run-benchmark-status" id="run-benchmark-status"></div>
|
|
2830
|
+
</div>
|
|
2831
|
+
'''
|
|
2832
|
+
|
|
2833
|
+
|
|
2834
|
+
def _get_run_benchmark_panel_js(include_script_tags: bool = True) -> str:
|
|
2835
|
+
"""Return JavaScript for the Run Benchmark panel form handling and API calls.
|
|
2836
|
+
|
|
2837
|
+
Args:
|
|
2838
|
+
include_script_tags: If True, wrap JS in <script> tags. Set to False when
|
|
2839
|
+
inserting into an existing script block.
|
|
2840
|
+
"""
|
|
2841
|
+
js_code = '''
|
|
2842
|
+
// Handle model dropdown change to show/hide custom input
|
|
2843
|
+
function handleModelChange() {
|
|
2844
|
+
const select = document.getElementById('benchmark-model');
|
|
2845
|
+
const customContainer = document.getElementById('custom-model-container');
|
|
2846
|
+
if (select.value === 'custom') {
|
|
2847
|
+
customContainer.classList.add('show');
|
|
2848
|
+
} else {
|
|
2849
|
+
customContainer.classList.remove('show');
|
|
2850
|
+
}
|
|
2851
|
+
}
|
|
2852
|
+
|
|
2853
|
+
// Enable/disable task selection inputs based on radio selection
|
|
2854
|
+
function updateTaskSelectionState() {
|
|
2855
|
+
const allRadio = document.getElementById('task-selection-all');
|
|
2856
|
+
const domainRadio = document.getElementById('task-selection-domain');
|
|
2857
|
+
const idsRadio = document.getElementById('task-selection-ids');
|
|
2858
|
+
const domainSelect = document.getElementById('benchmark-domain');
|
|
2859
|
+
const taskIdsInput = document.getElementById('benchmark-task-ids');
|
|
2860
|
+
|
|
2861
|
+
domainSelect.disabled = !domainRadio.checked;
|
|
2862
|
+
taskIdsInput.disabled = !idsRadio.checked;
|
|
2863
|
+
}
|
|
2864
|
+
|
|
2865
|
+
// Show status message
|
|
2866
|
+
function showBenchmarkStatus(message, type) {
|
|
2867
|
+
const statusEl = document.getElementById('run-benchmark-status');
|
|
2868
|
+
statusEl.textContent = message;
|
|
2869
|
+
statusEl.className = 'run-benchmark-status show ' + (type || '');
|
|
2870
|
+
}
|
|
2871
|
+
|
|
2872
|
+
// Hide status message
|
|
2873
|
+
function hideBenchmarkStatus() {
|
|
2874
|
+
const statusEl = document.getElementById('run-benchmark-status');
|
|
2875
|
+
statusEl.classList.remove('show');
|
|
2876
|
+
}
|
|
2877
|
+
|
|
2878
|
+
// Start benchmark run
|
|
2879
|
+
async function startBenchmarkRun() {
|
|
2880
|
+
const btn = document.getElementById('start-benchmark-btn');
|
|
2881
|
+
|
|
2882
|
+
// Build params object
|
|
2883
|
+
const modelSelect = document.getElementById('benchmark-model');
|
|
2884
|
+
let model = modelSelect.value;
|
|
2885
|
+
if (model === 'custom') {
|
|
2886
|
+
model = document.getElementById('custom-model-id').value.trim();
|
|
2887
|
+
if (!model) {
|
|
2888
|
+
showBenchmarkStatus('Please enter a custom model ID', 'error');
|
|
2889
|
+
return;
|
|
2890
|
+
}
|
|
2891
|
+
}
|
|
2892
|
+
|
|
2893
|
+
const numTasks = parseInt(document.getElementById('benchmark-tasks').value);
|
|
2894
|
+
if (isNaN(numTasks) || numTasks < 1 || numTasks > 154) {
|
|
2895
|
+
showBenchmarkStatus('Number of tasks must be between 1 and 154', 'error');
|
|
2896
|
+
return;
|
|
2897
|
+
}
|
|
2898
|
+
|
|
2899
|
+
const agent = document.getElementById('benchmark-agent').value;
|
|
2900
|
+
|
|
2901
|
+
// Get task selection
|
|
2902
|
+
const taskSelection = document.querySelector('input[name="task-selection"]:checked').value;
|
|
2903
|
+
|
|
2904
|
+
const params = {
|
|
2905
|
+
model: model,
|
|
2906
|
+
num_tasks: numTasks,
|
|
2907
|
+
agent: agent,
|
|
2908
|
+
task_selection: taskSelection
|
|
2909
|
+
};
|
|
2910
|
+
|
|
2911
|
+
if (taskSelection === 'domain') {
|
|
2912
|
+
params.domain = document.getElementById('benchmark-domain').value;
|
|
2913
|
+
} else if (taskSelection === 'task_ids') {
|
|
2914
|
+
const taskIdsStr = document.getElementById('benchmark-task-ids').value.trim();
|
|
2915
|
+
if (!taskIdsStr) {
|
|
2916
|
+
showBenchmarkStatus('Please enter task IDs', 'error');
|
|
2917
|
+
return;
|
|
2918
|
+
}
|
|
2919
|
+
params.task_ids = taskIdsStr.split(',').map(id => id.trim()).filter(id => id);
|
|
2920
|
+
if (params.task_ids.length === 0) {
|
|
2921
|
+
showBenchmarkStatus('Please enter valid task IDs', 'error');
|
|
2922
|
+
return;
|
|
2923
|
+
}
|
|
2924
|
+
}
|
|
2925
|
+
|
|
2926
|
+
// Disable button and show loading state
|
|
2927
|
+
btn.disabled = true;
|
|
2928
|
+
btn.classList.add('loading');
|
|
2929
|
+
hideBenchmarkStatus();
|
|
2930
|
+
|
|
2931
|
+
try {
|
|
2932
|
+
const response = await fetch('/api/benchmark/start', {
|
|
2933
|
+
method: 'POST',
|
|
2934
|
+
headers: {'Content-Type': 'application/json'},
|
|
2935
|
+
body: JSON.stringify(params)
|
|
2936
|
+
});
|
|
2937
|
+
|
|
2938
|
+
const result = await response.json();
|
|
2939
|
+
|
|
2940
|
+
if (response.ok && result.status === 'started') {
|
|
2941
|
+
showBenchmarkStatus('Benchmark started! Model: ' + params.model + ', Tasks: ' + params.num_tasks + '. Check progress in Background Tasks section below.', 'success');
|
|
2942
|
+
// Refresh background tasks to show new benchmark
|
|
2943
|
+
if (typeof refreshBackgroundTasks === 'function') {
|
|
2944
|
+
setTimeout(refreshBackgroundTasks, 1000);
|
|
2945
|
+
}
|
|
2946
|
+
} else {
|
|
2947
|
+
throw new Error(result.error || result.message || 'Failed to start benchmark');
|
|
2948
|
+
}
|
|
2949
|
+
} catch (e) {
|
|
2950
|
+
console.error('Failed to start benchmark:', e);
|
|
2951
|
+
showBenchmarkStatus('Error: ' + e.message, 'error');
|
|
2952
|
+
btn.disabled = false;
|
|
2953
|
+
btn.classList.remove('loading');
|
|
2954
|
+
}
|
|
2955
|
+
}
|
|
2956
|
+
|
|
2957
|
+
// Initialize on load
|
|
2958
|
+
document.addEventListener('DOMContentLoaded', function() {
|
|
2959
|
+
updateTaskSelectionState();
|
|
2960
|
+
});
|
|
2961
|
+
'''
|
|
2962
|
+
if include_script_tags:
|
|
2963
|
+
return f'<script>{js_code}</script>'
|
|
2964
|
+
return js_code
|
|
2965
|
+
|
|
2966
|
+
|
|
13
2967
|
def generate_benchmark_viewer(
|
|
14
2968
|
benchmark_dir: Path | str,
|
|
15
2969
|
output_path: Path | str | None = None,
|
|
@@ -217,6 +3171,16 @@ def generate_empty_benchmark_viewer(output_path: Path | str) -> Path:
|
|
|
217
3171
|
|
|
218
3172
|
shared_header_css = _get_shared_header_css()
|
|
219
3173
|
shared_header_html = _generate_shared_header_html("benchmarks")
|
|
3174
|
+
# NOTE: Azure ML Jobs panel moved to Training tab (not used for WAA benchmarks)
|
|
3175
|
+
run_benchmark_css = _get_run_benchmark_panel_css()
|
|
3176
|
+
run_benchmark_html = _get_run_benchmark_panel_html()
|
|
3177
|
+
run_benchmark_js = _get_run_benchmark_panel_js()
|
|
3178
|
+
tasks_css = _get_background_tasks_panel_css()
|
|
3179
|
+
tasks_html = _get_background_tasks_panel_html()
|
|
3180
|
+
live_eval_css = _get_live_evaluation_panel_css()
|
|
3181
|
+
live_eval_html = _get_live_evaluation_panel_html()
|
|
3182
|
+
vm_discovery_css = _get_vm_discovery_panel_css()
|
|
3183
|
+
vm_discovery_html = _get_vm_discovery_panel_html()
|
|
220
3184
|
|
|
221
3185
|
html = f'''<!DOCTYPE html>
|
|
222
3186
|
<html lang="en">
|
|
@@ -244,6 +3208,15 @@ def generate_empty_benchmark_viewer(output_path: Path | str) -> Path:
|
|
|
244
3208
|
min-height: 100vh;
|
|
245
3209
|
}}
|
|
246
3210
|
{shared_header_css}
|
|
3211
|
+
{run_benchmark_css}
|
|
3212
|
+
{tasks_css}
|
|
3213
|
+
{live_eval_css}
|
|
3214
|
+
{vm_discovery_css}
|
|
3215
|
+
.container {{
|
|
3216
|
+
max-width: 900px;
|
|
3217
|
+
margin: 0 auto;
|
|
3218
|
+
padding: 24px;
|
|
3219
|
+
}}
|
|
247
3220
|
.empty-state {{
|
|
248
3221
|
display: flex;
|
|
249
3222
|
flex-direction: column;
|
|
@@ -311,6 +3284,15 @@ def generate_empty_benchmark_viewer(output_path: Path | str) -> Path:
|
|
|
311
3284
|
<body>
|
|
312
3285
|
{shared_header_html}
|
|
313
3286
|
|
|
3287
|
+
<div class="container">
|
|
3288
|
+
{run_benchmark_html}
|
|
3289
|
+
{live_eval_html}
|
|
3290
|
+
{tasks_html}
|
|
3291
|
+
{vm_discovery_html}
|
|
3292
|
+
</div>
|
|
3293
|
+
|
|
3294
|
+
{run_benchmark_js}
|
|
3295
|
+
|
|
314
3296
|
<div class="empty-state">
|
|
315
3297
|
<div class="empty-icon">🚧</div>
|
|
316
3298
|
<h1 class="empty-title">Windows Agent Arena Integration</h1>
|
|
@@ -669,12 +3651,89 @@ def _generate_benchmark_viewer_html(
|
|
|
669
3651
|
margin-bottom: 16px;
|
|
670
3652
|
opacity: 0.5;
|
|
671
3653
|
}}
|
|
3654
|
+
|
|
3655
|
+
.mock-banner {{
|
|
3656
|
+
background: linear-gradient(135deg, rgba(255, 152, 0, 0.2) 0%, rgba(255, 87, 34, 0.2) 100%);
|
|
3657
|
+
border: 2px solid #ff9800;
|
|
3658
|
+
border-radius: 12px;
|
|
3659
|
+
padding: 20px 24px;
|
|
3660
|
+
margin-bottom: 24px;
|
|
3661
|
+
display: flex;
|
|
3662
|
+
align-items: center;
|
|
3663
|
+
gap: 16px;
|
|
3664
|
+
}}
|
|
3665
|
+
|
|
3666
|
+
.mock-banner-icon {{
|
|
3667
|
+
font-size: 2rem;
|
|
3668
|
+
flex-shrink: 0;
|
|
3669
|
+
}}
|
|
3670
|
+
|
|
3671
|
+
.mock-banner-content {{
|
|
3672
|
+
flex: 1;
|
|
3673
|
+
}}
|
|
3674
|
+
|
|
3675
|
+
.mock-banner-title {{
|
|
3676
|
+
font-size: 1.1rem;
|
|
3677
|
+
font-weight: 700;
|
|
3678
|
+
color: #ff9800;
|
|
3679
|
+
margin-bottom: 6px;
|
|
3680
|
+
}}
|
|
3681
|
+
|
|
3682
|
+
.mock-banner-text {{
|
|
3683
|
+
font-size: 0.9rem;
|
|
3684
|
+
color: var(--text-secondary);
|
|
3685
|
+
line-height: 1.5;
|
|
3686
|
+
}}
|
|
3687
|
+
|
|
3688
|
+
.run-badge {{
|
|
3689
|
+
display: inline-flex;
|
|
3690
|
+
align-items: center;
|
|
3691
|
+
gap: 8px;
|
|
3692
|
+
padding: 8px 16px;
|
|
3693
|
+
border-radius: 8px;
|
|
3694
|
+
font-size: 0.85rem;
|
|
3695
|
+
font-weight: 600;
|
|
3696
|
+
margin-bottom: 24px;
|
|
3697
|
+
}}
|
|
3698
|
+
|
|
3699
|
+
.run-badge.mock {{
|
|
3700
|
+
background: linear-gradient(135deg, rgba(255, 152, 0, 0.2) 0%, rgba(255, 87, 34, 0.2) 100%);
|
|
3701
|
+
border: 1px solid #ff9800;
|
|
3702
|
+
color: #ffb74d;
|
|
3703
|
+
}}
|
|
3704
|
+
|
|
3705
|
+
.run-badge.real {{
|
|
3706
|
+
background: linear-gradient(135deg, rgba(0, 212, 170, 0.2) 0%, rgba(0, 150, 136, 0.2) 100%);
|
|
3707
|
+
border: 1px solid var(--success);
|
|
3708
|
+
color: var(--success);
|
|
3709
|
+
}}
|
|
3710
|
+
|
|
3711
|
+
.run-badge-icon {{
|
|
3712
|
+
font-size: 1rem;
|
|
3713
|
+
}}
|
|
672
3714
|
</style>
|
|
673
3715
|
</head>
|
|
674
3716
|
<body>
|
|
675
3717
|
{shared_header_html}
|
|
676
3718
|
|
|
677
3719
|
<div class="container">
|
|
3720
|
+
<div id="mock-banner" class="mock-banner" style="display: none;">
|
|
3721
|
+
<div class="mock-banner-icon">WARNING</div>
|
|
3722
|
+
<div class="mock-banner-content">
|
|
3723
|
+
<div class="mock-banner-title">Mock Data - Simulated Results Only</div>
|
|
3724
|
+
<div class="mock-banner-text">
|
|
3725
|
+
This benchmark run uses simulated mock data for pipeline testing and development.
|
|
3726
|
+
These results do NOT represent actual Windows Agent Arena evaluation performance.
|
|
3727
|
+
To run real WAA evaluation, use: <code>uv run python -m openadapt_ml.benchmarks.cli run-local</code> or <code>run-azure</code>
|
|
3728
|
+
</div>
|
|
3729
|
+
</div>
|
|
3730
|
+
</div>
|
|
3731
|
+
|
|
3732
|
+
<div id="run-badge" class="run-badge" style="display: none;">
|
|
3733
|
+
<span class="run-badge-icon"></span>
|
|
3734
|
+
<span class="run-badge-text"></span>
|
|
3735
|
+
</div>
|
|
3736
|
+
|
|
678
3737
|
<div class="summary-cards">
|
|
679
3738
|
<div class="summary-card">
|
|
680
3739
|
<div class="label">Total Tasks</div>
|
|
@@ -730,8 +3789,40 @@ def _generate_benchmark_viewer_html(
|
|
|
730
3789
|
domain: 'all'
|
|
731
3790
|
}};
|
|
732
3791
|
|
|
3792
|
+
// Classify the loaded run as mock or real (a run is mock when its
// benchmark name contains 'mock'), then update the run badge and the
// mock warning banner to match.
function detectAndShowRunType() {{
    const benchmarkName = metadata.benchmark_name;
    // Coerce to a strict boolean: classList.toggle() ignores an undefined force flag.
    const mockRun = Boolean(benchmarkName && benchmarkName.includes('mock'));

    const runBadge = document.getElementById('run-badge');
    const mockBanner = document.getElementById('mock-banner');
    const iconEl = runBadge.querySelector('.run-badge-icon');
    const textEl = runBadge.querySelector('.run-badge-text');

    // Exactly one of the two state classes is set after these calls.
    runBadge.classList.toggle('mock', mockRun);
    runBadge.classList.toggle('real', !mockRun);

    iconEl.textContent = mockRun ? '⚠️' : '✓';
    textEl.textContent = mockRun
        ? 'MOCK DATA - Simulated results for pipeline testing'
        : 'REAL - Actual Windows Agent Arena evaluation';

    // The badge is shown for every run; the banner only accompanies mock runs.
    runBadge.style.display = 'inline-flex';
    mockBanner.style.display = mockRun ? 'flex' : 'none';
}}
|
|
3822
|
+
|
|
733
3823
|
// Initialize
|
|
734
3824
|
function init() {{
|
|
3825
|
+
detectAndShowRunType();
|
|
735
3826
|
updateSummaryCards();
|
|
736
3827
|
populateDomainFilter();
|
|
737
3828
|
renderTaskList();
|
|
@@ -916,6 +4007,18 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
916
4007
|
Returns:
|
|
917
4008
|
Complete HTML string
|
|
918
4009
|
"""
|
|
4010
|
+
# NOTE: Azure ML Jobs panel moved to Training tab (not used for WAA benchmarks)
|
|
4011
|
+
run_benchmark_css = _get_run_benchmark_panel_css()
|
|
4012
|
+
run_benchmark_html = _get_run_benchmark_panel_html()
|
|
4013
|
+
# Use include_script_tags=False since we insert into existing script block
|
|
4014
|
+
run_benchmark_js = _get_run_benchmark_panel_js(include_script_tags=False)
|
|
4015
|
+
tasks_css = _get_background_tasks_panel_css()
|
|
4016
|
+
tasks_html = _get_background_tasks_panel_html()
|
|
4017
|
+
live_eval_css = _get_live_evaluation_panel_css()
|
|
4018
|
+
live_eval_html = _get_live_evaluation_panel_html()
|
|
4019
|
+
vm_discovery_css = _get_vm_discovery_panel_css()
|
|
4020
|
+
vm_discovery_html = _get_vm_discovery_panel_html()
|
|
4021
|
+
|
|
919
4022
|
# Prepare runs data as JSON
|
|
920
4023
|
runs_json = json.dumps(runs)
|
|
921
4024
|
|
|
@@ -974,6 +4077,10 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
974
4077
|
}}
|
|
975
4078
|
|
|
976
4079
|
{shared_header_css}
|
|
4080
|
+
{run_benchmark_css}
|
|
4081
|
+
{tasks_css}
|
|
4082
|
+
{live_eval_css}
|
|
4083
|
+
{vm_discovery_css}
|
|
977
4084
|
|
|
978
4085
|
.run-selector-section {{
|
|
979
4086
|
background: var(--bg-secondary);
|
|
@@ -1266,12 +4373,89 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
1266
4373
|
margin-bottom: 16px;
|
|
1267
4374
|
opacity: 0.5;
|
|
1268
4375
|
}}
|
|
4376
|
+
|
|
4377
|
+
.mock-banner {{
|
|
4378
|
+
background: linear-gradient(135deg, rgba(255, 152, 0, 0.2) 0%, rgba(255, 87, 34, 0.2) 100%);
|
|
4379
|
+
border: 2px solid #ff9800;
|
|
4380
|
+
border-radius: 12px;
|
|
4381
|
+
padding: 20px 24px;
|
|
4382
|
+
margin-bottom: 24px;
|
|
4383
|
+
display: flex;
|
|
4384
|
+
align-items: center;
|
|
4385
|
+
gap: 16px;
|
|
4386
|
+
}}
|
|
4387
|
+
|
|
4388
|
+
.mock-banner-icon {{
|
|
4389
|
+
font-size: 2rem;
|
|
4390
|
+
flex-shrink: 0;
|
|
4391
|
+
}}
|
|
4392
|
+
|
|
4393
|
+
.mock-banner-content {{
|
|
4394
|
+
flex: 1;
|
|
4395
|
+
}}
|
|
4396
|
+
|
|
4397
|
+
.mock-banner-title {{
|
|
4398
|
+
font-size: 1.1rem;
|
|
4399
|
+
font-weight: 700;
|
|
4400
|
+
color: #ff9800;
|
|
4401
|
+
margin-bottom: 6px;
|
|
4402
|
+
}}
|
|
4403
|
+
|
|
4404
|
+
.mock-banner-text {{
|
|
4405
|
+
font-size: 0.9rem;
|
|
4406
|
+
color: var(--text-secondary);
|
|
4407
|
+
line-height: 1.5;
|
|
4408
|
+
}}
|
|
4409
|
+
|
|
4410
|
+
.run-badge {{
|
|
4411
|
+
display: inline-flex;
|
|
4412
|
+
align-items: center;
|
|
4413
|
+
gap: 8px;
|
|
4414
|
+
padding: 8px 16px;
|
|
4415
|
+
border-radius: 8px;
|
|
4416
|
+
font-size: 0.85rem;
|
|
4417
|
+
font-weight: 600;
|
|
4418
|
+
margin-bottom: 24px;
|
|
4419
|
+
}}
|
|
4420
|
+
|
|
4421
|
+
.run-badge.mock {{
|
|
4422
|
+
background: linear-gradient(135deg, rgba(255, 152, 0, 0.2) 0%, rgba(255, 87, 34, 0.2) 100%);
|
|
4423
|
+
border: 1px solid #ff9800;
|
|
4424
|
+
color: #ffb74d;
|
|
4425
|
+
}}
|
|
4426
|
+
|
|
4427
|
+
.run-badge.real {{
|
|
4428
|
+
background: linear-gradient(135deg, rgba(0, 212, 170, 0.2) 0%, rgba(0, 150, 136, 0.2) 100%);
|
|
4429
|
+
border: 1px solid var(--success);
|
|
4430
|
+
color: var(--success);
|
|
4431
|
+
}}
|
|
4432
|
+
|
|
4433
|
+
.run-badge-icon {{
|
|
4434
|
+
font-size: 1rem;
|
|
4435
|
+
}}
|
|
1269
4436
|
</style>
|
|
1270
4437
|
</head>
|
|
1271
4438
|
<body>
|
|
1272
4439
|
{shared_header_html}
|
|
1273
4440
|
|
|
1274
4441
|
<div class="container">
|
|
4442
|
+
{run_benchmark_html}
|
|
4443
|
+
{live_eval_html}
|
|
4444
|
+
{tasks_html}
|
|
4445
|
+
{vm_discovery_html}
|
|
4446
|
+
|
|
4447
|
+
<div id="mock-banner" class="mock-banner" style="display: none;">
|
|
4448
|
+
<div class="mock-banner-icon">WARNING</div>
|
|
4449
|
+
<div class="mock-banner-content">
|
|
4450
|
+
<div class="mock-banner-title">Mock Data - Simulated Results Only</div>
|
|
4451
|
+
<div class="mock-banner-text">
|
|
4452
|
+
This benchmark run uses simulated mock data for pipeline testing and development.
|
|
4453
|
+
These results do NOT represent actual Windows Agent Arena evaluation performance.
|
|
4454
|
+
To run real WAA evaluation, use: <code>uv run python -m openadapt_ml.benchmarks.cli run-local</code> or <code>run-azure</code>
|
|
4455
|
+
</div>
|
|
4456
|
+
</div>
|
|
4457
|
+
</div>
|
|
4458
|
+
|
|
1275
4459
|
<div class="run-selector-section">
|
|
1276
4460
|
<span class="run-selector-label">Benchmark Run:</span>
|
|
1277
4461
|
<select id="run-selector">
|
|
@@ -1279,6 +4463,11 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
1279
4463
|
</select>
|
|
1280
4464
|
</div>
|
|
1281
4465
|
|
|
4466
|
+
<div id="run-badge" class="run-badge" style="display: none;">
|
|
4467
|
+
<span class="run-badge-icon"></span>
|
|
4468
|
+
<span class="run-badge-text"></span>
|
|
4469
|
+
</div>
|
|
4470
|
+
|
|
1282
4471
|
<div class="summary-cards">
|
|
1283
4472
|
<div class="summary-card">
|
|
1284
4473
|
<div class="label">Total Tasks</div>
|
|
@@ -1346,6 +4535,38 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
1346
4535
|
return getCurrentRun().summary;
|
|
1347
4536
|
}}
|
|
1348
4537
|
|
|
4538
|
+
// Classify the currently selected run as mock or real (a run is mock when
// its benchmark name contains 'mock'), then update the run badge and the
// mock warning banner to match.
function detectAndShowRunType() {{
    const benchmarkName = getCurrentRun().benchmark_name;
    // Coerce to a strict boolean: classList.toggle() ignores an undefined force flag.
    const mockRun = Boolean(benchmarkName && benchmarkName.includes('mock'));

    const runBadge = document.getElementById('run-badge');
    const mockBanner = document.getElementById('mock-banner');
    const iconEl = runBadge.querySelector('.run-badge-icon');
    const textEl = runBadge.querySelector('.run-badge-text');

    // Exactly one of the two state classes is set after these calls.
    runBadge.classList.toggle('mock', mockRun);
    runBadge.classList.toggle('real', !mockRun);

    iconEl.textContent = mockRun ? '⚠️' : '✓';
    textEl.textContent = mockRun
        ? 'MOCK DATA - Simulated results for pipeline testing'
        : 'REAL - Actual Windows Agent Arena evaluation';

    // The badge is shown for every run; the banner only accompanies mock runs.
    runBadge.style.display = 'inline-flex';
    mockBanner.style.display = mockRun ? 'flex' : 'none';
}}
|
|
4569
|
+
|
|
1349
4570
|
// Initialize
|
|
1350
4571
|
function init() {{
|
|
1351
4572
|
populateDomainFilter();
|
|
@@ -1369,6 +4590,7 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
1369
4590
|
}}
|
|
1370
4591
|
|
|
1371
4592
|
// Refresh the view in a fixed order: run-type badge/banner first,
// then the summary cards, then the task list.
function updateDisplay() {{
    detectAndShowRunType();
    updateSummaryCards();
    renderTaskList();
}}
|
|
@@ -1529,6 +4751,9 @@ def _generate_multi_run_benchmark_viewer_html(
|
|
|
1529
4751
|
return parts.length > 0 ? parts.join(', ') : 'No details';
|
|
1530
4752
|
}}
|
|
1531
4753
|
|
|
4754
|
+
// Run Benchmark panel functionality
|
|
4755
|
+
{run_benchmark_js}
|
|
4756
|
+
|
|
1532
4757
|
// Initialize on page load
|
|
1533
4758
|
init();
|
|
1534
4759
|
</script>
|