@runloop/rl-cli 1.7.1 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -5
- package/dist/cli.js +0 -0
- package/dist/commands/blueprint/delete.js +21 -0
- package/dist/commands/blueprint/list.js +226 -174
- package/dist/commands/blueprint/prune.js +13 -28
- package/dist/commands/devbox/create.js +41 -0
- package/dist/commands/devbox/list.js +125 -109
- package/dist/commands/devbox/tunnel.js +4 -19
- package/dist/commands/gateway-config/create.js +44 -0
- package/dist/commands/gateway-config/delete.js +21 -0
- package/dist/commands/gateway-config/get.js +15 -0
- package/dist/commands/gateway-config/list.js +493 -0
- package/dist/commands/gateway-config/update.js +60 -0
- package/dist/commands/menu.js +2 -1
- package/dist/commands/secret/list.js +379 -4
- package/dist/commands/snapshot/list.js +11 -2
- package/dist/commands/snapshot/prune.js +265 -0
- package/dist/components/BenchmarkMenu.js +108 -0
- package/dist/components/DetailedInfoView.js +20 -0
- package/dist/components/DevboxActionsMenu.js +9 -61
- package/dist/components/DevboxCreatePage.js +531 -14
- package/dist/components/DevboxDetailPage.js +27 -22
- package/dist/components/GatewayConfigCreatePage.js +265 -0
- package/dist/components/LogsViewer.js +6 -40
- package/dist/components/MainMenu.js +63 -22
- package/dist/components/ResourceDetailPage.js +143 -160
- package/dist/components/ResourceListView.js +3 -33
- package/dist/components/ResourcePicker.js +220 -0
- package/dist/components/SecretCreatePage.js +183 -0
- package/dist/components/SettingsMenu.js +95 -0
- package/dist/components/StateHistory.js +1 -20
- package/dist/components/StatusBadge.js +80 -0
- package/dist/components/StreamingLogsViewer.js +8 -42
- package/dist/components/form/FormTextInput.js +4 -2
- package/dist/components/resourceDetailTypes.js +18 -0
- package/dist/hooks/useInputHandler.js +103 -0
- package/dist/router/Router.js +99 -2
- package/dist/screens/BenchmarkDetailScreen.js +163 -0
- package/dist/screens/BenchmarkJobCreateScreen.js +524 -0
- package/dist/screens/BenchmarkJobDetailScreen.js +614 -0
- package/dist/screens/BenchmarkJobListScreen.js +479 -0
- package/dist/screens/BenchmarkListScreen.js +266 -0
- package/dist/screens/BenchmarkMenuScreen.js +29 -0
- package/dist/screens/BenchmarkRunDetailScreen.js +425 -0
- package/dist/screens/BenchmarkRunListScreen.js +275 -0
- package/dist/screens/BlueprintDetailScreen.js +5 -1
- package/dist/screens/DevboxCreateScreen.js +2 -2
- package/dist/screens/GatewayConfigDetailScreen.js +236 -0
- package/dist/screens/GatewayConfigListScreen.js +7 -0
- package/dist/screens/MenuScreen.js +5 -2
- package/dist/screens/ScenarioRunDetailScreen.js +226 -0
- package/dist/screens/ScenarioRunListScreen.js +245 -0
- package/dist/screens/SecretCreateScreen.js +7 -0
- package/dist/screens/SecretDetailScreen.js +198 -0
- package/dist/screens/SecretListScreen.js +7 -0
- package/dist/screens/SettingsMenuScreen.js +26 -0
- package/dist/screens/SnapshotDetailScreen.js +6 -0
- package/dist/services/agentService.js +42 -0
- package/dist/services/benchmarkJobService.js +122 -0
- package/dist/services/benchmarkService.js +120 -0
- package/dist/services/gatewayConfigService.js +114 -0
- package/dist/services/scenarioService.js +34 -0
- package/dist/store/benchmarkJobStore.js +66 -0
- package/dist/store/benchmarkStore.js +183 -0
- package/dist/store/betaFeatureStore.js +47 -0
- package/dist/store/gatewayConfigStore.js +83 -0
- package/dist/store/index.js +1 -0
- package/dist/utils/browser.js +22 -0
- package/dist/utils/clipboard.js +41 -0
- package/dist/utils/commands.js +80 -0
- package/dist/utils/config.js +8 -0
- package/dist/utils/time.js +121 -0
- package/package.json +42 -43
|
@@ -0,0 +1,614 @@
|
|
|
1
|
+
import { jsx as _jsx, Fragment as _Fragment, jsxs as _jsxs } from "react/jsx-runtime";
|
|
2
|
+
/**
|
|
3
|
+
* BenchmarkJobDetailScreen - Detail page for benchmark jobs
|
|
4
|
+
* Uses the generic ResourceDetailPage component
|
|
5
|
+
*/
|
|
6
|
+
import React from "react";
|
|
7
|
+
import { Text } from "ink";
|
|
8
|
+
import figures from "figures";
|
|
9
|
+
import { useNavigation } from "../store/navigationStore.js";
|
|
10
|
+
import { useBenchmarkJobStore, } from "../store/benchmarkJobStore.js";
|
|
11
|
+
import { ResourceDetailPage, formatTimestamp, } from "../components/ResourceDetailPage.js";
|
|
12
|
+
import { getBenchmarkJob } from "../services/benchmarkJobService.js";
|
|
13
|
+
import { getBenchmarkRun } from "../services/benchmarkService.js";
|
|
14
|
+
import { SpinnerComponent } from "../components/Spinner.js";
|
|
15
|
+
import { ErrorMessage } from "../components/ErrorMessage.js";
|
|
16
|
+
import { Breadcrumb } from "../components/Breadcrumb.js";
|
|
17
|
+
import { colors } from "../utils/theme.js";
|
|
18
|
+
/**
 * Detail screen for a single benchmark job, rendered through the generic
 * ResourceDetailPage component.
 *
 * The job is looked up in the benchmark job store for an immediate display and
 * is also fetched from the API; the fetched copy wins once it has arrived.
 * While the job state is "running", "queued" or "initializing" a polling
 * callback is handed to ResourceDetailPage.
 *
 * @param {object} props
 * @param {string} [props.benchmarkJobId] - ID of the benchmark job to show.
 * @returns {JSX.Element} A loading, error, not-found or detail view.
 */
export function BenchmarkJobDetailScreen({ benchmarkJobId, }) {
    const { goBack, navigate } = useNavigation();
    const benchmarkJobs = useBenchmarkJobStore((state) => state.benchmarkJobs);
    // The fetch result is stored together with the ID it was requested for, so a
    // result belonging to a previously shown job can never be rendered.
    const [fetchState, setFetchState] = React.useState({
        id: null,
        job: null,
        error: null,
    });
    const [runNames, setRunNames] = React.useState(() => new Map());
    // Find job in store first
    const jobFromStore = benchmarkJobs.find((j) => j.id === benchmarkJobId);
    // Polling function
    const pollJob = React.useCallback(async () => {
        if (!benchmarkJobId)
            return null;
        return getBenchmarkJob(benchmarkJobId);
    }, [benchmarkJobId]);
    // Fetch the full job once per job ID. The effect depends on the ID only: a
    // failed request must not re-trigger itself, and a changed ID must refetch.
    React.useEffect(() => {
        if (!benchmarkJobId)
            return undefined;
        let cancelled = false;
        getBenchmarkJob(benchmarkJobId)
            .then((loaded) => {
            if (!cancelled) {
                setFetchState({ id: benchmarkJobId, job: loaded, error: null });
            }
        })
            .catch((err) => {
            if (!cancelled) {
                setFetchState({ id: benchmarkJobId, job: null, error: err });
            }
        });
        return () => {
            cancelled = true;
        };
    }, [benchmarkJobId]);
    const fetchIsCurrent = fetchState.id === benchmarkJobId;
    const fetchedJob = fetchIsCurrent ? fetchState.job : null;
    const error = fetchIsCurrent ? fetchState.error : null;
    // Use fetched job for full details, fall back to store for basic display
    const job = fetchedJob || jobFromStore;
    // Fetch run names when job is loaded
    React.useEffect(() => {
        if (!job)
            return undefined;
        const runIds = collectRunIds(job);
        if (runIds.length === 0)
            return undefined;
        let cancelled = false;
        // Every lookup handles its own failure, so Promise.all cannot reject here.
        void Promise.all(runIds.map(async (runId) => {
            try {
                const run = await getBenchmarkRun(runId);
                return [runId, run.name || runId];
            }
            catch {
                // Best effort: the name is cosmetic, fall back to the run ID.
                return [runId, runId];
            }
        })).then((entries) => {
            if (!cancelled) {
                setRunNames(new Map(entries));
            }
        });
        return () => {
            cancelled = true;
        };
    }, [job]);
    // Show loading state
    if (!job && benchmarkJobId && !error) {
        return renderStatusScreen("Loading...", _jsx(SpinnerComponent, { message: "Loading benchmark job details..." }));
    }
    // Show error state
    if (error && !job) {
        return renderStatusScreen("Error", _jsx(ErrorMessage, { message: "Failed to load benchmark job details", error: error }));
    }
    // Show not found error
    if (!job) {
        return renderStatusScreen("Not Found", _jsx(ErrorMessage, { message: `Benchmark job ${benchmarkJobId || "unknown"} not found`, error: new Error("Benchmark job not found") }));
    }
    // Build detail sections
    const detailSections = [];
    const basicFields = buildSummaryFields(job);
    if (basicFields.length > 0) {
        detailSections.push({
            title: "Summary",
            icon: figures.squareSmallFilled,
            color: colors.warning,
            fields: basicFields,
        });
    }
    // Benchmark Runs section - show all agent runs with their status
    const agentRuns = buildAgentRuns(job);
    if (agentRuns.length > 0) {
        detailSections.push({
            title: buildRunsSectionTitle(agentRuns),
            icon: figures.pointer,
            color: colors.primary,
            fields: agentRuns.map(buildRunField),
        });
    }
    // Job Configuration section (condensed)
    if (job.job_spec) {
        const specFields = buildSpecFields(job.job_spec);
        if (specFields.length > 0) {
            detailSections.push({
                title: "Job Configuration",
                icon: figures.circleFilled,
                color: colors.secondary,
                fields: specFields,
            });
        }
    }
    // Job Source section
    if (job.job_source) {
        const sourceFields = buildSourceFields(job.job_source);
        if (sourceFields.length > 0) {
            detailSections.push({
                title: "Job Source",
                icon: figures.info,
                color: colors.textDim,
                fields: sourceFields,
            });
        }
    }
    // Collect benchmark runs for operations; use the fetched run name when it is
    // available and fall back to the run ID otherwise.
    const benchmarkRunIds = collectRunIds(job).map((id) => ({
        id,
        name: runNames.get(id) || id,
    }));
    // Operations available for benchmark jobs
    const operations = [];
    // Add "View Run" operations for each benchmark run (limit to first 9 for shortcuts)
    benchmarkRunIds.slice(0, 9).forEach((run, idx) => {
        operations.push({
            key: `view-run-${idx}`,
            label: `View Run: ${run.name}`,
            color: colors.info,
            icon: figures.arrowRight,
            shortcut: String(idx + 1),
        });
    });
    // Always add clone job option
    operations.push({
        key: "clone-job",
        label: "Clone Job",
        color: colors.success,
        icon: figures.play,
        shortcut: "c",
    });
    // Handle operation selection
    const handleOperation = async (operation, resource) => {
        if (operation.startsWith("view-run-")) {
            const idx = Number.parseInt(operation.slice("view-run-".length), 10);
            const target = benchmarkRunIds[idx];
            if (target) {
                navigate("benchmark-run-detail", {
                    benchmarkRunId: target.id,
                });
            }
        }
        else if (operation === "clone-job") {
            navigate("benchmark-job-create", buildCloneParams(resource));
        }
    };
    // Check if job is still in progress for polling
    const isRunning = job.state === "running" ||
        job.state === "queued" ||
        job.state === "initializing";
    return (_jsx(ResourceDetailPage, { resource: job, resourceType: "Benchmark Jobs", getDisplayName: (j) => j.name || j.id, getId: (j) => j.id, getStatus: (j) => j.state, detailSections: detailSections, operations: operations, onOperation: handleOperation, onBack: goBack, buildDetailLines: buildJobDetailLines, pollResource: isRunning ? pollJob : undefined, breadcrumbPrefix: [{ label: "Home" }] }));
}
// Renders the breadcrumb plus a single body element (spinner or error message).
function renderStatusScreen(activeLabel, body) {
    return (_jsxs(_Fragment, { children: [_jsx(Breadcrumb, { items: [
                    { label: "Home" },
                    { label: "Benchmark Jobs" },
                    { label: activeLabel, active: true },
                ] }), body] }));
}
// Formats a millisecond duration as "Nms", "Ns", "Nm Ns" or "Nh Nm".
function formatDuration(ms) {
    if (ms < 1000)
        return `${ms}ms`;
    const seconds = Math.floor(ms / 1000);
    if (seconds < 60)
        return `${seconds}s`;
    const minutes = Math.floor(seconds / 60);
    const remainingSeconds = seconds % 60;
    if (minutes < 60)
        return `${minutes}m ${remainingSeconds}s`;
    const hours = Math.floor(minutes / 60);
    const remainingMinutes = minutes % 60;
    return `${hours}h ${remainingMinutes}m`;
}
// Returns the agent name of an in-progress run, or a placeholder when absent.
function inProgressAgentName(run) {
    if (run.agent_config && "name" in run.agent_config) {
        return run.agent_config.name;
    }
    return "Unknown Agent";
}
// Returns the distinct benchmark run IDs of a job: outcomes first, then in-progress runs.
function collectRunIds(job) {
    const ids = new Set();
    (job.benchmark_outcomes ?? []).forEach((outcome) => ids.add(outcome.benchmark_run_id));
    (job.in_progress_runs ?? []).forEach((run) => ids.add(run.benchmark_run_id));
    return [...ids];
}
// Builds the "Summary" fields: creation time, overall score, scenario totals, failure reason.
function buildSummaryFields(job) {
    const fields = [];
    if (job.create_time_ms) {
        fields.push({
            label: "Created",
            value: formatTimestamp(job.create_time_ms),
        });
    }
    const outcomes = job.benchmark_outcomes ?? [];
    if (outcomes.length > 0) {
        // Overall score is the mean of the per-run average scores that are present.
        const scores = outcomes
            .map((o) => o.average_score)
            .filter((s) => s !== null && s !== undefined);
        if (scores.length > 0) {
            const avgScore = scores.reduce((a, b) => a + b, 0) / scores.length;
            fields.push({
                label: "Overall Score",
                value: (_jsx(Text, { color: colors.success, bold: true, children: avgScore.toFixed(2) })),
            });
        }
        // Summary stats
        const totalCompleted = outcomes.reduce((acc, o) => acc + o.n_completed, 0);
        const totalFailed = outcomes.reduce((acc, o) => acc + o.n_failed, 0);
        const totalTimeout = outcomes.reduce((acc, o) => acc + o.n_timeout, 0);
        const total = totalCompleted + totalFailed + totalTimeout;
        fields.push({
            label: "Scenarios",
            value: (_jsxs(Text, { children: [_jsxs(Text, { color: colors.success, children: [totalCompleted, " completed"] }), totalFailed > 0 && (_jsxs(Text, { color: colors.error, children: [" / ", totalFailed, " failed"] })), totalTimeout > 0 && (_jsxs(Text, { color: colors.warning, children: [" / ", totalTimeout, " timeout"] })), _jsxs(Text, { dimColor: true, children: [" (", total, " total)"] })] })),
        });
    }
    if (job.failure_reason) {
        fields.push({
            label: "Failure Reason",
            value: _jsx(Text, { color: colors.error, children: job.failure_reason }),
        });
    }
    return fields;
}
// Builds one record per agent run: finished outcomes, in-progress runs, then pending agents.
function buildAgentRuns(job) {
    const agentRuns = [];
    // First, add completed runs from benchmark_outcomes
    (job.benchmark_outcomes ?? []).forEach((outcome) => {
        // A run counts as failed only when nothing completed and something failed or timed out.
        const hadProblems = outcome.n_failed > 0 || outcome.n_timeout > 0;
        const status = hadProblems && outcome.n_completed === 0 ? "failed" : "completed";
        agentRuns.push({
            agentName: outcome.agent_name,
            modelName: outcome.model_name || undefined,
            status,
            benchmarkRunId: outcome.benchmark_run_id,
            score: outcome.average_score ?? undefined,
            nCompleted: outcome.n_completed,
            nFailed: outcome.n_failed,
            nTimeout: outcome.n_timeout,
            duration: outcome.duration_ms ?? undefined,
        });
    });
    // Add in-progress runs
    (job.in_progress_runs ?? []).forEach((run) => {
        agentRuns.push({
            agentName: inProgressAgentName(run),
            status: "running",
            benchmarkRunId: run.benchmark_run_id,
            duration: run.duration_ms ?? undefined,
            startTime: run.start_time_ms,
        });
    });
    // Add pending agents from job_spec that don't have runs yet
    if (job.job_spec?.agent_configs) {
        const runningOrCompletedAgents = new Set(agentRuns.map((r) => r.agentName));
        job.job_spec.agent_configs.forEach((agent) => {
            if (!runningOrCompletedAgents.has(agent.name)) {
                agentRuns.push({
                    agentName: agent.name,
                    modelName: agent.model_name || undefined,
                    status: "pending",
                });
            }
        });
    }
    return agentRuns;
}
// Builds the detail field (label, rendered value, optional navigate action) for one agent run.
function buildRunField(run) {
    const parts = [];
    // Status indicator
    switch (run.status) {
        case "pending":
            parts.push(_jsxs(Text, { color: colors.textDim, children: [figures.circleDotted, " Pending"] }, "status"));
            break;
        case "running":
            parts.push(_jsxs(Text, { color: colors.info, children: [figures.play, " Running"] }, "status"));
            if (run.duration) {
                parts.push(_jsxs(Text, { dimColor: true, children: [" ", "(", formatDuration(run.duration), ")"] }, "dur"));
            }
            break;
        case "completed":
            parts.push(_jsxs(Text, { color: colors.success, children: [figures.tick, " Completed"] }, "status"));
            if (run.score !== undefined) {
                parts.push(_jsxs(Text, { color: colors.success, bold: true, children: [" ", "Score: ", run.score.toFixed(2)] }, "score"));
            }
            break;
        case "failed":
            parts.push(_jsxs(Text, { color: colors.error, children: [figures.cross, " Failed"] }, "status"));
            if (run.score !== undefined) {
                parts.push(_jsxs(Text, { dimColor: true, children: [" ", "Score: ", run.score.toFixed(2)] }, "score"));
            }
            break;
        case "timeout":
            parts.push(_jsxs(Text, { color: colors.warning, children: [figures.warning, " Timeout"] }, "status"));
            break;
    }
    // Stats for completed/failed runs
    if (run.nCompleted !== undefined) {
        parts.push(_jsxs(Text, { dimColor: true, children: [" ", "(", run.nCompleted, "\u2713", run.nFailed ? ` ${run.nFailed}✗` : "", run.nTimeout ? ` ${run.nTimeout}⏱` : "", ")"] }, "stats"));
    }
    // Duration for completed runs
    if (run.status !== "running" && run.duration) {
        parts.push(_jsxs(Text, { dimColor: true, children: [" ", formatDuration(run.duration)] }, "dur"));
    }
    // Benchmark Run ID (clickable hint)
    if (run.benchmarkRunId) {
        parts.push(_jsxs(Text, { dimColor: true, children: ["\n", " ", figures.arrowRight, " Run:", " ", _jsx(Text, { color: colors.idColor, children: run.benchmarkRunId })] }, "id"));
    }
    // Model name
    if (run.modelName) {
        parts.push(_jsxs(Text, { dimColor: true, children: [" ", "[", run.modelName, "]"] }, "model"));
    }
    const field = {
        label: run.agentName,
        value: _jsx(Text, { children: parts }),
    };
    // Only runs that already have a run ID can be opened.
    if (run.benchmarkRunId) {
        field.action = {
            type: "navigate",
            screen: "benchmark-run-detail",
            params: { benchmarkRunId: run.benchmarkRunId },
            hint: "View Run",
        };
    }
    return field;
}
// Builds the runs section title, switching to a progress breakdown while work remains.
function buildRunsSectionTitle(agentRuns) {
    const pendingCount = agentRuns.filter((r) => r.status === "pending").length;
    const runningCount = agentRuns.filter((r) => r.status === "running").length;
    const completedCount = agentRuns.filter((r) => r.status === "completed" || r.status === "failed").length;
    if (pendingCount === 0 && runningCount === 0) {
        return `Benchmark Runs (${agentRuns.length} agents)`;
    }
    const statusParts = [];
    if (completedCount > 0)
        statusParts.push(`${completedCount} done`);
    if (runningCount > 0)
        statusParts.push(`${runningCount} running`);
    if (pendingCount > 0)
        statusParts.push(`${pendingCount} pending`);
    return `Benchmark Runs - ${statusParts.join(", ")}`;
}
// Builds the condensed "Job Configuration" fields from a job spec.
function buildSpecFields(spec) {
    const specFields = [];
    if (spec.scenario_ids && spec.scenario_ids.length > 0) {
        specFields.push({
            label: "Scenarios",
            value: `${spec.scenario_ids.length} scenario(s)`,
        });
    }
    if (spec.orchestrator_config) {
        const orch = spec.orchestrator_config;
        const orchParts = [];
        if (orch.n_concurrent_trials)
            orchParts.push(`${orch.n_concurrent_trials} concurrent`);
        if (orch.n_attempts)
            orchParts.push(`${orch.n_attempts} retries`);
        // A multiplier of 1 leaves timeouts unchanged, so it is not worth showing.
        if (orch.timeout_multiplier && orch.timeout_multiplier !== 1) {
            orchParts.push(`${orch.timeout_multiplier}x timeout`);
        }
        if (orchParts.length > 0) {
            specFields.push({
                label: "Orchestrator",
                value: orchParts.join(", "),
            });
        }
    }
    return specFields;
}
// Builds the "Job Source" fields: source type and a link to the source benchmark.
function buildSourceFields(source) {
    const sourceFields = [];
    if ("type" in source) {
        sourceFields.push({
            label: "Source Type",
            value: source.type,
        });
    }
    if ("benchmark_id" in source && source.benchmark_id) {
        sourceFields.push({
            label: "Benchmark ID",
            value: _jsx(Text, { color: colors.idColor, children: source.benchmark_id }),
            action: {
                type: "navigate",
                screen: "benchmark-detail",
                params: { benchmarkId: source.benchmark_id },
                hint: "View Benchmark",
            },
        });
    }
    return sourceFields;
}
// True for a non-empty array; an empty ID list cannot identify a clone source.
function hasItems(value) {
    return Array.isArray(value) && value.length > 0;
}
// Builds the navigation params that pre-fill the job-create screen from an existing job.
function buildCloneParams(resource) {
    const cloneParams = {
        cloneFromJobId: resource.id,
        cloneJobName: resource.name,
    };
    const spec = resource.job_spec;
    // Determine source type and extract IDs
    if (spec) {
        // Check if it's a scenarios spec (has a non-empty scenario_ids array)
        if (hasItems(spec.scenario_ids)) {
            cloneParams.cloneSourceType = "scenarios";
            cloneParams.initialScenarioIds = spec.scenario_ids.join(",");
        }
        // Check if it's a benchmark spec (has benchmark_id)
        else if (spec.benchmark_id) {
            cloneParams.cloneSourceType = "benchmark";
            cloneParams.initialBenchmarkIds = spec.benchmark_id;
        }
        // Fallback: check job_source
        else if (resource.job_source) {
            const source = resource.job_source;
            if (hasItems(source.scenario_ids)) {
                cloneParams.cloneSourceType = "scenarios";
                cloneParams.initialScenarioIds = source.scenario_ids.join(",");
            }
            else if (source.benchmark_id) {
                cloneParams.cloneSourceType = "benchmark";
                cloneParams.initialBenchmarkIds = source.benchmark_id;
            }
        }
    }
    // Extract agent configs - both full configs and legacy fields
    if (spec?.agent_configs) {
        const agentConfigs = spec.agent_configs.map((a) => ({
            agentId: a.agent_id,
            name: a.name,
            modelName: a.model_name,
            timeoutSeconds: a.timeout_seconds,
            kwargs: a.kwargs,
            environmentVariables: a.agent_environment?.environment_variables,
            secrets: a.agent_environment?.secrets,
        }));
        cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs);
        // Also extract legacy fields for form initialization
        cloneParams.cloneAgentIds = spec.agent_configs
            .map((a) => a.agent_id)
            .join(",");
        cloneParams.cloneAgentNames = spec.agent_configs
            .map((a) => a.name)
            .join(",");
    }
    // Extract orchestrator config
    if (spec?.orchestrator_config) {
        const orch = spec.orchestrator_config;
        cloneParams.cloneOrchestratorConfig = JSON.stringify({
            nAttempts: orch.n_attempts,
            nConcurrentTrials: orch.n_concurrent_trials,
            quiet: orch.quiet,
            timeoutMultiplier: orch.timeout_multiplier,
        });
    }
    return cloneParams;
}
// Builds the keyed Text lines of the full details view, ending with the raw JSON dump.
function buildJobDetailLines(j) {
    const lines = [];
    // Core Information
    lines.push(_jsx(Text, { color: colors.warning, bold: true, children: "Benchmark Job Details" }, "core-title"));
    lines.push(_jsxs(Text, { color: colors.idColor, children: [" ", "ID: ", j.id] }, "core-id"));
    lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Name: ", j.name || "(none)"] }, "core-name"));
    lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Status: ", j.state] }, "core-status"));
    if (j.create_time_ms) {
        lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Created: ", new Date(j.create_time_ms).toLocaleString()] }, "core-created"));
    }
    if (j.failure_reason) {
        lines.push(_jsxs(Text, { color: colors.error, children: [" ", "Failure: ", j.failure_reason] }, "core-failure"));
    }
    lines.push(_jsx(Text, { children: " " }, "core-space"));
    // Benchmark Runs - unified view
    lines.push(_jsx(Text, { color: colors.primary, bold: true, children: "Benchmark Runs" }, "runs-title"));
    // Completed runs from benchmark_outcomes
    if (j.benchmark_outcomes && j.benchmark_outcomes.length > 0) {
        j.benchmark_outcomes.forEach((outcome, idx) => {
            const scoreStr = outcome.average_score !== undefined && outcome.average_score !== null
                ? `Score: ${outcome.average_score.toFixed(2)}`
                : "No score";
            const statsStr = `${outcome.n_completed}✓ ${outcome.n_failed}✗ ${outcome.n_timeout}⏱`;
            const durationStr = outcome.duration_ms
                ? formatDuration(outcome.duration_ms)
                : "";
            const statusIcon = outcome.n_failed > 0 ? figures.cross : figures.tick;
            const statusColor = outcome.n_failed > 0 ? colors.error : colors.success;
            lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: statusColor, children: statusIcon }), _jsxs(Text, { color: colors.info, children: [" ", outcome.agent_name || `Agent ${idx + 1}`] }), _jsxs(Text, { dimColor: true, children: [": ", scoreStr, " (", statsStr, ") ", durationStr] })] }, `outcome-${idx}`));
            lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Run ID: ", outcome.benchmark_run_id] }, `outcome-${idx}-id`));
            // Show scenario outcomes
            if (outcome.scenario_outcomes && outcome.scenario_outcomes.length > 0) {
                outcome.scenario_outcomes.forEach((scenario, sIdx) => {
                    const scenarioScore = scenario.score !== undefined && scenario.score !== null
                        ? scenario.score.toFixed(2)
                        : "-";
                    const scenarioDur = scenario.duration_ms
                        ? formatDuration(scenario.duration_ms)
                        : "";
                    const scenarioIcon = scenario.state === "COMPLETED"
                        ? figures.tick
                        : scenario.state === "FAILED"
                            ? figures.cross
                            : figures.warning;
                    const scenarioColor = scenario.state === "COMPLETED"
                        ? colors.success
                        : scenario.state === "FAILED"
                            ? colors.error
                            : colors.warning;
                    lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: scenarioColor, children: scenarioIcon }), _jsxs(Text, { dimColor: true, children: [" ", scenario.scenario_name, ": ", scenario.state, " (score:", " ", scenarioScore, ") ", scenarioDur] })] }, `outcome-${idx}-scenario-${sIdx}`));
                    if (scenario.failure_reason) {
                        lines.push(_jsxs(Text, { color: colors.error, children: [" ", scenario.failure_reason.exception_type, ":", " ", scenario.failure_reason.exception_message] }, `outcome-${idx}-scenario-${sIdx}-fail`));
                    }
                });
            }
        });
    }
    // In-progress runs
    if (j.in_progress_runs && j.in_progress_runs.length > 0) {
        j.in_progress_runs.forEach((run, idx) => {
            const agentName = inProgressAgentName(run);
            const durationStr = run.duration_ms
                ? formatDuration(run.duration_ms)
                : "";
            lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: colors.info, children: figures.play }), _jsxs(Text, { color: colors.info, children: [" ", agentName] }), _jsxs(Text, { dimColor: true, children: [": Running ", durationStr] })] }, `run-${idx}`));
            lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Run ID: ", run.benchmark_run_id] }, `run-${idx}-id`));
        });
    }
    // Pending agents
    if (j.job_spec?.agent_configs) {
        const runningOrCompletedAgents = new Set();
        j.benchmark_outcomes?.forEach((o) => runningOrCompletedAgents.add(o.agent_name));
        j.in_progress_runs?.forEach((r) => {
            if (r.agent_config && "name" in r.agent_config) {
                runningOrCompletedAgents.add(r.agent_config.name);
            }
        });
        j.job_spec.agent_configs.forEach((agent, idx) => {
            if (!runningOrCompletedAgents.has(agent.name)) {
                lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: colors.textDim, children: figures.circleDotted }), _jsxs(Text, { dimColor: true, children: [" ", agent.name, ": Pending"] })] }, `pending-${idx}`));
            }
        });
    }
    lines.push(_jsx(Text, { children: " " }, "runs-space"));
    // Job Configuration
    if (j.job_spec) {
        lines.push(_jsx(Text, { color: colors.secondary, bold: true, children: "Job Configuration" }, "spec-title"));
        if (j.job_spec.scenario_ids) {
            lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Scenarios: ", j.job_spec.scenario_ids.length] }, "spec-scenarios"));
        }
        if (j.job_spec.orchestrator_config) {
            const orch = j.job_spec.orchestrator_config;
            const orchInfo = [];
            if (orch.n_concurrent_trials)
                orchInfo.push(`${orch.n_concurrent_trials} concurrent`);
            if (orch.n_attempts)
                orchInfo.push(`${orch.n_attempts} retries`);
            if (orchInfo.length > 0) {
                lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Orchestrator: ", orchInfo.join(", ")] }, "spec-orch"));
            }
        }
        lines.push(_jsx(Text, { children: " " }, "spec-space"));
    }
    // Raw JSON
    lines.push(_jsx(Text, { color: colors.warning, bold: true, children: "Raw JSON" }, "json-title"));
    const jsonLines = JSON.stringify(j, null, 2).split("\n");
    jsonLines.forEach((line, idx) => {
        lines.push(_jsxs(Text, { dimColor: true, children: [" ", line] }, `json-${idx}`));
    });
    return lines;
}
|