@runloop/rl-cli 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +21 -7
  2. package/dist/cli.js +0 -0
  3. package/dist/commands/blueprint/delete.js +21 -0
  4. package/dist/commands/blueprint/list.js +226 -174
  5. package/dist/commands/blueprint/prune.js +13 -28
  6. package/dist/commands/devbox/create.js +41 -0
  7. package/dist/commands/devbox/list.js +142 -110
  8. package/dist/commands/devbox/rsync.js +69 -41
  9. package/dist/commands/devbox/scp.js +180 -39
  10. package/dist/commands/devbox/tunnel.js +4 -19
  11. package/dist/commands/gateway-config/create.js +53 -0
  12. package/dist/commands/gateway-config/delete.js +21 -0
  13. package/dist/commands/gateway-config/get.js +18 -0
  14. package/dist/commands/gateway-config/list.js +493 -0
  15. package/dist/commands/gateway-config/update.js +70 -0
  16. package/dist/commands/snapshot/list.js +11 -2
  17. package/dist/commands/snapshot/prune.js +265 -0
  18. package/dist/components/BenchmarkMenu.js +23 -3
  19. package/dist/components/DetailedInfoView.js +20 -0
  20. package/dist/components/DevboxActionsMenu.js +26 -62
  21. package/dist/components/DevboxCreatePage.js +763 -15
  22. package/dist/components/DevboxDetailPage.js +73 -24
  23. package/dist/components/GatewayConfigCreatePage.js +272 -0
  24. package/dist/components/LogsViewer.js +6 -40
  25. package/dist/components/ResourceDetailPage.js +143 -160
  26. package/dist/components/ResourceListView.js +3 -33
  27. package/dist/components/ResourcePicker.js +234 -0
  28. package/dist/components/SecretCreatePage.js +71 -27
  29. package/dist/components/SettingsMenu.js +12 -2
  30. package/dist/components/StateHistory.js +1 -20
  31. package/dist/components/StatusBadge.js +9 -2
  32. package/dist/components/StreamingLogsViewer.js +8 -42
  33. package/dist/components/form/FormTextInput.js +4 -2
  34. package/dist/components/resourceDetailTypes.js +18 -0
  35. package/dist/hooks/useInputHandler.js +103 -0
  36. package/dist/router/Router.js +79 -2
  37. package/dist/screens/BenchmarkDetailScreen.js +163 -0
  38. package/dist/screens/BenchmarkJobCreateScreen.js +524 -0
  39. package/dist/screens/BenchmarkJobDetailScreen.js +614 -0
  40. package/dist/screens/BenchmarkJobListScreen.js +479 -0
  41. package/dist/screens/BenchmarkListScreen.js +266 -0
  42. package/dist/screens/BenchmarkMenuScreen.js +6 -0
  43. package/dist/screens/BenchmarkRunDetailScreen.js +258 -22
  44. package/dist/screens/BenchmarkRunListScreen.js +21 -1
  45. package/dist/screens/BlueprintDetailScreen.js +5 -1
  46. package/dist/screens/DevboxCreateScreen.js +2 -2
  47. package/dist/screens/GatewayConfigDetailScreen.js +236 -0
  48. package/dist/screens/GatewayConfigListScreen.js +7 -0
  49. package/dist/screens/ScenarioRunDetailScreen.js +6 -0
  50. package/dist/screens/SecretDetailScreen.js +26 -2
  51. package/dist/screens/SettingsMenuScreen.js +3 -0
  52. package/dist/screens/SnapshotDetailScreen.js +6 -0
  53. package/dist/services/agentService.js +42 -0
  54. package/dist/services/benchmarkJobService.js +122 -0
  55. package/dist/services/benchmarkService.js +47 -0
  56. package/dist/services/gatewayConfigService.js +153 -0
  57. package/dist/services/scenarioService.js +34 -0
  58. package/dist/store/benchmarkJobStore.js +66 -0
  59. package/dist/store/benchmarkStore.js +63 -0
  60. package/dist/store/gatewayConfigStore.js +83 -0
  61. package/dist/utils/browser.js +22 -0
  62. package/dist/utils/clipboard.js +41 -0
  63. package/dist/utils/commands.js +105 -9
  64. package/dist/utils/gatewayConfigValidation.js +58 -0
  65. package/dist/utils/time.js +121 -0
  66. package/package.json +43 -43
@@ -0,0 +1,614 @@
1
+ import { jsx as _jsx, Fragment as _Fragment, jsxs as _jsxs } from "react/jsx-runtime";
2
+ /**
3
+ * BenchmarkJobDetailScreen - Detail page for benchmark jobs
4
+ * Uses the generic ResourceDetailPage component
5
+ */
6
+ import React from "react";
7
+ import { Text } from "ink";
8
+ import figures from "figures";
9
+ import { useNavigation } from "../store/navigationStore.js";
10
+ import { useBenchmarkJobStore, } from "../store/benchmarkJobStore.js";
11
+ import { ResourceDetailPage, formatTimestamp, } from "../components/ResourceDetailPage.js";
12
+ import { getBenchmarkJob } from "../services/benchmarkJobService.js";
13
+ import { getBenchmarkRun } from "../services/benchmarkService.js";
14
+ import { SpinnerComponent } from "../components/Spinner.js";
15
+ import { ErrorMessage } from "../components/ErrorMessage.js";
16
+ import { Breadcrumb } from "../components/Breadcrumb.js";
17
+ import { colors } from "../utils/theme.js";
18
+ export function BenchmarkJobDetailScreen({ benchmarkJobId, }) {
19
+ const { goBack, navigate } = useNavigation();
20
+ const benchmarkJobs = useBenchmarkJobStore((state) => state.benchmarkJobs);
21
+ const [loading, setLoading] = React.useState(false);
22
+ const [error, setError] = React.useState(null);
23
+ const [fetchedJob, setFetchedJob] = React.useState(null);
24
+ const [runNames, setRunNames] = React.useState(new Map());
25
+ // Find job in store first
26
+ const jobFromStore = benchmarkJobs.find((j) => j.id === benchmarkJobId);
27
+ // Polling function
28
/**
 * Polling callback passed to ResourceDetailPage as `pollResource`.
 *
 * @returns {Promise<object|null>} The job returned by `getBenchmarkJob`, or
 *   `null` when no `benchmarkJobId` was supplied.
 */
const pollJob = React.useCallback(async () => {
    const hasJobId = Boolean(benchmarkJobId);
    return hasJobId ? getBenchmarkJob(benchmarkJobId) : null;
}, [benchmarkJobId]);
33
+ // Fetch job from API if not in store
34
// Load the full job record once per job ID.
//
// The effect depends on `benchmarkJobId` only. Depending on `loading` or
// `fetchedJob` (and gating on them) makes a failed request re-trigger itself:
// the failure flips `loading` back to false, the effect re-runs, clears the
// error and refetches, forever.
React.useEffect(() => {
    if (!benchmarkJobId) {
        return undefined;
    }
    // Set by the cleanup below so that a response arriving after unmount, or
    // after the ID changed, is ignored instead of overwriting newer state.
    let cancelled = false;
    // Drop any job fetched for a previous ID so it is not shown for this one.
    setFetchedJob(null);
    setLoading(true);
    setError(null);
    getBenchmarkJob(benchmarkJobId)
        .then((loadedJob) => {
            if (cancelled) {
                return;
            }
            setFetchedJob(loadedJob);
            setLoading(false);
        })
        .catch((err) => {
            if (cancelled) {
                return;
            }
            setError(err);
            setLoading(false);
        });
    return () => {
        cancelled = true;
    };
}, [benchmarkJobId]);
49
+ // Use fetched job for full details, fall back to store for basic display
50
+ const job = fetchedJob || jobFromStore;
51
+ // Fetch run names when job is loaded
52
// Resolve a display name for every benchmark run referenced by the job and
// store the result in `runNames` (run ID -> name).
React.useEffect(() => {
    if (!job) {
        return undefined;
    }
    // A Set keeps one entry per run ID in first-seen order, so each run is
    // looked up once even if it appears in several outcomes or in both lists.
    const runIds = new Set();
    job.benchmark_outcomes?.forEach((outcome) => {
        runIds.add(outcome.benchmark_run_id);
    });
    job.in_progress_runs?.forEach((run) => {
        runIds.add(run.benchmark_run_id);
    });
    // Set by the cleanup below so that lookups started for an older `job`
    // (or finishing after unmount) cannot overwrite newer names.
    let cancelled = false;
    Promise.all([...runIds].map(async (runId) => {
        try {
            const run = await getBenchmarkRun(runId);
            return [runId, run.name || runId];
        }
        catch {
            // Best effort: a failed lookup falls back to showing the raw run ID.
            return [runId, runId];
        }
    })).then((entries) => {
        if (!cancelled) {
            setRunNames(new Map(entries));
        }
    });
    return () => {
        cancelled = true;
    };
}, [job]);
87
+ // Show loading state
88
+ if (!job && benchmarkJobId && !error) {
89
+ return (_jsxs(_Fragment, { children: [_jsx(Breadcrumb, { items: [
90
+ { label: "Home" },
91
+ { label: "Benchmark Jobs" },
92
+ { label: "Loading...", active: true },
93
+ ] }), _jsx(SpinnerComponent, { message: "Loading benchmark job details..." })] }));
94
+ }
95
+ // Show error state
96
+ if (error && !job) {
97
+ return (_jsxs(_Fragment, { children: [_jsx(Breadcrumb, { items: [
98
+ { label: "Home" },
99
+ { label: "Benchmark Jobs" },
100
+ { label: "Error", active: true },
101
+ ] }), _jsx(ErrorMessage, { message: "Failed to load benchmark job details", error: error })] }));
102
+ }
103
+ // Show not found error
104
+ if (!job) {
105
+ return (_jsxs(_Fragment, { children: [_jsx(Breadcrumb, { items: [
106
+ { label: "Home" },
107
+ { label: "Benchmark Jobs" },
108
+ { label: "Not Found", active: true },
109
+ ] }), _jsx(ErrorMessage, { message: `Benchmark job ${benchmarkJobId || "unknown"} not found`, error: new Error("Benchmark job not found") })] }));
110
+ }
111
+ // Helper to format duration
112
/**
 * Renders a millisecond duration as a short human-readable string.
 *
 * Output uses the largest two units that apply: `"850ms"`, `"42s"`,
 * `"3m 7s"`, `"1h 30m"` (seconds are dropped once the duration reaches an hour).
 *
 * @param {number} ms - Duration in milliseconds. Fractional values are rounded
 *   to the nearest millisecond and negative values are clamped to zero.
 * @returns {string} The formatted duration, or `"-"` when `ms` is missing or
 *   not a finite number.
 */
const formatDuration = (ms) => {
    if (ms === null || ms === undefined) {
        return "-";
    }
    const numericMs = Number(ms);
    if (!Number.isFinite(numericMs)) {
        return "-";
    }
    // Round before choosing a unit so 999.6 becomes "1s", not "1000ms".
    const totalMs = Math.max(0, Math.round(numericMs));
    if (totalMs < 1000) {
        return `${totalMs}ms`;
    }
    const totalSeconds = Math.floor(totalMs / 1000);
    if (totalSeconds < 60) {
        return `${totalSeconds}s`;
    }
    const totalMinutes = Math.floor(totalSeconds / 60);
    if (totalMinutes < 60) {
        return `${totalMinutes}m ${totalSeconds % 60}s`;
    }
    const hours = Math.floor(totalMinutes / 60);
    return `${hours}h ${totalMinutes % 60}m`;
};
126
+ // Build detail sections
127
+ const detailSections = [];
128
+ // Basic details section
129
+ const basicFields = [];
130
+ if (job.create_time_ms) {
131
+ basicFields.push({
132
+ label: "Created",
133
+ value: formatTimestamp(job.create_time_ms),
134
+ });
135
+ }
136
+ // Calculate overall score if available
137
+ if (job.benchmark_outcomes && job.benchmark_outcomes.length > 0) {
138
+ const scores = job.benchmark_outcomes
139
+ .map((o) => o.average_score)
140
+ .filter((s) => s !== null && s !== undefined);
141
+ if (scores.length > 0) {
142
+ const avgScore = scores.reduce((a, b) => a + b, 0) / scores.length;
143
+ basicFields.push({
144
+ label: "Overall Score",
145
+ value: (_jsx(Text, { color: colors.success, bold: true, children: avgScore.toFixed(2) })),
146
+ });
147
+ }
148
+ }
149
+ // Summary stats
150
+ if (job.benchmark_outcomes && job.benchmark_outcomes.length > 0) {
151
+ const totalCompleted = job.benchmark_outcomes.reduce((acc, o) => acc + o.n_completed, 0);
152
+ const totalFailed = job.benchmark_outcomes.reduce((acc, o) => acc + o.n_failed, 0);
153
+ const totalTimeout = job.benchmark_outcomes.reduce((acc, o) => acc + o.n_timeout, 0);
154
+ const total = totalCompleted + totalFailed + totalTimeout;
155
+ basicFields.push({
156
+ label: "Scenarios",
157
+ value: (_jsxs(Text, { children: [_jsxs(Text, { color: colors.success, children: [totalCompleted, " completed"] }), totalFailed > 0 && (_jsxs(Text, { color: colors.error, children: [" / ", totalFailed, " failed"] })), totalTimeout > 0 && (_jsxs(Text, { color: colors.warning, children: [" / ", totalTimeout, " timeout"] })), _jsxs(Text, { dimColor: true, children: [" (", total, " total)"] })] })),
158
+ });
159
+ }
160
+ if (job.failure_reason) {
161
+ basicFields.push({
162
+ label: "Failure Reason",
163
+ value: _jsx(Text, { color: colors.error, children: job.failure_reason }),
164
+ });
165
+ }
166
+ if (basicFields.length > 0) {
167
+ detailSections.push({
168
+ title: "Summary",
169
+ icon: figures.squareSmallFilled,
170
+ color: colors.warning,
171
+ fields: basicFields,
172
+ });
173
+ }
174
+ const agentRuns = [];
175
+ // First, add completed runs from benchmark_outcomes
176
+ if (job.benchmark_outcomes) {
177
+ job.benchmark_outcomes.forEach((outcome) => {
178
+ const total = outcome.n_completed + outcome.n_failed + outcome.n_timeout;
179
+ const status = outcome.n_failed > 0 || outcome.n_timeout > 0
180
+ ? outcome.n_completed === 0
181
+ ? "failed"
182
+ : "completed"
183
+ : "completed";
184
+ agentRuns.push({
185
+ agentName: outcome.agent_name,
186
+ modelName: outcome.model_name || undefined,
187
+ status,
188
+ benchmarkRunId: outcome.benchmark_run_id,
189
+ score: outcome.average_score ?? undefined,
190
+ nCompleted: outcome.n_completed,
191
+ nFailed: outcome.n_failed,
192
+ nTimeout: outcome.n_timeout,
193
+ duration: outcome.duration_ms ?? undefined,
194
+ });
195
+ });
196
+ }
197
+ // Add in-progress runs
198
+ if (job.in_progress_runs) {
199
+ job.in_progress_runs.forEach((run) => {
200
+ // Get agent name from agent_config if available
201
+ let agentName = "Unknown Agent";
202
+ if (run.agent_config && "name" in run.agent_config) {
203
+ agentName = run.agent_config.name;
204
+ }
205
+ agentRuns.push({
206
+ agentName,
207
+ status: "running",
208
+ benchmarkRunId: run.benchmark_run_id,
209
+ duration: run.duration_ms ?? undefined,
210
+ startTime: run.start_time_ms,
211
+ });
212
+ });
213
+ }
214
+ // Add pending agents from job_spec that don't have runs yet
215
+ if (job.job_spec?.agent_configs) {
216
+ const runningOrCompletedAgents = new Set(agentRuns.map((r) => r.agentName));
217
+ job.job_spec.agent_configs.forEach((agent) => {
218
+ if (!runningOrCompletedAgents.has(agent.name)) {
219
+ agentRuns.push({
220
+ agentName: agent.name,
221
+ modelName: agent.model_name || undefined,
222
+ status: "pending",
223
+ });
224
+ }
225
+ });
226
+ }
227
+ // Benchmark Runs section - show all agent runs with their status
228
+ if (agentRuns.length > 0) {
229
+ const runsFields = agentRuns.map((run) => {
230
+ const parts = [];
231
+ // Status indicator
232
+ switch (run.status) {
233
+ case "pending":
234
+ parts.push(_jsxs(Text, { color: colors.textDim, children: [figures.circleDotted, " Pending"] }, "status"));
235
+ break;
236
+ case "running":
237
+ parts.push(_jsxs(Text, { color: colors.info, children: [figures.play, " Running"] }, "status"));
238
+ if (run.duration) {
239
+ parts.push(_jsxs(Text, { dimColor: true, children: [" ", "(", formatDuration(run.duration), ")"] }, "dur"));
240
+ }
241
+ break;
242
+ case "completed":
243
+ parts.push(_jsxs(Text, { color: colors.success, children: [figures.tick, " Completed"] }, "status"));
244
+ if (run.score !== undefined) {
245
+ parts.push(_jsxs(Text, { color: colors.success, bold: true, children: [" ", "Score: ", run.score.toFixed(2)] }, "score"));
246
+ }
247
+ break;
248
+ case "failed":
249
+ parts.push(_jsxs(Text, { color: colors.error, children: [figures.cross, " Failed"] }, "status"));
250
+ if (run.score !== undefined) {
251
+ parts.push(_jsxs(Text, { dimColor: true, children: [" ", "Score: ", run.score.toFixed(2)] }, "score"));
252
+ }
253
+ break;
254
+ case "timeout":
255
+ parts.push(_jsxs(Text, { color: colors.warning, children: [figures.warning, " Timeout"] }, "status"));
256
+ break;
257
+ }
258
+ // Stats for completed/failed runs
259
+ if (run.nCompleted !== undefined) {
260
+ parts.push(_jsxs(Text, { dimColor: true, children: [" ", "(", run.nCompleted, "\u2713", run.nFailed ? ` ${run.nFailed}✗` : "", run.nTimeout ? ` ${run.nTimeout}⏱` : "", ")"] }, "stats"));
261
+ }
262
+ // Duration for completed runs
263
+ if (run.status !== "running" && run.duration) {
264
+ parts.push(_jsxs(Text, { dimColor: true, children: [" ", formatDuration(run.duration)] }, "dur"));
265
+ }
266
+ // Benchmark Run ID (clickable hint)
267
+ if (run.benchmarkRunId) {
268
+ parts.push(_jsxs(Text, { dimColor: true, children: ["\n", " ", figures.arrowRight, " Run:", " ", _jsx(Text, { color: colors.idColor, children: run.benchmarkRunId })] }, "id"));
269
+ }
270
+ // Model name
271
+ if (run.modelName) {
272
+ parts.push(_jsxs(Text, { dimColor: true, children: [" ", "[", run.modelName, "]"] }, "model"));
273
+ }
274
+ return {
275
+ label: run.agentName,
276
+ value: _jsx(Text, { children: parts }),
277
+ ...(run.benchmarkRunId
278
+ ? {
279
+ action: {
280
+ type: "navigate",
281
+ screen: "benchmark-run-detail",
282
+ params: { benchmarkRunId: run.benchmarkRunId },
283
+ hint: "View Run",
284
+ },
285
+ }
286
+ : {}),
287
+ };
288
+ });
289
+ const pendingCount = agentRuns.filter((r) => r.status === "pending").length;
290
+ const runningCount = agentRuns.filter((r) => r.status === "running").length;
291
+ const completedCount = agentRuns.filter((r) => r.status === "completed" || r.status === "failed").length;
292
+ let sectionTitle = `Benchmark Runs (${agentRuns.length} agents)`;
293
+ if (pendingCount > 0 || runningCount > 0) {
294
+ const statusParts = [];
295
+ if (completedCount > 0)
296
+ statusParts.push(`${completedCount} done`);
297
+ if (runningCount > 0)
298
+ statusParts.push(`${runningCount} running`);
299
+ if (pendingCount > 0)
300
+ statusParts.push(`${pendingCount} pending`);
301
+ sectionTitle = `Benchmark Runs - ${statusParts.join(", ")}`;
302
+ }
303
+ detailSections.push({
304
+ title: sectionTitle,
305
+ icon: figures.pointer,
306
+ color: colors.primary,
307
+ fields: runsFields,
308
+ });
309
+ }
310
+ // Job Configuration section (condensed)
311
+ if (job.job_spec) {
312
+ const spec = job.job_spec;
313
+ const specFields = [];
314
+ if (spec.scenario_ids && spec.scenario_ids.length > 0) {
315
+ specFields.push({
316
+ label: "Scenarios",
317
+ value: `${spec.scenario_ids.length} scenario(s)`,
318
+ });
319
+ }
320
+ if (spec.orchestrator_config) {
321
+ const orch = spec.orchestrator_config;
322
+ const orchParts = [];
323
+ if (orch.n_concurrent_trials)
324
+ orchParts.push(`${orch.n_concurrent_trials} concurrent`);
325
+ if (orch.n_attempts)
326
+ orchParts.push(`${orch.n_attempts} retries`);
327
+ if (orch.timeout_multiplier && orch.timeout_multiplier !== 1) {
328
+ orchParts.push(`${orch.timeout_multiplier}x timeout`);
329
+ }
330
+ if (orchParts.length > 0) {
331
+ specFields.push({
332
+ label: "Orchestrator",
333
+ value: orchParts.join(", "),
334
+ });
335
+ }
336
+ }
337
+ if (specFields.length > 0) {
338
+ detailSections.push({
339
+ title: "Job Configuration",
340
+ icon: figures.circleFilled,
341
+ color: colors.secondary,
342
+ fields: specFields,
343
+ });
344
+ }
345
+ }
346
+ // Job Source section
347
+ if (job.job_source) {
348
+ const source = job.job_source;
349
+ const sourceFields = [];
350
+ if ("type" in source) {
351
+ sourceFields.push({
352
+ label: "Source Type",
353
+ value: source.type,
354
+ });
355
+ }
356
+ if ("benchmark_id" in source && source.benchmark_id) {
357
+ sourceFields.push({
358
+ label: "Benchmark ID",
359
+ value: _jsx(Text, { color: colors.idColor, children: source.benchmark_id }),
360
+ action: {
361
+ type: "navigate",
362
+ screen: "benchmark-detail",
363
+ params: { benchmarkId: source.benchmark_id },
364
+ hint: "View Benchmark",
365
+ },
366
+ });
367
+ }
368
+ if (sourceFields.length > 0) {
369
+ detailSections.push({
370
+ title: "Job Source",
371
+ icon: figures.info,
372
+ color: colors.textDim,
373
+ fields: sourceFields,
374
+ });
375
+ }
376
+ }
377
+ // Collect benchmark run IDs for operations
378
+ const benchmarkRunIds = [];
379
+ if (job.benchmark_outcomes) {
380
+ job.benchmark_outcomes.forEach((outcome) => {
381
+ // Use fetched run name from state, fallback to run ID
382
+ const runName = runNames.get(outcome.benchmark_run_id) || outcome.benchmark_run_id;
383
+ benchmarkRunIds.push({
384
+ id: outcome.benchmark_run_id,
385
+ name: runName,
386
+ });
387
+ });
388
+ }
389
+ if (job.in_progress_runs) {
390
+ job.in_progress_runs.forEach((run) => {
391
+ // Avoid duplicates
392
+ if (!benchmarkRunIds.find((r) => r.id === run.benchmark_run_id)) {
393
+ // Use fetched run name from state, fallback to run ID
394
+ const runName = runNames.get(run.benchmark_run_id) || run.benchmark_run_id;
395
+ benchmarkRunIds.push({ id: run.benchmark_run_id, name: runName });
396
+ }
397
+ });
398
+ }
399
+ // Operations available for benchmark jobs
400
+ const operations = [];
401
+ // Add "View Run" operations for each benchmark run (limit to first 9 for shortcuts)
402
+ benchmarkRunIds.slice(0, 9).forEach((run, idx) => {
403
+ operations.push({
404
+ key: `view-run-${idx}`,
405
+ label: `View Run: ${run.name}`,
406
+ color: colors.info,
407
+ icon: figures.arrowRight,
408
+ shortcut: String(idx + 1),
409
+ });
410
+ });
411
+ // Always add clone job option
412
+ operations.push({
413
+ key: "clone-job",
414
+ label: "Clone Job",
415
+ color: colors.success,
416
+ icon: figures.play,
417
+ shortcut: "c",
418
+ });
419
+ // Handle operation selection
420
/**
 * Handles an operation selected on the detail page.
 *
 * - `view-run-<n>`: navigates to the benchmark run at index `n` of
 *   `benchmarkRunIds`; does nothing when that index has no entry.
 * - `clone-job`: navigates to the job-create screen with parameters copied
 *   from `resource` (source type and IDs, agent configs, orchestrator config).
 * - anything else: ignored.
 *
 * @param {string} operation - Key of the selected operation.
 * @param {object} resource - The benchmark job the operation applies to.
 * @returns {Promise<void>}
 */
const handleOperation = async (operation, resource) => {
    const viewRunPrefix = "view-run-";
    if (operation.startsWith(viewRunPrefix)) {
        const idx = Number.parseInt(operation.slice(viewRunPrefix.length), 10);
        const targetRun = benchmarkRunIds[idx];
        if (targetRun) {
            navigate("benchmark-run-detail", {
                benchmarkRunId: targetRun.id,
            });
        }
        return;
    }
    if (operation !== "clone-job") {
        return;
    }
    const cloneParams = {
        cloneFromJobId: resource.id,
        cloneJobName: resource.name,
    };
    // Take the source from job_spec when it names scenarios or a benchmark,
    // otherwise from job_source. The two are checked independently so that a
    // job with no job_spec still gets its source from job_source.
    const origin = [resource.job_spec, resource.job_source].find((candidate) => candidate &&
        (Array.isArray(candidate.scenario_ids) || candidate.benchmark_id));
    if (origin) {
        if (Array.isArray(origin.scenario_ids)) {
            cloneParams.cloneSourceType = "scenarios";
            cloneParams.initialScenarioIds = origin.scenario_ids.join(",");
        }
        else {
            cloneParams.cloneSourceType = "benchmark";
            cloneParams.initialBenchmarkIds = origin.benchmark_id;
        }
    }
    // Agent configs: the full configs as JSON, plus the legacy comma-separated
    // ID/name fields used for form initialization.
    const agentConfigs = resource.job_spec?.agent_configs;
    if (agentConfigs) {
        cloneParams.cloneAgentConfigs = JSON.stringify(agentConfigs.map((a) => ({
            agentId: a.agent_id,
            name: a.name,
            modelName: a.model_name,
            timeoutSeconds: a.timeout_seconds,
            kwargs: a.kwargs,
            environmentVariables: a.agent_environment?.environment_variables,
            secrets: a.agent_environment?.secrets,
        })));
        cloneParams.cloneAgentIds = agentConfigs.map((a) => a.agent_id).join(",");
        cloneParams.cloneAgentNames = agentConfigs.map((a) => a.name).join(",");
    }
    const orch = resource.job_spec?.orchestrator_config;
    if (orch) {
        cloneParams.cloneOrchestratorConfig = JSON.stringify({
            nAttempts: orch.n_attempts,
            nConcurrentTrials: orch.n_concurrent_trials,
            quiet: orch.quiet,
            timeoutMultiplier: orch.timeout_multiplier,
        });
    }
    navigate("benchmark-job-create", cloneParams);
};
494
+ // Build detailed info lines for full details view
495
+ const buildDetailLines = (j) => {
496
+ const lines = [];
497
+ // Core Information
498
+ lines.push(_jsx(Text, { color: colors.warning, bold: true, children: "Benchmark Job Details" }, "core-title"));
499
+ lines.push(_jsxs(Text, { color: colors.idColor, children: [" ", "ID: ", j.id] }, "core-id"));
500
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Name: ", j.name || "(none)"] }, "core-name"));
501
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Status: ", j.state] }, "core-status"));
502
+ if (j.create_time_ms) {
503
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Created: ", new Date(j.create_time_ms).toLocaleString()] }, "core-created"));
504
+ }
505
+ if (j.failure_reason) {
506
+ lines.push(_jsxs(Text, { color: colors.error, children: [" ", "Failure: ", j.failure_reason] }, "core-failure"));
507
+ }
508
+ lines.push(_jsx(Text, { children: " " }, "core-space"));
509
+ // Benchmark Runs - unified view
510
+ lines.push(_jsx(Text, { color: colors.primary, bold: true, children: "Benchmark Runs" }, "runs-title"));
511
+ // Completed runs from benchmark_outcomes
512
+ if (j.benchmark_outcomes && j.benchmark_outcomes.length > 0) {
513
+ j.benchmark_outcomes.forEach((outcome, idx) => {
514
+ const scoreStr = outcome.average_score !== undefined && outcome.average_score !== null
515
+ ? `Score: ${outcome.average_score.toFixed(2)}`
516
+ : "No score";
517
+ const statsStr = `${outcome.n_completed}✓ ${outcome.n_failed}✗ ${outcome.n_timeout}⏱`;
518
+ const durationStr = outcome.duration_ms
519
+ ? formatDuration(outcome.duration_ms)
520
+ : "";
521
+ const statusIcon = outcome.n_failed > 0 ? figures.cross : figures.tick;
522
+ const statusColor = outcome.n_failed > 0 ? colors.error : colors.success;
523
+ lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: statusColor, children: statusIcon }), _jsxs(Text, { color: colors.info, children: [" ", outcome.agent_name || `Agent ${idx + 1}`] }), _jsxs(Text, { dimColor: true, children: [": ", scoreStr, " (", statsStr, ") ", durationStr] })] }, `outcome-${idx}`));
524
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Run ID: ", outcome.benchmark_run_id] }, `outcome-${idx}-id`));
525
+ // Show scenario outcomes
526
+ if (outcome.scenario_outcomes && outcome.scenario_outcomes.length > 0) {
527
+ outcome.scenario_outcomes.forEach((scenario, sIdx) => {
528
+ const scenarioScore = scenario.score !== undefined && scenario.score !== null
529
+ ? scenario.score.toFixed(2)
530
+ : "-";
531
+ const scenarioDur = scenario.duration_ms
532
+ ? formatDuration(scenario.duration_ms)
533
+ : "";
534
+ const scenarioIcon = scenario.state === "COMPLETED"
535
+ ? figures.tick
536
+ : scenario.state === "FAILED"
537
+ ? figures.cross
538
+ : figures.warning;
539
+ const scenarioColor = scenario.state === "COMPLETED"
540
+ ? colors.success
541
+ : scenario.state === "FAILED"
542
+ ? colors.error
543
+ : colors.warning;
544
+ lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: scenarioColor, children: scenarioIcon }), _jsxs(Text, { dimColor: true, children: [" ", scenario.scenario_name, ": ", scenario.state, " (score:", " ", scenarioScore, ") ", scenarioDur] })] }, `outcome-${idx}-scenario-${sIdx}`));
545
+ if (scenario.failure_reason) {
546
+ lines.push(_jsxs(Text, { color: colors.error, children: [" ", scenario.failure_reason.exception_type, ":", " ", scenario.failure_reason.exception_message] }, `outcome-${idx}-scenario-${sIdx}-fail`));
547
+ }
548
+ });
549
+ }
550
+ });
551
+ }
552
+ // In-progress runs
553
+ if (j.in_progress_runs && j.in_progress_runs.length > 0) {
554
+ j.in_progress_runs.forEach((run, idx) => {
555
+ let agentName = "Unknown Agent";
556
+ if (run.agent_config && "name" in run.agent_config) {
557
+ agentName = run.agent_config.name;
558
+ }
559
+ const durationStr = run.duration_ms
560
+ ? formatDuration(run.duration_ms)
561
+ : "";
562
+ lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: colors.info, children: figures.play }), _jsxs(Text, { color: colors.info, children: [" ", agentName] }), _jsxs(Text, { dimColor: true, children: [": Running ", durationStr] })] }, `run-${idx}`));
563
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Run ID: ", run.benchmark_run_id] }, `run-${idx}-id`));
564
+ });
565
+ }
566
+ // Pending agents
567
+ if (j.job_spec?.agent_configs) {
568
+ const runningOrCompletedAgents = new Set();
569
+ j.benchmark_outcomes?.forEach((o) => runningOrCompletedAgents.add(o.agent_name));
570
+ j.in_progress_runs?.forEach((r) => {
571
+ if (r.agent_config && "name" in r.agent_config) {
572
+ runningOrCompletedAgents.add(r.agent_config.name);
573
+ }
574
+ });
575
+ j.job_spec.agent_configs.forEach((agent, idx) => {
576
+ if (!runningOrCompletedAgents.has(agent.name)) {
577
+ lines.push(_jsxs(Text, { children: [" ", _jsx(Text, { color: colors.textDim, children: figures.circleDotted }), _jsxs(Text, { dimColor: true, children: [" ", agent.name, ": Pending"] })] }, `pending-${idx}`));
578
+ }
579
+ });
580
+ }
581
+ lines.push(_jsx(Text, { children: " " }, "runs-space"));
582
+ // Job Configuration
583
+ if (j.job_spec) {
584
+ lines.push(_jsx(Text, { color: colors.secondary, bold: true, children: "Job Configuration" }, "spec-title"));
585
+ if (j.job_spec.scenario_ids) {
586
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Scenarios: ", j.job_spec.scenario_ids.length] }, "spec-scenarios"));
587
+ }
588
+ if (j.job_spec.orchestrator_config) {
589
+ const orch = j.job_spec.orchestrator_config;
590
+ const orchInfo = [];
591
+ if (orch.n_concurrent_trials)
592
+ orchInfo.push(`${orch.n_concurrent_trials} concurrent`);
593
+ if (orch.n_attempts)
594
+ orchInfo.push(`${orch.n_attempts} retries`);
595
+ if (orchInfo.length > 0) {
596
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", "Orchestrator: ", orchInfo.join(", ")] }, "spec-orch"));
597
+ }
598
+ }
599
+ lines.push(_jsx(Text, { children: " " }, "spec-space"));
600
+ }
601
+ // Raw JSON
602
+ lines.push(_jsx(Text, { color: colors.warning, bold: true, children: "Raw JSON" }, "json-title"));
603
+ const jsonLines = JSON.stringify(j, null, 2).split("\n");
604
+ jsonLines.forEach((line, idx) => {
605
+ lines.push(_jsxs(Text, { dimColor: true, children: [" ", line] }, `json-${idx}`));
606
+ });
607
+ return lines;
608
+ };
609
+ // Check if job is still in progress for polling
610
+ const isRunning = job.state === "running" ||
611
+ job.state === "queued" ||
612
+ job.state === "initializing";
613
+ return (_jsx(ResourceDetailPage, { resource: job, resourceType: "Benchmark Jobs", getDisplayName: (j) => j.name || j.id, getId: (j) => j.id, getStatus: (j) => j.state, detailSections: detailSections, operations: operations, onOperation: handleOperation, onBack: goBack, buildDetailLines: buildDetailLines, pollResource: isRunning ? pollJob : undefined, breadcrumbPrefix: [{ label: "Home" }] }));
614
+ }