@workbench-ai/workbench-contract 0.0.67 → 0.0.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,683 +1,317 @@
1
1
  export type Json = null | boolean | number | string | Json[] | {
2
2
  [key: string]: Json;
3
3
  };
4
- export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
5
- export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
6
- export interface RemoteWorkbenchProject {
7
- id: string;
8
- ownerUserId: string;
9
- ownerUsername: string;
10
- visibility: "private" | "public";
11
- createdAt: string;
12
- updatedAt: string;
13
- activeEnvironmentVersionId: string;
14
- currentSpecVersionId: string;
15
- activeCandidateId?: string | null;
16
- sourceFingerprint?: string;
17
- starCount: number;
18
- }
19
- export interface RemoteWorkbenchProjectSummary {
20
- id: string;
21
- ownerUsername: string;
22
- name: string;
23
- description: string;
24
- visibility: "private" | "public";
25
- updatedAt: string;
26
- currentSpecVersionId: string;
27
- activeEnvironmentVersionId: string;
28
- activeCandidateId?: string | null;
29
- candidateCount: number;
30
- evaluationCount: number;
31
- runCount: number;
32
- starCount: number;
33
- viewerHasStarred?: boolean;
34
- }
35
- export interface WorkbenchSpecValidation {
36
- ok: boolean;
37
- errors: string[];
38
- warnings: string[];
39
- }
40
- export interface RemoteWorkbenchSpecVersion {
41
- id: string;
42
- projectId: string;
43
- ordinal: number;
44
- sourceYaml: string;
45
- createdAt: string;
46
- updatedAt: string;
47
- validation: WorkbenchSpecValidation;
48
- }
49
- export interface RemoteWorkbenchEnvironment {
50
- id: string;
51
- name: string;
52
- description: string;
53
- currentVersionId: string;
54
- builtIn: boolean;
55
- createdAt: string;
56
- updatedAt: string;
57
- }
58
- export interface RemoteWorkbenchEnvironmentVersion {
59
- id: string;
60
- environmentId: string;
61
- name: string;
62
- spec: RemoteWorkbenchEnvironmentSpec;
63
- imageRef: string;
64
- sourceHash: string;
65
- sourceType: "builtin" | "dockerfile";
66
- build?: {
67
- dockerfileRef?: BlobObjectRef;
68
- logRef?: BlobObjectRef;
69
- error?: string;
70
- startedAt?: string;
71
- finishedAt?: string;
72
- };
73
- status: "ready" | "building" | "failed";
74
- createdAt: string;
75
- updatedAt: string;
76
- }
77
- export interface RemoteWorkbenchEnvironmentSpec {
78
- base: string;
79
- resources: {
80
- cpu: number;
81
- memoryGb: number;
82
- diskGb: number;
83
- timeoutMinutes: number;
84
- };
85
- network: "off" | "on";
86
- }
87
- export interface BlobObjectRef {
88
- bucket: string;
89
- key: string;
90
- byteLength: number;
91
- sha256: string;
92
- }
93
- export type RemoteWorkbenchSnapshotKind = "candidate" | "engineResolve" | "adapters" | "runtime";
94
- export type WorkspaceWriteEncoding = "utf8" | "base64";
95
4
  export interface SurfaceSnapshotFile {
96
5
  path: string;
97
- kind: "text" | "binary";
98
- encoding: WorkspaceWriteEncoding;
6
+ kind?: "text" | "binary";
7
+ encoding?: "utf8" | "base64";
99
8
  content: string;
100
- executable: boolean;
101
- contentRedacted?: boolean;
102
- }
103
- export interface WorkbenchEngineCaseFiles {
104
- public?: SurfaceSnapshotFile[];
105
- private?: SurfaceSnapshotFile[];
106
- source?: SurfaceSnapshotFile[];
107
- }
108
- export interface SurfaceSnapshot {
109
- files: SurfaceSnapshotFile[];
9
+ executable?: boolean;
110
10
  }
111
- export interface RemoteWorkbenchFileInput {
11
+ export type WorkbenchInspectionFileOwnerKind = "version" | "trace" | "artifact";
12
+ export interface WorkbenchInspectionFileContent {
112
13
  path: string;
113
- content: string;
114
- encoding?: WorkspaceWriteEncoding;
14
+ kind?: SurfaceSnapshotFile["kind"];
15
+ encoding?: SurfaceSnapshotFile["encoding"];
115
16
  executable?: boolean;
17
+ content?: string;
18
+ unavailableReason?: string;
116
19
  }
117
- export interface EngineResolveBinding {
118
- engine: string;
119
- resolver: {
120
- use: string;
121
- withFingerprint: string;
122
- };
20
+ export declare function workbenchInspectionFileContentUnavailableReason(file: Pick<SurfaceSnapshotFile, "kind" | "encoding">): string | null;
21
+ export declare function workbenchInspectionFileContent(file: SurfaceSnapshotFile): WorkbenchInspectionFileContent;
22
+ export declare function workbenchInspectionFileManifest(file: SurfaceSnapshotFile): SurfaceSnapshotFile;
23
+ export interface WorkbenchAgent {
24
+ name: string;
25
+ adapter: string;
26
+ model?: string;
27
+ config: Record<string, Json>;
123
28
  }
124
- export interface RemoteWorkbenchSnapshotBase {
29
+ export type WorkbenchSkillSourceKind = "local" | "remote";
30
+ export interface WorkbenchSkillInclude {
31
+ name: string;
32
+ kind: WorkbenchSkillSourceKind;
33
+ path?: string;
34
+ from?: string;
35
+ ref?: string;
36
+ resolvedRef?: string;
37
+ hash?: string;
38
+ files?: SurfaceSnapshotFile[];
39
+ }
40
+ export interface WorkbenchSkillSource {
41
+ name: string;
42
+ kind: WorkbenchSkillSourceKind;
43
+ path?: string;
44
+ from?: string;
45
+ ref?: string;
46
+ resolvedRef?: string;
47
+ hash?: string;
48
+ includes?: WorkbenchSkillInclude[];
49
+ }
50
+ export interface WorkbenchSkillBundleSnapshot {
51
+ hash: string;
52
+ skillName: string;
53
+ entryName: string;
54
+ source: WorkbenchSkillSource;
125
55
  files: SurfaceSnapshotFile[];
126
- updatedAt: string;
127
- }
128
- export interface RemoteWorkbenchEngineResolveSnapshot extends RemoteWorkbenchSnapshotBase {
129
- kind: "engineResolve";
130
- engineResolveBinding: EngineResolveBinding;
131
- }
132
- export interface RemoteWorkbenchStandardSnapshot extends RemoteWorkbenchSnapshotBase {
133
- kind: Exclude<RemoteWorkbenchSnapshotKind, "engineResolve">;
134
- }
135
- export type RemoteWorkbenchSnapshot = RemoteWorkbenchEngineResolveSnapshot | RemoteWorkbenchStandardSnapshot;
136
- export type CandidateStatus = "running" | "evaluated" | "repair_exhausted" | "eval_error" | "agent_error";
137
- export interface MetricStats {
138
- count: number;
139
- mean: number;
140
- variance: number;
141
- stddev: number;
142
- min: number;
143
- max: number;
144
- }
145
- export type EvalCaseStatus = "completed" | "error";
146
- export type EvalCaseSource = Record<string, Json>;
147
- export interface CandidateCaseCriterionScore {
148
- criterion_id: string;
149
- label: string;
150
- score: number;
151
- pass: boolean;
152
- errors?: string[];
153
- rationale?: string;
56
+ includedSkills: WorkbenchSkillInclude[];
57
+ createdAt: string;
154
58
  }
155
- export interface EvalCaseResult {
59
+ export interface WorkbenchVersion {
156
60
  id: string;
157
- label?: string;
158
- split?: string;
159
- status?: EvalCaseStatus;
160
- durationMs?: number;
161
- metrics: Record<string, number>;
162
- source?: EvalCaseSource;
163
- feedback?: Json;
164
- criteria?: CandidateCaseCriterionScore[];
165
- }
166
- export type ExecutionRole = "improver" | "runner" | "engine";
167
- export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
168
- export interface ExecutionUsage {
169
- provider?: string;
170
- model?: string;
171
- inputTokens?: number;
172
- uncachedInputTokens?: number;
173
- cachedInputTokens?: number;
174
- cacheCreationInputTokens?: number;
175
- cacheReadInputTokens?: number;
176
- outputTokens?: number;
177
- reasoningOutputTokens?: number;
178
- totalTokens?: number;
179
- costUsd?: number;
180
- costSource?: ExecutionUsageCostSource;
181
- pricingSource?: string;
182
- }
183
- export interface UsageSummary {
184
- total?: ExecutionUsage;
185
- improver?: ExecutionUsage;
186
- runner?: ExecutionUsage;
187
- engine?: ExecutionUsage;
61
+ hash: string;
62
+ message: string;
63
+ parentIds: string[];
64
+ createdAt: string;
65
+ files: SurfaceSnapshotFile[];
188
66
  }
189
- export interface EvaluationCandidateSummary {
190
- id: string;
191
- kind: "candidate";
192
- label?: string;
67
+ export interface WorkbenchEvalSnapshot {
68
+ hash: string;
69
+ files: SurfaceSnapshotFile[];
70
+ caseCount: number;
193
71
  }
194
- export type EvaluationSampleStatus = "planned" | "running" | "completed" | "error";
195
- export type EvaluationStatus = EvaluationSampleStatus | "partial";
196
- export interface EvaluationSampleRecord {
72
+ export type WorkbenchRunKind = "eval" | "improve" | "compare" | "retry";
73
+ export type WorkbenchRunStatus = "running" | "succeeded" | "failed" | "canceled";
74
+ export type WorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "canceled";
75
+ export type WorkbenchArtifactKind = "file" | "directory" | "log" | "scorecard";
76
+ export interface WorkbenchRun {
197
77
  id: string;
198
- index: number;
199
- candidate: EvaluationCandidateSummary;
200
- status: EvaluationSampleStatus;
201
- startedAt?: string;
78
+ kind: WorkbenchRunKind;
79
+ versionId: string;
80
+ skillName: string;
81
+ skillBundleHash: string;
82
+ evalHash: string;
83
+ agentName: string;
84
+ agentHash: string;
85
+ status: WorkbenchRunStatus;
86
+ score?: number;
87
+ costUsd?: number;
88
+ latencyMs?: number;
89
+ jobIds?: string[];
90
+ traceIds: string[];
91
+ createdAt: string;
202
92
  finishedAt?: string;
203
- durationMs?: number;
204
- metrics?: Record<string, number>;
205
- usage?: UsageSummary;
93
+ parentRunId?: string;
94
+ outputVersionId?: string;
206
95
  error?: string;
207
- cases?: EvalCaseResult[];
208
- feedback?: Json;
209
- }
210
- export interface EvaluationCaseStats {
211
- id: string;
212
- label?: string;
213
- split?: string;
214
- status?: EvalCaseStatus;
215
- sampleCount: number;
216
- metrics: Record<string, MetricStats>;
217
- durationMs?: MetricStats;
218
96
  }
219
- export interface EvaluationUsageStats {
220
- total?: ExecutionUsageStats;
221
- improver?: ExecutionUsageStats;
222
- runner?: ExecutionUsageStats;
223
- engine?: ExecutionUsageStats;
224
- }
225
- export interface ExecutionUsageStats {
226
- inputTokens?: MetricStats;
227
- uncachedInputTokens?: MetricStats;
228
- cachedInputTokens?: MetricStats;
229
- cacheCreationInputTokens?: MetricStats;
230
- cacheReadInputTokens?: MetricStats;
231
- outputTokens?: MetricStats;
232
- reasoningOutputTokens?: MetricStats;
233
- totalTokens?: MetricStats;
234
- costUsd?: MetricStats;
97
+ export type WorkbenchAutomationReadinessLevel = "insufficient" | "assist" | "review" | "automate";
98
+ export interface WorkbenchAutomationReadiness {
99
+ level: WorkbenchAutomationReadinessLevel;
100
+ label: string;
101
+ reason: string;
102
+ runId?: string;
103
+ score?: number;
104
+ caseCount?: number;
105
+ jobCount?: number;
235
106
  }
236
- export interface EvaluationRecord {
237
- candidate: EvaluationCandidateSummary;
238
- status: EvaluationStatus;
239
- sampleCount: number;
240
- completedSampleCount: number;
241
- errorSampleCount: number;
107
+ export interface WorkbenchJob {
108
+ id: string;
109
+ runId: string;
110
+ kind: WorkbenchRunKind;
111
+ versionId: string;
112
+ skillName: string;
113
+ skillBundleHash: string;
114
+ evalHash: string;
115
+ agentName: string;
116
+ agentHash: string;
117
+ caseId: string;
118
+ sample: number;
119
+ status: WorkbenchJobStatus;
120
+ score?: number;
121
+ command?: string;
122
+ dockerImage?: string;
123
+ exitCode?: number;
124
+ artifactIds: string[];
125
+ traceIds: string[];
126
+ createdAt: string;
242
127
  startedAt?: string;
243
128
  finishedAt?: string;
244
- metrics?: Record<string, MetricStats>;
245
- durationMs?: MetricStats;
246
- usage?: EvaluationUsageStats;
247
- cases?: EvaluationCaseStats[];
248
- samples: EvaluationSampleRecord[];
129
+ durationMs?: number;
249
130
  error?: string;
250
131
  }
251
- export interface EvaluationSummary {
132
+ export interface WorkbenchArtifact {
252
133
  id: string;
253
134
  runId: string;
254
- benchmarkFingerprint: string;
255
- candidateFingerprint: string;
256
- candidateId: string;
257
- candidateName?: string;
258
- candidateVersion: number;
259
- candidateRunId?: string;
260
- candidateRunName?: string;
135
+ jobId: string;
136
+ kind: WorkbenchArtifactKind;
137
+ path: string;
261
138
  createdAt: string;
262
- updatedAt: string;
263
- status: EvaluationStatus;
264
- sampleCount: number;
265
- completedSampleCount: number;
266
- errorSampleCount: number;
267
- metrics?: Record<string, MetricStats>;
268
- selectionMetric?: string;
269
- selectionLabel?: string;
270
- selectionScore?: MetricStats;
271
- durationMs?: MetricStats;
272
- usage?: EvaluationUsageStats;
273
- error?: string;
274
- }
275
- export interface EvaluationScorecard extends EvaluationSummary {
276
- evaluation: EvaluationRecord;
139
+ files: SurfaceSnapshotFile[];
277
140
  }
278
- export interface WorkbenchEvaluationMetricDescriptor {
141
+ export interface WorkbenchTrace {
279
142
  id: string;
280
- label: string;
281
- direction: "higher" | "lower";
282
- kind: "number" | "duration_ms" | "currency_usd";
283
- group: "metric" | "execution" | "usage" | "other";
284
- primary: boolean;
285
- semanticRole?: "performance" | "speed" | "cost";
286
- }
287
- export interface WorkbenchEvaluationComparisonRow {
288
- evaluationId: string;
289
143
  runId: string;
290
- candidateId: string;
291
- candidateLabel: string;
292
- configurationLabel: string;
293
- status: EvaluationSummary["status"];
294
- score: number | null;
295
- metrics: Record<string, number>;
144
+ jobId?: string;
145
+ versionId: string;
146
+ skillName: string;
147
+ skillBundleHash: string;
148
+ agentName: string;
296
149
  createdAt: string;
297
- updatedAt: string;
298
- error?: string;
150
+ request: Json;
151
+ result: Json;
152
+ files: SurfaceSnapshotFile[];
299
153
  }
300
- export interface WorkbenchCandidateEvaluationRollup {
301
- candidateId: string;
302
- candidateLabel: string;
303
- evaluationCount: number;
304
- completeEvaluationCount: number;
305
- scoredEvaluationCount: number;
306
- bestEvaluationId: string | null;
307
- bestScore: number | null;
308
- meanScore: number | null;
309
- }
310
- export interface WorkbenchEvaluationComparison {
311
- evaluations: EvaluationSummary[];
312
- rows: WorkbenchEvaluationComparisonRow[];
313
- candidates: WorkbenchCandidateEvaluationRollup[];
314
- metrics: WorkbenchEvaluationMetricDescriptor[];
315
- }
316
- export interface CandidateSummary {
317
- id: string;
318
- name?: string;
319
- version: number;
320
- ordinal: number;
321
- benchmarkFingerprint: string;
322
- candidateFingerprint: string;
323
- ownerUserId?: string;
324
- ownerUsername?: string;
325
- visibility?: "private" | "public";
154
+ export interface WorkbenchLineageEdge {
155
+ parentId: string;
156
+ childId: string;
157
+ runId?: string;
158
+ reason: "version" | "improve" | "switch" | "publish";
326
159
  createdAt: string;
327
- baseId?: string;
328
- referenceIds: string[];
329
- status: CandidateStatus;
330
- fileChanges: string[];
331
- usage?: UsageSummary;
332
- }
333
- export interface CandidateRecord extends CandidateSummary {
334
- eval?: EvaluationRecord;
335
- prompt?: string;
336
- meta?: Json;
160
+ message?: string;
337
161
  }
338
- export interface CandidateLineageNode {
339
- id: string;
340
- active: boolean;
341
- summary: CandidateSummary;
162
+ export interface WorkbenchRemote {
163
+ name: string;
164
+ url: string;
165
+ type: "workbench";
342
166
  }
343
- export interface CandidateLineageEdge {
344
- id: string;
345
- kind: "anchor";
346
- sourceId: string;
347
- targetId: string;
348
- }
349
- export interface CandidateLineageGraph {
350
- activeId: string | null;
351
- nodes: CandidateLineageNode[];
352
- edges: CandidateLineageEdge[];
353
- }
354
- export declare function buildCandidateLineage(args: {
355
- summaries: readonly CandidateSummary[];
356
- activeId: string | null;
357
- }): CandidateLineageGraph;
358
- export declare function buildWorkbenchEvaluationComparison(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationComparison;
359
- export declare function buildWorkbenchEvaluationMetricDescriptors(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationMetricDescriptor[];
360
- export declare function readEvaluationScore(evaluation: EvaluationSummary): number | null;
361
- export declare function isCompleteEvaluationSummary(evaluation: Pick<EvaluationSummary, "status" | "sampleCount" | "completedSampleCount" | "errorSampleCount">): boolean;
362
- export declare function formatEvaluationConfigurationLabel(evaluation: Pick<EvaluationSummary, "candidateRunName" | "candidateRunId">): string;
363
- export type CandidatePreviewMode = "diff" | "raw" | "rendered";
364
- export type CandidatePreviewKind = "text" | "markdown" | "table" | "spreadsheet" | "image" | "pdf" | "unsupported";
365
- export type CandidatePreviewSourceEncoding = "utf8" | "base64";
366
- export type CandidateFileStatus = "added" | "modified" | "unchanged";
367
- export interface CandidateFileSummary {
368
- path: string;
369
- old_path: string | null;
370
- status: CandidateFileStatus;
371
- mime_type: string | null;
372
- preview_kind: CandidatePreviewKind;
373
- additions: number;
374
- deletions: number;
375
- }
376
- export interface CandidateFilePreviewSource {
377
- content: string;
378
- encoding: CandidatePreviewSourceEncoding;
167
+ export interface WorkbenchRefs {
168
+ current?: string;
169
+ [name: string]: string | undefined;
379
170
  }
380
- export interface CandidateFilePreview {
171
+ export interface WorkbenchProjectState {
172
+ schema: "workbench.skill.state.v1";
173
+ root: string;
174
+ currentVersionId?: string;
175
+ refs: WorkbenchRefs;
176
+ remotes: Record<string, WorkbenchRemote>;
177
+ defaultSkill?: string;
178
+ defaultAgent?: string;
179
+ versions: WorkbenchVersion[];
180
+ skillSources: WorkbenchSkillSource[];
181
+ skillBundles: WorkbenchSkillBundleSnapshot[];
182
+ evals: WorkbenchEvalSnapshot[];
183
+ agents: WorkbenchAgent[];
184
+ runs: WorkbenchRun[];
185
+ jobs: WorkbenchJob[];
186
+ traces: WorkbenchTrace[];
187
+ artifacts: WorkbenchArtifact[];
188
+ lineage: WorkbenchLineageEdge[];
189
+ }
190
+ export interface WorkbenchStatus {
191
+ root: string;
192
+ initialized: boolean;
193
+ currentSkillHash?: string;
194
+ currentVersionId?: string;
195
+ hasUnversionedChanges: boolean;
196
+ defaultSkill?: string;
197
+ defaultAgent?: string;
198
+ versionCount: number;
199
+ skillCount: number;
200
+ agentCount: number;
201
+ runCount: number;
202
+ remoteCount: number;
203
+ pendingSyncCount?: number;
204
+ lastScore?: number;
205
+ automationReadiness?: WorkbenchAutomationReadiness;
206
+ }
207
+ export interface WorkbenchComparisonCell {
208
+ versionId: string;
209
+ skillName: string;
210
+ skillBundleHash: string;
211
+ evalHash: string;
212
+ agentName: string;
213
+ runId?: string;
214
+ score?: number;
215
+ costUsd?: number;
216
+ latencyMs?: number;
217
+ automationReadiness?: WorkbenchAutomationReadiness;
218
+ }
219
+ export interface WorkbenchComparison {
220
+ evalHash?: string;
221
+ versions: WorkbenchVersion[];
222
+ skills: WorkbenchSkillBundleSnapshot[];
223
+ agents: WorkbenchAgent[];
224
+ cells: WorkbenchComparisonCell[];
225
+ }
226
+ export interface WorkbenchInspectionSnapshot {
227
+ root: string;
228
+ status: WorkbenchStatus;
229
+ versions: WorkbenchVersion[];
230
+ skillSources: WorkbenchSkillSource[];
231
+ skillBundles: WorkbenchSkillBundleSnapshot[];
232
+ agents: WorkbenchAgent[];
233
+ runs: WorkbenchRun[];
234
+ jobs: WorkbenchJob[];
235
+ traces: WorkbenchTrace[];
236
+ artifacts: WorkbenchArtifact[];
237
+ lineage: WorkbenchLineageEdge[];
238
+ remotes: WorkbenchRemote[];
239
+ refs: WorkbenchRefs;
240
+ publication?: WorkbenchPublication;
241
+ }
242
+ export interface WorkbenchPublication {
243
+ versionId: string;
244
+ installUrl: string;
245
+ pinnedInstallUrl: string;
246
+ }
247
+ export interface WorkbenchObjectPack {
248
+ schema: "workbench.object-pack.v1";
249
+ createdAt: string;
250
+ refs: WorkbenchRefs;
251
+ defaultSkill?: string;
252
+ defaultAgent?: string;
253
+ versions: WorkbenchVersion[];
254
+ skillSources: WorkbenchSkillSource[];
255
+ skillBundles: WorkbenchSkillBundleSnapshot[];
256
+ evals: WorkbenchEvalSnapshot[];
257
+ agents: WorkbenchAgent[];
258
+ runs: WorkbenchRun[];
259
+ jobs: WorkbenchJob[];
260
+ traces: WorkbenchTrace[];
261
+ artifacts: WorkbenchArtifact[];
262
+ lineage: WorkbenchLineageEdge[];
263
+ }
264
+ export interface WorkbenchFilePreview {
381
265
  path: string;
382
- view: CandidatePreviewMode;
383
- mime_type: string | null;
384
- preview_kind: CandidatePreviewKind;
385
- diff: string | null;
386
- source: CandidateFilePreviewSource | null;
387
- rendered_html: string | null;
388
- }
389
- export interface CandidateCaseCriterionResult {
390
- criterion_id: string;
391
- pass: boolean;
392
- score: number;
393
- errors: string[];
394
- rationale?: string;
266
+ source?: SurfaceSnapshotFile;
267
+ renderedText?: string;
268
+ diff?: string;
395
269
  }
396
- export interface CandidateCaseExecutionRef {
397
- runId: string;
398
- kind: string;
399
- role: WorkbenchExecutionEventRole;
400
- status: RemoteWorkbenchJobStatus;
401
- jobIds: string[];
402
- executionIds: string[];
403
- createdAt?: string;
404
- startedAt?: string;
405
- finishedAt?: string;
406
- durationMs?: number;
407
- caseId?: string;
408
- sampleIndex?: number;
409
- attemptIndex?: number;
410
- }
411
- export interface CandidateCaseReview {
412
- candidateId: string;
413
- caseId: string;
414
- caseLabel: string;
415
- sampleId?: string;
416
- sampleIndex?: number;
417
- status?: EvalCaseStatus | RemoteWorkbenchJobStatus;
418
- metrics: Record<string, number>;
419
- durationMs?: number;
420
- source?: EvalCaseSource;
421
- feedback?: Json;
422
- executions: CandidateCaseExecutionRef[];
423
- criteria_results: CandidateCaseCriterionResult[];
424
- }
425
- export type RunStatus = "queued" | "running" | "finished";
426
- export type RunOutcome = "ok" | "error" | "cancelled";
427
- export type RemoteRunWorkflow = "eval" | "improve";
428
- export interface RunSummary {
429
- id: string;
430
- workflow: RemoteRunWorkflow;
431
- benchmarkFingerprint: string;
432
- status: RunStatus;
433
- candidateId?: string | null;
434
- candidateRunId?: string;
435
- candidateRunName?: string;
436
- startedAt: string;
437
- finishedAt?: string;
438
- durationMs?: number;
439
- improver: string;
440
- engineRun: string;
441
- strategy: string;
442
- optimizeOn?: string;
443
- selectBy?: string;
444
- budget: number;
445
- repairBudget: number;
446
- attemptsRequested: number;
447
- attemptsExecuted: number;
448
- samples: number;
449
- executionFingerprint?: string;
450
- stoppedReason?: "budget_exhausted" | "completed" | "dry_run" | "cancelled";
451
- outcome?: RunOutcome;
452
- error?: string;
453
- activeCandidateId?: string | null;
454
- outputCandidateId?: string | null;
455
- }
456
- export interface WorkbenchRuntimeRun extends RunSummary {
457
- jobCount?: number;
458
- completedJobCount?: number;
459
- failedJobCount?: number;
460
- }
461
- export interface RuntimeEvent {
462
- id: string;
463
- at: string;
464
- type: "run_started" | "job_queued" | "job_started" | "job_progress" | "sandbox_allocated" | "sandbox_stopped" | "candidate_created" | "candidate_evaluated" | "active_changed" | "run_finished";
465
- runId?: string;
466
- jobId?: string;
467
- candidateId?: string;
468
- baseId?: string;
469
- activeId?: string;
470
- status?: CandidateStatus | RemoteWorkbenchJobStatus;
471
- metrics?: Record<string, number>;
472
- detail?: Record<string, Json>;
473
- }
474
- export interface RuntimeSnapshot {
475
- workspaceRoot: string;
476
- activeId: string | null;
477
- currentBenchmarkFingerprint: string | null;
478
- summaries: CandidateSummary[];
479
- evaluations: EvaluationSummary[];
480
- runs: RunSummary[];
481
- }
482
- export interface WorkbenchRuntimeCandidateFiles {
483
- candidateId: string;
270
+ export interface WorkbenchFileSurface {
484
271
  files: SurfaceSnapshotFile[];
272
+ preview: WorkbenchFilePreview | null;
485
273
  }
486
- export interface WorkbenchRuntimeExecutionFiles {
487
- jobId: string;
488
- files: SurfaceSnapshotFile[];
274
+ export interface WorkbenchSpecValidation {
275
+ ok: boolean;
276
+ errors: string[];
277
+ warnings: string[];
489
278
  }
490
- export interface WorkbenchRuntimeBundle {
491
- schema: "workbench.runtime.bundle.v1";
492
- activeId: string | null;
493
- candidates: CandidateRecord[];
494
- candidateFiles: WorkbenchRuntimeCandidateFiles[];
495
- evaluations: EvaluationScorecard[];
496
- runs: WorkbenchRuntimeRun[];
497
- jobs: RemoteWorkbenchJob[];
498
- executionFiles: WorkbenchRuntimeExecutionFiles[];
499
- events: RuntimeEvent[];
500
- }
501
- export interface WorkbenchRuntimeBundleStats {
502
- candidates: number;
503
- candidateFiles: number;
504
- evaluations: number;
505
- runs: number;
506
- jobs: number;
507
- executionFiles: number;
508
- events: number;
509
- activeId: string | null;
510
- }
511
- export interface WorkbenchRuntimeImportResult {
512
- changed: boolean;
513
- stats: WorkbenchRuntimeBundleStats;
514
- }
515
- export interface WorkbenchProjectSourceResources {
516
- cpu?: number;
517
- memoryGb?: number;
518
- diskGb?: number;
519
- timeoutMinutes?: number;
520
- }
521
- export interface WorkbenchProjectStateSource {
522
- source: string;
523
- files: SurfaceSnapshotFile[];
524
- candidateFiles: SurfaceSnapshotFile[];
525
- engineResolveFiles: SurfaceSnapshotFile[];
526
- engineResolveBinding: EngineResolveBinding;
527
- adapterFiles: SurfaceSnapshotFile[];
528
- dockerfile: string;
529
- runtimeDockerfile: string;
530
- runtimeFiles: SurfaceSnapshotFile[];
279
+ export interface RemoteWorkbenchEnvironmentSpec {
280
+ base: string;
281
+ resources: {
282
+ cpu: number;
283
+ memoryGb: number;
284
+ diskGb: number;
285
+ timeoutMinutes: number;
286
+ };
531
287
  network: "off" | "on";
532
- resources: WorkbenchProjectSourceResources;
533
- revisionId?: string;
534
- fingerprint?: string;
535
- }
536
- export interface WorkbenchProjectStateBase {
537
- sourceRevisionId?: string;
538
- sourceFingerprint?: string;
539
- runtimeFingerprint?: string;
540
288
  }
541
- export interface WorkbenchProjectStateRemote {
289
+ export interface RemoteWorkbenchEnvironmentVersion {
542
290
  id: string;
543
- remote: string;
544
- ownerUsername: string;
291
+ environmentId: string;
545
292
  name: string;
546
- visibility: "private" | "public";
547
- }
548
- export interface WorkbenchProjectState {
549
- schema: "workbench.project.state.v1";
550
- project: WorkbenchProjectStateRemote;
551
- base: WorkbenchProjectStateBase;
552
- source: WorkbenchProjectStateSource;
553
- runtime: WorkbenchRuntimeBundle;
554
- }
555
- export interface WorkbenchProjectStateImportResult {
556
- changed: boolean;
557
- source: {
558
- changed: boolean;
559
- revisionId?: string;
560
- fingerprint?: string;
561
- };
562
- runtime: WorkbenchRuntimeImportResult;
563
- state: WorkbenchProjectState;
564
- }
565
- export type WorkbenchRemoteContractSchema = "workbench.remote.capabilities.v1" | "workbench.remote.run.request.v1" | "workbench.remote.job.claim_request.v1" | "workbench.remote.job.claim.v1" | "workbench.remote.job.renewal.v1" | "workbench.remote.job.renewal_result.v1" | "workbench.remote.job.progress.v1" | "workbench.remote.job.completion.v1" | "workbench.remote.job.retry.v1";
566
- export type WorkbenchRemoteProductionSandbox = "firecracker";
567
- export type WorkbenchRemoteLocalSandbox = "docker";
568
- export type WorkbenchRemoteNetworkPolicy = "open" | "none";
569
- export interface WorkbenchRemoteCapabilities {
570
- schema: "workbench.remote.capabilities.v1";
571
- contractVersion: 1;
572
- projectState: {
573
- schema: WorkbenchProjectState["schema"];
574
- guardedSourceWrites: true;
575
- immutableRuntimeFacts: true;
576
- };
577
- execution: {
578
- fencedJobLeases: true;
579
- idempotentCompletion: true;
580
- progressIsBestEffort: true;
581
- maxJobsPerRun: number;
582
- };
583
- sandbox: {
584
- production: WorkbenchRemoteProductionSandbox;
585
- local: WorkbenchRemoteLocalSandbox;
586
- networkPolicies: WorkbenchRemoteNetworkPolicy[];
587
- };
588
- blobs: {
589
- contentAddressed: boolean;
590
- maxUploadBytes: number;
293
+ spec: RemoteWorkbenchEnvironmentSpec;
294
+ imageRef: string;
295
+ sourceHash: string;
296
+ sourceType: "builtin" | "dockerfile";
297
+ build?: {
298
+ dockerfileRef?: BlobObjectRef;
299
+ logRef?: BlobObjectRef;
300
+ error?: string;
301
+ startedAt?: string;
302
+ finishedAt?: string;
591
303
  };
304
+ status: "ready" | "building" | "failed";
305
+ createdAt: string;
306
+ updatedAt: string;
592
307
  }
593
- export interface WorkbenchRemoteRunRequest {
594
- schema: "workbench.remote.run.request.v1";
595
- workflow: "eval" | "improve";
596
- budget?: number;
597
- samples: number;
598
- candidateId?: string;
599
- sourceYaml?: string;
600
- candidateFiles?: RemoteWorkbenchFileInput[];
601
- adapterFiles?: RemoteWorkbenchFileInput[];
602
- selectedSamples?: Array<{
603
- caseId: string;
604
- sampleIndex: number;
605
- }>;
606
- preserveActive?: boolean;
607
- rerun?: boolean;
608
- }
609
- export interface AuthoredWorkbenchCandidateRunSpec extends WorkbenchAuthoredAdapterSpec {
610
- name: string;
611
- }
612
- export interface WorkbenchCaseSelector {
613
- all?: true;
614
- split?: string;
615
- }
616
- export interface WorkbenchSelectionSpec {
617
- metric: string;
618
- cases?: WorkbenchCaseSelector;
619
- }
620
- export interface AuthoredWorkbenchCandidateImproveSpec extends WorkbenchAuthoredAdapterSpec {
621
- edits: string[];
622
- optimizeOn?: WorkbenchCaseSelector;
623
- selectBy?: WorkbenchSelectionSpec;
624
- }
625
- export interface AuthoredWorkbenchCandidateSpec {
626
- name: string;
627
- description?: string;
628
- files: WorkbenchPathRef;
629
- prepare?: WorkbenchCandidatePrepareSpec;
630
- defaultRun?: string;
631
- selectedRunId?: string;
632
- runs: Record<string, AuthoredWorkbenchCandidateRunSpec>;
633
- improve?: AuthoredWorkbenchCandidateImproveSpec;
634
- }
635
- export interface WorkbenchCandidatePrepareSpec {
636
- command: string;
637
- }
638
- export interface WorkbenchPathRef {
639
- path: string;
640
- }
641
- export interface WorkbenchAuthoredAdapterSpec {
642
- use: string;
643
- auth?: string | Record<string, string>;
644
- with?: Record<string, Json>;
645
- }
646
- export interface AuthoredWorkbenchRuntimeSpec {
647
- dockerfile: string;
648
- resources?: {
649
- cpu?: number;
650
- memoryGb?: number;
651
- diskGb?: number;
652
- timeoutMinutes?: number;
653
- };
654
- network?: {
655
- egress?: "none" | "open";
308
+ export interface EngineResolveBinding {
309
+ engine: string;
310
+ resolver: {
311
+ use: string;
312
+ withFingerprint: string;
656
313
  };
657
314
  }
658
- export type AuthoredWorkbenchImproveSpec = WorkbenchAuthoredAdapterSpec;
659
- export type AuthoredWorkbenchRunSpec = WorkbenchAuthoredAdapterSpec;
660
- export type AuthoredWorkbenchScoreSpec = WorkbenchAuthoredAdapterSpec;
661
- export interface AuthoredWorkbenchEngineConfig {
662
- tasks?: WorkbenchAuthoredAdapterSpec;
663
- environment: AuthoredWorkbenchRuntimeSpec;
664
- score: AuthoredWorkbenchScoreSpec;
665
- }
666
- export interface AuthoredWorkbenchEngineSpec {
667
- use: string;
668
- auth?: string | Record<string, string>;
669
- with?: AuthoredWorkbenchEngineConfig | Record<string, Json>;
670
- }
671
- export interface AuthoredWorkbenchBenchmarkSpec {
672
- name: string;
673
- description: string;
674
- engine: AuthoredWorkbenchEngineSpec;
675
- }
676
- export interface AuthoredWorkbenchSourceSpec {
677
- version: 4;
678
- benchmark: AuthoredWorkbenchBenchmarkSpec;
679
- candidate: AuthoredWorkbenchCandidateSpec;
680
- }
681
315
  export type WorkbenchExecutionPurpose = "improve" | "attempt";
682
316
  export type WorkbenchSandboxTemplateKind = "snapshot" | "oci";
683
317
  export interface WorkbenchAdapterInvocation {
@@ -704,11 +338,11 @@ export interface WorkbenchSandboxAllocation {
704
338
  }
705
339
  export interface WorkbenchExecutionCapability {
706
340
  executionId: string;
707
- candidate: {
341
+ skill: {
708
342
  tenantId: string;
709
343
  projectId: string;
710
344
  runId: string;
711
- candidateId?: string;
345
+ versionId?: string;
712
346
  };
713
347
  inputs: WorkbenchExecutionInputRef[];
714
348
  outputPrefix: string;
@@ -735,7 +369,7 @@ export interface WorkbenchExecutionInputRef {
735
369
  mountPath: string;
736
370
  writable: boolean;
737
371
  }
738
- export type WorkbenchExecutionOutputSchema = "workbench.candidate_patch.v1" | "workbench.result.v1" | string;
372
+ export type WorkbenchExecutionOutputSchema = "workbench.skill_patch.v1" | "workbench.result.v1" | string;
739
373
  export interface WorkbenchExecutionOutputContract {
740
374
  name: string;
741
375
  schema: WorkbenchExecutionOutputSchema;
@@ -761,7 +395,7 @@ export interface WorkbenchExecutionSpec {
761
395
  id: string;
762
396
  projectId: string;
763
397
  runId: string;
764
- candidateId?: string;
398
+ versionId?: string;
765
399
  purpose: WorkbenchExecutionPurpose;
766
400
  adapter: WorkbenchAdapterInvocation;
767
401
  sandbox: WorkbenchSandboxTemplate;
@@ -770,12 +404,87 @@ export interface WorkbenchExecutionSpec {
770
404
  policy: WorkbenchExecutionPolicy;
771
405
  metadata: Record<string, Json>;
772
406
  }
773
- export interface WorkbenchCandidatePatch {
407
+ export interface BlobObjectRef {
408
+ bucket: string;
409
+ key: string;
410
+ byteLength: number;
411
+ sha256: string;
412
+ }
413
+ export interface WorkbenchSkillPatch {
774
414
  files: SurfaceSnapshotFile[];
775
415
  fileChanges: string[];
776
416
  summary?: string;
777
417
  feedback?: Json;
778
418
  }
419
+ export interface WorkbenchCaseCriterionScore {
420
+ criterion_id: string;
421
+ label: string;
422
+ score: number;
423
+ pass: boolean;
424
+ errors?: string[];
425
+ rationale?: string;
426
+ }
427
+ export interface MetricStats {
428
+ count: number;
429
+ mean: number;
430
+ variance: number;
431
+ stddev: number;
432
+ min: number;
433
+ max: number;
434
+ }
435
+ export type EvalCaseStatus = "completed" | "error";
436
+ export type EvalCaseSource = Record<string, Json>;
437
+ export interface EvalCaseResult {
438
+ id: string;
439
+ label?: string;
440
+ split?: string;
441
+ status?: EvalCaseStatus;
442
+ durationMs?: number;
443
+ metrics: Record<string, number>;
444
+ source?: EvalCaseSource;
445
+ feedback?: Json;
446
+ criteria?: WorkbenchCaseCriterionScore[];
447
+ }
448
+ export type ExecutionRole = "improver" | "runner" | "engine";
449
+ export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
450
+ export interface ExecutionUsage {
451
+ provider?: string;
452
+ model?: string;
453
+ inputTokens?: number;
454
+ uncachedInputTokens?: number;
455
+ cachedInputTokens?: number;
456
+ cacheCreationInputTokens?: number;
457
+ cacheReadInputTokens?: number;
458
+ outputTokens?: number;
459
+ reasoningOutputTokens?: number;
460
+ totalTokens?: number;
461
+ costUsd?: number;
462
+ costSource?: ExecutionUsageCostSource;
463
+ pricingSource?: string;
464
+ }
465
+ export interface UsageSummary {
466
+ total?: ExecutionUsage;
467
+ improver?: ExecutionUsage;
468
+ runner?: ExecutionUsage;
469
+ engine?: ExecutionUsage;
470
+ }
471
+ export interface EvaluationUsageStats {
472
+ total?: ExecutionUsageStats;
473
+ improver?: ExecutionUsageStats;
474
+ runner?: ExecutionUsageStats;
475
+ engine?: ExecutionUsageStats;
476
+ }
477
+ export interface ExecutionUsageStats {
478
+ inputTokens?: MetricStats;
479
+ uncachedInputTokens?: MetricStats;
480
+ cachedInputTokens?: MetricStats;
481
+ cacheCreationInputTokens?: MetricStats;
482
+ cacheReadInputTokens?: MetricStats;
483
+ reasoningOutputTokens?: MetricStats;
484
+ outputTokens?: MetricStats;
485
+ totalTokens?: MetricStats;
486
+ costUsd?: MetricStats;
487
+ }
779
488
  export interface WorkbenchResult {
780
489
  score: number;
781
490
  metrics?: Record<string, number>;
@@ -888,6 +597,24 @@ export interface WorkbenchTraceSession {
888
597
  trace: WorkbenchExecutionTrace;
889
598
  metadata?: Record<string, Json>;
890
599
  }
600
+ export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
601
+ export type RemoteWorkbenchJobKind = "execute";
602
+ export interface RemoteWorkbenchJob {
603
+ id: string;
604
+ projectId: string;
605
+ runId: string;
606
+ versionId?: string;
607
+ kind: RemoteWorkbenchJobKind;
608
+ status: RemoteWorkbenchJobStatus;
609
+ attempt: number;
610
+ createdAt: string;
611
+ updatedAt: string;
612
+ startedAt?: string;
613
+ finishedAt?: string;
614
+ input: Json;
615
+ output?: Json;
616
+ error?: string;
617
+ }
891
618
  export interface WorkbenchExecutionEvidence {
892
619
  id: string;
893
620
  kind: string;
@@ -896,7 +623,7 @@ export interface WorkbenchExecutionEvidence {
896
623
  status: RemoteWorkbenchJobStatus;
897
624
  jobIds: string[];
898
625
  executionIds: string[];
899
- candidateId?: string;
626
+ versionId?: string;
900
627
  caseId?: string;
901
628
  sampleIndex?: number;
902
629
  attemptIndex?: number;
@@ -908,44 +635,6 @@ export interface WorkbenchExecutionTraceDetail {
908
635
  runId: string;
909
636
  executions: WorkbenchExecutionEvidence[];
910
637
  }
911
- export interface AuthoredWorkbenchCaseSummary {
912
- id: string;
913
- slug: string;
914
- path: string;
915
- name: string;
916
- split?: string;
917
- fileCount: number;
918
- }
919
- export interface AuthoredWorkbenchSourceFile {
920
- path: string;
921
- content: string;
922
- }
923
- export interface AuthoredWorkbenchSourceDocument {
924
- path: string;
925
- exists: boolean;
926
- source_yaml: string;
927
- source_files: AuthoredWorkbenchSourceFile[];
928
- spec: AuthoredWorkbenchSourceSpec | null;
929
- cases: AuthoredWorkbenchCaseSummary[];
930
- }
931
- export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
932
- export type RemoteWorkbenchJobKind = "execute";
933
- export interface RemoteWorkbenchJob {
934
- id: string;
935
- projectId: string;
936
- runId: string;
937
- candidateId?: string;
938
- kind: RemoteWorkbenchJobKind;
939
- status: RemoteWorkbenchJobStatus;
940
- attempt: number;
941
- createdAt: string;
942
- updatedAt: string;
943
- startedAt?: string;
944
- finishedAt?: string;
945
- input: Json;
946
- output?: Json;
947
- error?: string;
948
- }
949
638
  export interface WorkbenchRemoteJobClaimRequest {
950
639
  schema: "workbench.remote.job.claim_request.v1";
951
640
  ownerUserId: string;
@@ -1015,32 +704,6 @@ export interface WorkbenchRemoteJobRetry {
1015
704
  leaseToken: string;
1016
705
  reason: string;
1017
706
  }
1018
- export interface RemoteWorkbenchRun extends WorkbenchRuntimeRun {
1019
- projectId: string;
1020
- environmentVersionId?: string;
1021
- specVersionId: string;
1022
- candidateId: string | null;
1023
- activeCandidateId?: string | null;
1024
- outputCandidateId?: string | null;
1025
- input: {
1026
- benchmarkFingerprint: string;
1027
- candidateFingerprint: string;
1028
- baseCandidateId: string | null;
1029
- payerUserId?: string;
1030
- candidateOwnerUserId?: string;
1031
- candidateOwnerUsername?: string;
1032
- preserveActiveCandidateId?: string | null;
1033
- selectedSamples?: Array<{
1034
- caseId: string;
1035
- sampleIndex: number;
1036
- }>;
1037
- sourceYaml?: string;
1038
- candidateSourceFiles?: SurfaceSnapshotFile[];
1039
- baseFiles: SurfaceSnapshotFile[];
1040
- engineResolveFiles: SurfaceSnapshotFile[];
1041
- };
1042
- jobCount: number;
1043
- completedJobCount: number;
1044
- failedJobCount: number;
1045
- }
707
+ export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
708
+ export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
1046
709
  //# sourceMappingURL=index.d.ts.map