@workbench-ai/workbench-contract 0.0.67 → 0.0.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,682 +1,394 @@
1
1
  export type Json = null | boolean | number | string | Json[] | {
2
2
  [key: string]: Json;
3
3
  };
4
- export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
5
- export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
6
- export interface RemoteWorkbenchProject {
7
- id: string;
8
- ownerUserId: string;
9
- ownerUsername: string;
10
- visibility: "private" | "public";
11
- createdAt: string;
12
- updatedAt: string;
13
- activeEnvironmentVersionId: string;
14
- currentSpecVersionId: string;
15
- activeCandidateId?: string | null;
16
- sourceFingerprint?: string;
17
- starCount: number;
4
+ export interface SurfaceSnapshotFile {
5
+ path: string;
6
+ kind?: "text" | "binary";
7
+ encoding?: "utf8" | "base64";
8
+ content: string;
9
+ executable?: boolean;
18
10
  }
19
- export interface RemoteWorkbenchProjectSummary {
20
- id: string;
21
- ownerUsername: string;
22
- name: string;
23
- description: string;
24
- visibility: "private" | "public";
25
- updatedAt: string;
26
- currentSpecVersionId: string;
27
- activeEnvironmentVersionId: string;
28
- activeCandidateId?: string | null;
29
- candidateCount: number;
30
- evaluationCount: number;
31
- runCount: number;
32
- starCount: number;
33
- viewerHasStarred?: boolean;
11
+ export declare function normalizeWorkbenchSourcePath(filePath: string): string;
12
+ export declare function normalizeWorkbenchSourceRequestPath(filePath: string): string;
13
+ export declare function normalizeWorkbenchSkillName(value: string): string;
14
+ export declare function isWorkbenchLocalMetadataPath(filePath: string): boolean;
15
+ export type WorkbenchInspectionFileOwnerKind = "version" | "trace" | "artifact";
16
+ export interface WorkbenchInspectionFileContent {
17
+ path: string;
18
+ kind?: SurfaceSnapshotFile["kind"];
19
+ encoding?: SurfaceSnapshotFile["encoding"];
20
+ executable?: boolean;
21
+ content?: string;
22
+ unavailableReason?: string;
34
23
  }
35
- export interface WorkbenchSpecValidation {
36
- ok: boolean;
37
- errors: string[];
38
- warnings: string[];
24
+ export declare function workbenchInspectionFileContentUnavailableReason(file: Pick<SurfaceSnapshotFile, "kind" | "encoding">): string | null;
25
+ export declare function workbenchInspectionFileContent(file: SurfaceSnapshotFile): WorkbenchInspectionFileContent;
26
+ export declare function workbenchInspectionFileManifest(file: SurfaceSnapshotFile): SurfaceSnapshotFile;
27
+ export interface WorkbenchAgent {
28
+ name: string;
29
+ adapter: string;
30
+ model?: string;
31
+ config: Record<string, Json>;
39
32
  }
40
- export interface RemoteWorkbenchSpecVersion {
41
- id: string;
42
- projectId: string;
43
- ordinal: number;
44
- sourceYaml: string;
45
- createdAt: string;
46
- updatedAt: string;
47
- validation: WorkbenchSpecValidation;
33
+ export interface WorkbenchAgentSnapshot {
34
+ hash: string;
35
+ agent: WorkbenchAgent;
48
36
  }
49
- export interface RemoteWorkbenchEnvironment {
50
- id: string;
37
+ export type WorkbenchSkillSourceKind = "local" | "remote" | "none";
38
+ export type WorkbenchSkillIncludeKind = Exclude<WorkbenchSkillSourceKind, "none">;
39
+ export interface WorkbenchSkillInclude {
51
40
  name: string;
52
- description: string;
53
- currentVersionId: string;
54
- builtIn: boolean;
41
+ kind: WorkbenchSkillIncludeKind;
42
+ path?: string;
43
+ from?: string;
44
+ ref?: string;
45
+ resolvedRef?: string;
46
+ hash?: string;
47
+ files?: SurfaceSnapshotFile[];
48
+ }
49
+ export interface WorkbenchSkillSource {
50
+ name: string;
51
+ kind: WorkbenchSkillSourceKind;
52
+ path?: string;
53
+ from?: string;
54
+ ref?: string;
55
+ resolvedRef?: string;
56
+ hash?: string;
57
+ includes?: WorkbenchSkillInclude[];
58
+ }
59
+ export interface WorkbenchSkillBundleSnapshot {
60
+ hash: string;
61
+ skillName: string;
62
+ entryName: string;
63
+ source: WorkbenchSkillSource;
64
+ files: SurfaceSnapshotFile[];
65
+ includedSkills: WorkbenchSkillInclude[];
55
66
  createdAt: string;
56
- updatedAt: string;
57
67
  }
58
- export interface RemoteWorkbenchEnvironmentVersion {
68
+ export interface WorkbenchVersion {
59
69
  id: string;
60
- environmentId: string;
61
- name: string;
62
- spec: RemoteWorkbenchEnvironmentSpec;
63
- imageRef: string;
64
- sourceHash: string;
65
- sourceType: "builtin" | "dockerfile";
66
- build?: {
67
- dockerfileRef?: BlobObjectRef;
68
- logRef?: BlobObjectRef;
69
- error?: string;
70
- startedAt?: string;
71
- finishedAt?: string;
72
- };
73
- status: "ready" | "building" | "failed";
70
+ hash: string;
71
+ message: string;
72
+ parentIds: string[];
74
73
  createdAt: string;
75
- updatedAt: string;
76
- }
77
- export interface RemoteWorkbenchEnvironmentSpec {
78
- base: string;
79
- resources: {
80
- cpu: number;
81
- memoryGb: number;
82
- diskGb: number;
83
- timeoutMinutes: number;
84
- };
85
- network: "off" | "on";
86
- }
87
- export interface BlobObjectRef {
88
- bucket: string;
89
- key: string;
90
- byteLength: number;
91
- sha256: string;
92
- }
93
- export type RemoteWorkbenchSnapshotKind = "candidate" | "engineResolve" | "adapters" | "runtime";
94
- export type WorkspaceWriteEncoding = "utf8" | "base64";
95
- export interface SurfaceSnapshotFile {
96
- path: string;
97
- kind: "text" | "binary";
98
- encoding: WorkspaceWriteEncoding;
99
- content: string;
100
- executable: boolean;
101
- contentRedacted?: boolean;
102
- }
103
- export interface WorkbenchEngineCaseFiles {
104
- public?: SurfaceSnapshotFile[];
105
- private?: SurfaceSnapshotFile[];
106
- source?: SurfaceSnapshotFile[];
107
- }
108
- export interface SurfaceSnapshot {
109
74
  files: SurfaceSnapshotFile[];
110
75
  }
111
- export interface RemoteWorkbenchFileInput {
112
- path: string;
113
- content: string;
114
- encoding?: WorkspaceWriteEncoding;
115
- executable?: boolean;
116
- }
117
- export interface EngineResolveBinding {
118
- engine: string;
119
- resolver: {
120
- use: string;
121
- withFingerprint: string;
122
- };
123
- }
124
- export interface RemoteWorkbenchSnapshotBase {
76
+ export interface WorkbenchEvalSnapshot {
77
+ hash: string;
125
78
  files: SurfaceSnapshotFile[];
79
+ caseCount: number;
80
+ createdAt: string;
126
81
  updatedAt: string;
82
+ scoreAdapter: string;
127
83
  }
128
- export interface RemoteWorkbenchEngineResolveSnapshot extends RemoteWorkbenchSnapshotBase {
129
- kind: "engineResolve";
130
- engineResolveBinding: EngineResolveBinding;
131
- }
132
- export interface RemoteWorkbenchStandardSnapshot extends RemoteWorkbenchSnapshotBase {
133
- kind: Exclude<RemoteWorkbenchSnapshotKind, "engineResolve">;
134
- }
135
- export type RemoteWorkbenchSnapshot = RemoteWorkbenchEngineResolveSnapshot | RemoteWorkbenchStandardSnapshot;
136
- export type CandidateStatus = "running" | "evaluated" | "repair_exhausted" | "eval_error" | "agent_error";
137
- export interface MetricStats {
138
- count: number;
139
- mean: number;
140
- variance: number;
141
- stddev: number;
142
- min: number;
143
- max: number;
144
- }
145
- export type EvalCaseStatus = "completed" | "error";
146
- export type EvalCaseSource = Record<string, Json>;
147
- export interface CandidateCaseCriterionScore {
148
- criterion_id: string;
149
- label: string;
150
- score: number;
151
- pass: boolean;
152
- errors?: string[];
153
- rationale?: string;
154
- }
155
- export interface EvalCaseResult {
84
+ export type WorkbenchRunKind = "eval" | "improve" | "compare";
85
+ export type WorkbenchRunStatus = "running" | "succeeded" | "failed" | "canceled";
86
+ export type WorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "canceled";
87
+ export type WorkbenchArtifactKind = "file" | "directory" | "log" | "scorecard";
88
+ export interface WorkbenchRun {
156
89
  id: string;
157
- label?: string;
158
- split?: string;
159
- status?: EvalCaseStatus;
160
- durationMs?: number;
161
- metrics: Record<string, number>;
162
- source?: EvalCaseSource;
163
- feedback?: Json;
164
- criteria?: CandidateCaseCriterionScore[];
165
- }
166
- export type ExecutionRole = "improver" | "runner" | "engine";
167
- export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
168
- export interface ExecutionUsage {
169
- provider?: string;
170
- model?: string;
171
- inputTokens?: number;
172
- uncachedInputTokens?: number;
173
- cachedInputTokens?: number;
174
- cacheCreationInputTokens?: number;
175
- cacheReadInputTokens?: number;
176
- outputTokens?: number;
177
- reasoningOutputTokens?: number;
178
- totalTokens?: number;
90
+ kind: WorkbenchRunKind;
91
+ versionId: string;
92
+ skillName: string;
93
+ skillBundleHash: string;
94
+ evalHash: string;
95
+ agentName: string;
96
+ agentHash: string;
97
+ status: WorkbenchRunStatus;
98
+ score?: number;
179
99
  costUsd?: number;
180
- costSource?: ExecutionUsageCostSource;
181
- pricingSource?: string;
182
- }
183
- export interface UsageSummary {
184
- total?: ExecutionUsage;
185
- improver?: ExecutionUsage;
186
- runner?: ExecutionUsage;
187
- engine?: ExecutionUsage;
188
- }
189
- export interface EvaluationCandidateSummary {
190
- id: string;
191
- kind: "candidate";
192
- label?: string;
193
- }
194
- export type EvaluationSampleStatus = "planned" | "running" | "completed" | "error";
195
- export type EvaluationStatus = EvaluationSampleStatus | "partial";
196
- export interface EvaluationSampleRecord {
197
- id: string;
198
- index: number;
199
- candidate: EvaluationCandidateSummary;
200
- status: EvaluationSampleStatus;
201
- startedAt?: string;
100
+ latencyMs?: number;
101
+ jobIds?: string[];
102
+ traceIds: string[];
103
+ createdAt: string;
202
104
  finishedAt?: string;
203
- durationMs?: number;
204
- metrics?: Record<string, number>;
205
- usage?: UsageSummary;
105
+ parentRunId?: string;
106
+ outputVersionId?: string;
206
107
  error?: string;
207
- cases?: EvalCaseResult[];
208
- feedback?: Json;
209
108
  }
210
- export interface EvaluationCaseStats {
109
+ export interface WorkbenchJob {
211
110
  id: string;
212
- label?: string;
213
- split?: string;
214
- status?: EvalCaseStatus;
215
- sampleCount: number;
216
- metrics: Record<string, MetricStats>;
217
- durationMs?: MetricStats;
218
- }
219
- export interface EvaluationUsageStats {
220
- total?: ExecutionUsageStats;
221
- improver?: ExecutionUsageStats;
222
- runner?: ExecutionUsageStats;
223
- engine?: ExecutionUsageStats;
224
- }
225
- export interface ExecutionUsageStats {
226
- inputTokens?: MetricStats;
227
- uncachedInputTokens?: MetricStats;
228
- cachedInputTokens?: MetricStats;
229
- cacheCreationInputTokens?: MetricStats;
230
- cacheReadInputTokens?: MetricStats;
231
- outputTokens?: MetricStats;
232
- reasoningOutputTokens?: MetricStats;
233
- totalTokens?: MetricStats;
234
- costUsd?: MetricStats;
235
- }
236
- export interface EvaluationRecord {
237
- candidate: EvaluationCandidateSummary;
238
- status: EvaluationStatus;
239
- sampleCount: number;
240
- completedSampleCount: number;
241
- errorSampleCount: number;
111
+ runId: string;
112
+ kind: WorkbenchRunKind;
113
+ versionId: string;
114
+ skillName: string;
115
+ skillBundleHash: string;
116
+ evalHash: string;
117
+ agentName: string;
118
+ agentHash: string;
119
+ caseId: string;
120
+ sample: number;
121
+ status: WorkbenchJobStatus;
122
+ score?: number;
123
+ command?: string;
124
+ dockerImage?: string;
125
+ exitCode?: number;
126
+ artifactIds: string[];
127
+ traceIds: string[];
128
+ createdAt: string;
242
129
  startedAt?: string;
243
130
  finishedAt?: string;
244
- metrics?: Record<string, MetricStats>;
245
- durationMs?: MetricStats;
246
- usage?: EvaluationUsageStats;
247
- cases?: EvaluationCaseStats[];
248
- samples: EvaluationSampleRecord[];
131
+ durationMs?: number;
249
132
  error?: string;
250
133
  }
251
- export interface EvaluationSummary {
134
+ export interface WorkbenchArtifact {
252
135
  id: string;
253
136
  runId: string;
254
- benchmarkFingerprint: string;
255
- candidateFingerprint: string;
256
- candidateId: string;
257
- candidateName?: string;
258
- candidateVersion: number;
259
- candidateRunId?: string;
260
- candidateRunName?: string;
137
+ jobId: string;
138
+ kind: WorkbenchArtifactKind;
139
+ path: string;
261
140
  createdAt: string;
262
- updatedAt: string;
263
- status: EvaluationStatus;
264
- sampleCount: number;
265
- completedSampleCount: number;
266
- errorSampleCount: number;
267
- metrics?: Record<string, MetricStats>;
268
- selectionMetric?: string;
269
- selectionLabel?: string;
270
- selectionScore?: MetricStats;
271
- durationMs?: MetricStats;
272
- usage?: EvaluationUsageStats;
273
- error?: string;
274
- }
275
- export interface EvaluationScorecard extends EvaluationSummary {
276
- evaluation: EvaluationRecord;
141
+ files: SurfaceSnapshotFile[];
277
142
  }
278
- export interface WorkbenchEvaluationMetricDescriptor {
143
+ export interface WorkbenchTrace {
279
144
  id: string;
280
- label: string;
281
- direction: "higher" | "lower";
282
- kind: "number" | "duration_ms" | "currency_usd";
283
- group: "metric" | "execution" | "usage" | "other";
284
- primary: boolean;
285
- semanticRole?: "performance" | "speed" | "cost";
286
- }
287
- export interface WorkbenchEvaluationComparisonRow {
288
- evaluationId: string;
289
145
  runId: string;
290
- candidateId: string;
291
- candidateLabel: string;
292
- configurationLabel: string;
293
- status: EvaluationSummary["status"];
294
- score: number | null;
295
- metrics: Record<string, number>;
296
- createdAt: string;
297
- updatedAt: string;
298
- error?: string;
299
- }
300
- export interface WorkbenchCandidateEvaluationRollup {
301
- candidateId: string;
302
- candidateLabel: string;
303
- evaluationCount: number;
304
- completeEvaluationCount: number;
305
- scoredEvaluationCount: number;
306
- bestEvaluationId: string | null;
307
- bestScore: number | null;
308
- meanScore: number | null;
309
- }
310
- export interface WorkbenchEvaluationComparison {
311
- evaluations: EvaluationSummary[];
312
- rows: WorkbenchEvaluationComparisonRow[];
313
- candidates: WorkbenchCandidateEvaluationRollup[];
314
- metrics: WorkbenchEvaluationMetricDescriptor[];
315
- }
316
- export interface CandidateSummary {
317
- id: string;
318
- name?: string;
319
- version: number;
320
- ordinal: number;
321
- benchmarkFingerprint: string;
322
- candidateFingerprint: string;
323
- ownerUserId?: string;
324
- ownerUsername?: string;
325
- visibility?: "private" | "public";
146
+ jobId?: string;
147
+ versionId: string;
148
+ skillName: string;
149
+ skillBundleHash: string;
150
+ evalHash?: string;
151
+ agentName: string;
152
+ agentHash?: string;
326
153
  createdAt: string;
327
- baseId?: string;
328
- referenceIds: string[];
329
- status: CandidateStatus;
330
- fileChanges: string[];
331
- usage?: UsageSummary;
332
- }
333
- export interface CandidateRecord extends CandidateSummary {
334
- eval?: EvaluationRecord;
335
- prompt?: string;
336
- meta?: Json;
337
- }
338
- export interface CandidateLineageNode {
339
- id: string;
340
- active: boolean;
341
- summary: CandidateSummary;
342
- }
343
- export interface CandidateLineageEdge {
344
- id: string;
345
- kind: "anchor";
346
- sourceId: string;
347
- targetId: string;
348
- }
349
- export interface CandidateLineageGraph {
350
- activeId: string | null;
351
- nodes: CandidateLineageNode[];
352
- edges: CandidateLineageEdge[];
353
- }
354
- export declare function buildCandidateLineage(args: {
355
- summaries: readonly CandidateSummary[];
356
- activeId: string | null;
357
- }): CandidateLineageGraph;
358
- export declare function buildWorkbenchEvaluationComparison(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationComparison;
359
- export declare function buildWorkbenchEvaluationMetricDescriptors(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationMetricDescriptor[];
360
- export declare function readEvaluationScore(evaluation: EvaluationSummary): number | null;
361
- export declare function isCompleteEvaluationSummary(evaluation: Pick<EvaluationSummary, "status" | "sampleCount" | "completedSampleCount" | "errorSampleCount">): boolean;
362
- export declare function formatEvaluationConfigurationLabel(evaluation: Pick<EvaluationSummary, "candidateRunName" | "candidateRunId">): string;
363
- export type CandidatePreviewMode = "diff" | "raw" | "rendered";
364
- export type CandidatePreviewKind = "text" | "markdown" | "table" | "spreadsheet" | "image" | "pdf" | "unsupported";
365
- export type CandidatePreviewSourceEncoding = "utf8" | "base64";
366
- export type CandidateFileStatus = "added" | "modified" | "unchanged";
367
- export interface CandidateFileSummary {
368
- path: string;
369
- old_path: string | null;
370
- status: CandidateFileStatus;
371
- mime_type: string | null;
372
- preview_kind: CandidatePreviewKind;
373
- additions: number;
374
- deletions: number;
375
- }
376
- export interface CandidateFilePreviewSource {
377
- content: string;
378
- encoding: CandidatePreviewSourceEncoding;
379
- }
380
- export interface CandidateFilePreview {
381
- path: string;
382
- view: CandidatePreviewMode;
383
- mime_type: string | null;
384
- preview_kind: CandidatePreviewKind;
385
- diff: string | null;
386
- source: CandidateFilePreviewSource | null;
387
- rendered_html: string | null;
388
- }
389
- export interface CandidateCaseCriterionResult {
390
- criterion_id: string;
391
- pass: boolean;
392
- score: number;
393
- errors: string[];
394
- rationale?: string;
395
- }
396
- export interface CandidateCaseExecutionRef {
397
- runId: string;
398
- kind: string;
399
- role: WorkbenchExecutionEventRole;
400
- status: RemoteWorkbenchJobStatus;
401
- jobIds: string[];
402
- executionIds: string[];
403
- createdAt?: string;
404
- startedAt?: string;
405
- finishedAt?: string;
406
- durationMs?: number;
407
- caseId?: string;
408
- sampleIndex?: number;
409
- attemptIndex?: number;
410
- }
411
- export interface CandidateCaseReview {
412
- candidateId: string;
413
- caseId: string;
414
- caseLabel: string;
415
- sampleId?: string;
416
- sampleIndex?: number;
417
- status?: EvalCaseStatus | RemoteWorkbenchJobStatus;
418
- metrics: Record<string, number>;
419
- durationMs?: number;
420
- source?: EvalCaseSource;
421
- feedback?: Json;
422
- executions: CandidateCaseExecutionRef[];
423
- criteria_results: CandidateCaseCriterionResult[];
424
- }
425
- export type RunStatus = "queued" | "running" | "finished";
426
- export type RunOutcome = "ok" | "error" | "cancelled";
427
- export type RemoteRunWorkflow = "eval" | "improve";
428
- export interface RunSummary {
429
- id: string;
430
- workflow: RemoteRunWorkflow;
431
- benchmarkFingerprint: string;
432
- status: RunStatus;
433
- candidateId?: string | null;
434
- candidateRunId?: string;
435
- candidateRunName?: string;
436
- startedAt: string;
437
- finishedAt?: string;
438
- durationMs?: number;
439
- improver: string;
440
- engineRun: string;
441
- strategy: string;
442
- optimizeOn?: string;
443
- selectBy?: string;
444
- budget: number;
445
- repairBudget: number;
446
- attemptsRequested: number;
447
- attemptsExecuted: number;
448
- samples: number;
449
- executionFingerprint?: string;
450
- stoppedReason?: "budget_exhausted" | "completed" | "dry_run" | "cancelled";
451
- outcome?: RunOutcome;
452
- error?: string;
453
- activeCandidateId?: string | null;
454
- outputCandidateId?: string | null;
455
- }
456
- export interface WorkbenchRuntimeRun extends RunSummary {
457
- jobCount?: number;
458
- completedJobCount?: number;
459
- failedJobCount?: number;
154
+ request: Json;
155
+ result: Json;
156
+ files: SurfaceSnapshotFile[];
460
157
  }
461
- export interface RuntimeEvent {
462
- id: string;
463
- at: string;
464
- type: "run_started" | "job_queued" | "job_started" | "job_progress" | "sandbox_allocated" | "sandbox_stopped" | "candidate_created" | "candidate_evaluated" | "active_changed" | "run_finished";
158
+ export interface WorkbenchLineageEdge {
159
+ parentId: string;
160
+ childId: string;
465
161
  runId?: string;
466
- jobId?: string;
467
- candidateId?: string;
468
- baseId?: string;
469
- activeId?: string;
470
- status?: CandidateStatus | RemoteWorkbenchJobStatus;
471
- metrics?: Record<string, number>;
472
- detail?: Record<string, Json>;
473
- }
474
- export interface RuntimeSnapshot {
475
- workspaceRoot: string;
476
- activeId: string | null;
477
- currentBenchmarkFingerprint: string | null;
478
- summaries: CandidateSummary[];
479
- evaluations: EvaluationSummary[];
480
- runs: RunSummary[];
481
- }
482
- export interface WorkbenchRuntimeCandidateFiles {
483
- candidateId: string;
484
- files: SurfaceSnapshotFile[];
162
+ reason: "version" | "improve";
163
+ createdAt: string;
164
+ message?: string;
485
165
  }
486
- export interface WorkbenchRuntimeExecutionFiles {
487
- jobId: string;
488
- files: SurfaceSnapshotFile[];
166
+ export type WorkbenchRemoteKind = "workbench-cloud" | "file";
167
+ export interface WorkbenchRemote {
168
+ name: string;
169
+ url: string;
170
+ kind: WorkbenchRemoteKind;
489
171
  }
490
- export interface WorkbenchRuntimeBundle {
491
- schema: "workbench.runtime.bundle.v1";
492
- activeId: string | null;
493
- candidates: CandidateRecord[];
494
- candidateFiles: WorkbenchRuntimeCandidateFiles[];
495
- evaluations: EvaluationScorecard[];
496
- runs: WorkbenchRuntimeRun[];
497
- jobs: RemoteWorkbenchJob[];
498
- executionFiles: WorkbenchRuntimeExecutionFiles[];
499
- events: RuntimeEvent[];
500
- }
501
- export interface WorkbenchRuntimeBundleStats {
502
- candidates: number;
503
- candidateFiles: number;
504
- evaluations: number;
505
- runs: number;
506
- jobs: number;
507
- executionFiles: number;
508
- events: number;
509
- activeId: string | null;
510
- }
511
- export interface WorkbenchRuntimeImportResult {
512
- changed: boolean;
513
- stats: WorkbenchRuntimeBundleStats;
514
- }
515
- export interface WorkbenchProjectSourceResources {
516
- cpu?: number;
517
- memoryGb?: number;
518
- diskGb?: number;
519
- timeoutMinutes?: number;
520
- }
521
- export interface WorkbenchProjectStateSource {
522
- source: string;
523
- files: SurfaceSnapshotFile[];
524
- candidateFiles: SurfaceSnapshotFile[];
525
- engineResolveFiles: SurfaceSnapshotFile[];
526
- engineResolveBinding: EngineResolveBinding;
527
- adapterFiles: SurfaceSnapshotFile[];
528
- dockerfile: string;
529
- runtimeDockerfile: string;
530
- runtimeFiles: SurfaceSnapshotFile[];
531
- network: "off" | "on";
532
- resources: WorkbenchProjectSourceResources;
533
- revisionId?: string;
534
- fingerprint?: string;
172
+ export interface WorkbenchRefs {
173
+ current?: string;
174
+ [name: string]: string | undefined;
535
175
  }
536
- export interface WorkbenchProjectStateBase {
537
- sourceRevisionId?: string;
538
- sourceFingerprint?: string;
539
- runtimeFingerprint?: string;
176
+ export interface WorkbenchProjectState {
177
+ schema: "workbench.skill.state.v1";
178
+ root: string;
179
+ refs: WorkbenchRefs;
180
+ remotes: Record<string, WorkbenchRemote>;
181
+ versions: WorkbenchVersion[];
182
+ skillSources: WorkbenchSkillSource[];
183
+ skillBundles: WorkbenchSkillBundleSnapshot[];
184
+ evals: WorkbenchEvalSnapshot[];
185
+ agents: WorkbenchAgent[];
186
+ runs: WorkbenchRun[];
187
+ jobs: WorkbenchJob[];
188
+ traces: WorkbenchTrace[];
189
+ executionEvents: WorkbenchExecutionEventBatch[];
190
+ artifacts: WorkbenchArtifact[];
191
+ lineage: WorkbenchLineageEdge[];
192
+ }
193
+ export interface WorkbenchStatus {
194
+ root: string;
195
+ initialized: boolean;
196
+ currentSkillHash?: string;
197
+ currentVersionId?: string;
198
+ defaultSkill?: string;
199
+ defaultAgent?: string;
200
+ versionCount: number;
201
+ skillCount: number;
202
+ agentCount: number;
203
+ runCount: number;
204
+ remoteCount: number;
205
+ pendingSyncCount?: number;
206
+ lastScore?: number;
540
207
  }
541
- export interface WorkbenchProjectStateRemote {
542
- id: string;
208
+ export interface WorkbenchRemoteSyncState {
209
+ schema: "workbench.remote-sync-state.v1";
543
210
  remote: string;
544
- ownerUsername: string;
545
- name: string;
546
- visibility: "private" | "public";
547
- }
548
- export interface WorkbenchProjectState {
549
- schema: "workbench.project.state.v1";
550
- project: WorkbenchProjectStateRemote;
551
- base: WorkbenchProjectStateBase;
552
- source: WorkbenchProjectStateSource;
553
- runtime: WorkbenchRuntimeBundle;
554
- }
555
- export interface WorkbenchProjectStateImportResult {
556
- changed: boolean;
557
- source: {
558
- changed: boolean;
559
- revisionId?: string;
560
- fingerprint?: string;
211
+ url: string;
212
+ status: "synced" | "error";
213
+ lastSyncedAt?: string;
214
+ lastAttemptAt: string;
215
+ lastError?: {
216
+ code: string;
217
+ message: string;
218
+ } | null;
219
+ pushed?: number;
220
+ pulled?: number;
221
+ }
222
+ export interface WorkbenchStatusSnapshot {
223
+ schema: "workbench.status.v1";
224
+ ok: true;
225
+ project: {
226
+ root: string;
227
+ initialized: boolean;
228
+ currentVersionId?: string;
229
+ defaultSkill?: string;
230
+ defaultAgent?: string;
561
231
  };
562
- runtime: WorkbenchRuntimeImportResult;
563
- state: WorkbenchProjectState;
564
- }
565
- export type WorkbenchRemoteContractSchema = "workbench.remote.capabilities.v1" | "workbench.remote.run.request.v1" | "workbench.remote.job.claim_request.v1" | "workbench.remote.job.claim.v1" | "workbench.remote.job.renewal.v1" | "workbench.remote.job.renewal_result.v1" | "workbench.remote.job.progress.v1" | "workbench.remote.job.completion.v1" | "workbench.remote.job.retry.v1";
566
- export type WorkbenchRemoteProductionSandbox = "firecracker";
567
- export type WorkbenchRemoteLocalSandbox = "docker";
568
- export type WorkbenchRemoteNetworkPolicy = "open" | "none";
569
- export interface WorkbenchRemoteCapabilities {
570
- schema: "workbench.remote.capabilities.v1";
571
- contractVersion: 1;
572
- projectState: {
573
- schema: WorkbenchProjectState["schema"];
574
- guardedSourceWrites: true;
575
- immutableRuntimeFacts: true;
232
+ worktree: {
233
+ hasUnversionedChanges: boolean;
234
+ latestVersionId?: string;
576
235
  };
577
- execution: {
578
- fencedJobLeases: true;
579
- idempotentCompletion: true;
580
- progressIsBestEffort: true;
581
- maxJobsPerRun: number;
236
+ runs: {
237
+ total: number;
238
+ lastRunId?: string;
239
+ lastStatus?: WorkbenchRunStatus;
240
+ lastScore?: number;
582
241
  };
583
- sandbox: {
584
- production: WorkbenchRemoteProductionSandbox;
585
- local: WorkbenchRemoteLocalSandbox;
586
- networkPolicies: WorkbenchRemoteNetworkPolicy[];
587
- };
588
- blobs: {
589
- contentAddressed: boolean;
590
- maxUploadBytes: number;
591
- };
592
- }
593
- export interface WorkbenchRemoteRunRequest {
594
- schema: "workbench.remote.run.request.v1";
595
- workflow: "eval" | "improve";
596
- budget?: number;
597
- samples: number;
598
- candidateId?: string;
599
- sourceYaml?: string;
600
- candidateFiles?: RemoteWorkbenchFileInput[];
601
- adapterFiles?: RemoteWorkbenchFileInput[];
602
- selectedSamples?: Array<{
603
- caseId: string;
604
- sampleIndex: number;
242
+ remotes: Array<{
243
+ name: string;
244
+ kind: WorkbenchRemoteKind;
245
+ url: string;
246
+ sync: {
247
+ status: "up_to_date" | "error" | "never";
248
+ lastSyncedAt?: string;
249
+ lastAttemptAt?: string;
250
+ lastError?: {
251
+ code: string;
252
+ message: string;
253
+ } | null;
254
+ nextCommand?: string;
255
+ };
256
+ publication: {
257
+ status: "published" | "unpublished";
258
+ visibility?: string;
259
+ versionId?: string;
260
+ installUrl?: string;
261
+ pinnedInstallUrl?: string;
262
+ };
605
263
  }>;
606
- preserveActive?: boolean;
607
- rerun?: boolean;
608
- }
609
- export interface AuthoredWorkbenchCandidateRunSpec extends WorkbenchAuthoredAdapterSpec {
610
- name: string;
611
- }
612
- export interface WorkbenchCaseSelector {
613
- all?: true;
614
- split?: string;
615
- }
616
- export interface WorkbenchSelectionSpec {
617
- metric: string;
618
- cases?: WorkbenchCaseSelector;
619
- }
620
- export interface AuthoredWorkbenchCandidateImproveSpec extends WorkbenchAuthoredAdapterSpec {
621
- edits: string[];
622
- optimizeOn?: WorkbenchCaseSelector;
623
- selectBy?: WorkbenchSelectionSpec;
264
+ auth?: {
265
+ workbenchCloud: {
266
+ status: "authenticated" | "not_authenticated";
267
+ baseUrl?: string;
268
+ username?: string;
269
+ };
270
+ adapters: Array<{
271
+ adapter: string;
272
+ slot?: string;
273
+ profile: string;
274
+ status: string;
275
+ method?: string;
276
+ updatedAt?: string;
277
+ }>;
278
+ };
279
+ next: string[];
280
+ }
281
+ export interface WorkbenchComparisonCell {
282
+ versionId: string;
283
+ skillName: string;
284
+ skillBundleHash: string;
285
+ evalHash: string;
286
+ agentName: string;
287
+ agentHash: string;
288
+ runId?: string;
289
+ status?: WorkbenchRunStatus;
290
+ score?: number;
291
+ costUsd?: number;
292
+ latencyMs?: number;
293
+ error?: string;
624
294
  }
625
- export interface AuthoredWorkbenchCandidateSpec {
626
- name: string;
627
- description?: string;
628
- files: WorkbenchPathRef;
629
- prepare?: WorkbenchCandidatePrepareSpec;
630
- defaultRun?: string;
631
- selectedRunId?: string;
632
- runs: Record<string, AuthoredWorkbenchCandidateRunSpec>;
633
- improve?: AuthoredWorkbenchCandidateImproveSpec;
634
- }
635
- export interface WorkbenchCandidatePrepareSpec {
636
- command: string;
637
- }
638
- export interface WorkbenchPathRef {
295
+ export interface WorkbenchComparison {
296
+ evalHash?: string;
297
+ versions: WorkbenchVersion[];
298
+ skills: WorkbenchSkillBundleSnapshot[];
299
+ agents: WorkbenchAgentSnapshot[];
300
+ cells: WorkbenchComparisonCell[];
301
+ }
302
+ export interface WorkbenchInspectionSnapshot {
303
+ root: string;
304
+ status: WorkbenchStatus;
305
+ versions: WorkbenchVersion[];
306
+ skillSources: WorkbenchSkillSource[];
307
+ skillBundles: WorkbenchSkillBundleSnapshot[];
308
+ evals: WorkbenchEvalSnapshot[];
309
+ agents: WorkbenchAgentSnapshot[];
310
+ comparison?: WorkbenchComparison;
311
+ runs: WorkbenchRun[];
312
+ jobs: WorkbenchJob[];
313
+ traces: WorkbenchTrace[];
314
+ executionEvents: WorkbenchExecutionEventBatch[];
315
+ artifacts: WorkbenchArtifact[];
316
+ lineage: WorkbenchLineageEdge[];
317
+ remotes: WorkbenchRemote[];
318
+ refs: WorkbenchRefs;
319
+ publication?: WorkbenchPublication;
320
+ }
321
+ export interface WorkbenchPublication {
322
+ versionId: string;
323
+ installUrl: string;
324
+ pinnedInstallUrl: string;
325
+ }
326
+ export interface WorkbenchObjectPack {
327
+ schema: "workbench.object-pack.v1";
328
+ createdAt: string;
329
+ refs: WorkbenchRefs;
330
+ versions: WorkbenchVersion[];
331
+ skillSources: WorkbenchSkillSource[];
332
+ skillBundles: WorkbenchSkillBundleSnapshot[];
333
+ evals: WorkbenchEvalSnapshot[];
334
+ agents: WorkbenchAgent[];
335
+ runs: WorkbenchRun[];
336
+ jobs: WorkbenchJob[];
337
+ traces: WorkbenchTrace[];
338
+ executionEvents: WorkbenchExecutionEventBatch[];
339
+ artifacts: WorkbenchArtifact[];
340
+ lineage: WorkbenchLineageEdge[];
341
+ }
342
+ export interface WorkbenchFilePreview {
639
343
  path: string;
344
+ source?: SurfaceSnapshotFile;
345
+ renderedText?: string;
346
+ diff?: string;
640
347
  }
641
- export interface WorkbenchAuthoredAdapterSpec {
642
- use: string;
643
- auth?: string | Record<string, string>;
644
- with?: Record<string, Json>;
645
- }
646
- export interface AuthoredWorkbenchRuntimeSpec {
647
- dockerfile: string;
648
- resources?: {
649
- cpu?: number;
650
- memoryGb?: number;
651
- diskGb?: number;
652
- timeoutMinutes?: number;
653
- };
654
- network?: {
655
- egress?: "none" | "open";
656
- };
348
+ export interface WorkbenchFileSurface {
349
+ files: SurfaceSnapshotFile[];
350
+ preview: WorkbenchFilePreview | null;
657
351
  }
658
- export type AuthoredWorkbenchImproveSpec = WorkbenchAuthoredAdapterSpec;
659
- export type AuthoredWorkbenchRunSpec = WorkbenchAuthoredAdapterSpec;
660
- export type AuthoredWorkbenchScoreSpec = WorkbenchAuthoredAdapterSpec;
661
- export interface AuthoredWorkbenchEngineConfig {
662
- tasks?: WorkbenchAuthoredAdapterSpec;
663
- environment: AuthoredWorkbenchRuntimeSpec;
664
- score: AuthoredWorkbenchScoreSpec;
352
+ export interface WorkbenchSpecValidation {
353
+ ok: boolean;
354
+ errors: string[];
355
+ warnings: string[];
665
356
  }
666
- export interface AuthoredWorkbenchEngineSpec {
667
- use: string;
668
- auth?: string | Record<string, string>;
669
- with?: AuthoredWorkbenchEngineConfig | Record<string, Json>;
357
+ export interface RemoteWorkbenchEnvironmentSpec {
358
+ base: string;
359
+ resources: {
360
+ cpu: number;
361
+ memoryGb: number;
362
+ diskGb: number;
363
+ timeoutMinutes: number;
364
+ };
365
+ network: "off" | "on";
670
366
  }
671
- export interface AuthoredWorkbenchBenchmarkSpec {
367
+ export interface RemoteWorkbenchEnvironmentVersion {
368
+ id: string;
369
+ environmentId: string;
672
370
  name: string;
673
- description: string;
674
- engine: AuthoredWorkbenchEngineSpec;
371
+ spec: RemoteWorkbenchEnvironmentSpec;
372
+ imageRef: string;
373
+ sourceHash: string;
374
+ sourceType: "builtin" | "dockerfile";
375
+ build?: {
376
+ dockerfileRef?: BlobObjectRef;
377
+ logRef?: BlobObjectRef;
378
+ error?: string;
379
+ startedAt?: string;
380
+ finishedAt?: string;
381
+ };
382
+ status: "ready" | "building" | "failed";
383
+ createdAt: string;
384
+ updatedAt: string;
675
385
  }
676
- export interface AuthoredWorkbenchSourceSpec {
677
- version: 4;
678
- benchmark: AuthoredWorkbenchBenchmarkSpec;
679
- candidate: AuthoredWorkbenchCandidateSpec;
386
+ export interface EngineResolveBinding {
387
+ engine: string;
388
+ resolver: {
389
+ use: string;
390
+ withFingerprint: string;
391
+ };
680
392
  }
681
393
  export type WorkbenchExecutionPurpose = "improve" | "attempt";
682
394
  export type WorkbenchSandboxTemplateKind = "snapshot" | "oci";
@@ -704,11 +416,11 @@ export interface WorkbenchSandboxAllocation {
704
416
  }
705
417
  export interface WorkbenchExecutionCapability {
706
418
  executionId: string;
707
- candidate: {
419
+ skill: {
708
420
  tenantId: string;
709
421
  projectId: string;
710
422
  runId: string;
711
- candidateId?: string;
423
+ versionId?: string;
712
424
  };
713
425
  inputs: WorkbenchExecutionInputRef[];
714
426
  outputPrefix: string;
@@ -735,7 +447,7 @@ export interface WorkbenchExecutionInputRef {
735
447
  mountPath: string;
736
448
  writable: boolean;
737
449
  }
738
- export type WorkbenchExecutionOutputSchema = "workbench.candidate_patch.v1" | "workbench.result.v1" | string;
450
+ export type WorkbenchExecutionOutputSchema = "workbench.skill_patch.v1" | "workbench.result.v1" | string;
739
451
  export interface WorkbenchExecutionOutputContract {
740
452
  name: string;
741
453
  schema: WorkbenchExecutionOutputSchema;
@@ -761,7 +473,7 @@ export interface WorkbenchExecutionSpec {
761
473
  id: string;
762
474
  projectId: string;
763
475
  runId: string;
764
- candidateId?: string;
476
+ versionId?: string;
765
477
  purpose: WorkbenchExecutionPurpose;
766
478
  adapter: WorkbenchAdapterInvocation;
767
479
  sandbox: WorkbenchSandboxTemplate;
@@ -770,12 +482,87 @@ export interface WorkbenchExecutionSpec {
770
482
  policy: WorkbenchExecutionPolicy;
771
483
  metadata: Record<string, Json>;
772
484
  }
773
- export interface WorkbenchCandidatePatch {
485
+ export interface BlobObjectRef {
486
+ bucket: string;
487
+ key: string;
488
+ byteLength: number;
489
+ sha256: string;
490
+ }
491
+ export interface WorkbenchSkillPatch {
774
492
  files: SurfaceSnapshotFile[];
775
493
  fileChanges: string[];
776
494
  summary?: string;
777
495
  feedback?: Json;
778
496
  }
497
+ export interface WorkbenchCaseCriterionScore {
498
+ criterion_id: string;
499
+ label: string;
500
+ score: number;
501
+ pass: boolean;
502
+ errors?: string[];
503
+ rationale?: string;
504
+ }
505
+ export interface MetricStats {
506
+ count: number;
507
+ mean: number;
508
+ variance: number;
509
+ stddev: number;
510
+ min: number;
511
+ max: number;
512
+ }
513
+ export type EvalCaseStatus = "completed" | "error";
514
+ export type EvalCaseSource = Record<string, Json>;
515
+ export interface EvalCaseResult {
516
+ id: string;
517
+ label?: string;
518
+ split?: string;
519
+ status?: EvalCaseStatus;
520
+ durationMs?: number;
521
+ metrics: Record<string, number>;
522
+ source?: EvalCaseSource;
523
+ feedback?: Json;
524
+ criteria?: WorkbenchCaseCriterionScore[];
525
+ }
526
+ export type ExecutionRole = "improver" | "runner" | "engine";
527
+ export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
528
+ export interface ExecutionUsage {
529
+ provider?: string;
530
+ model?: string;
531
+ inputTokens?: number;
532
+ uncachedInputTokens?: number;
533
+ cachedInputTokens?: number;
534
+ cacheCreationInputTokens?: number;
535
+ cacheReadInputTokens?: number;
536
+ outputTokens?: number;
537
+ reasoningOutputTokens?: number;
538
+ totalTokens?: number;
539
+ costUsd?: number;
540
+ costSource?: ExecutionUsageCostSource;
541
+ pricingSource?: string;
542
+ }
543
+ export interface UsageSummary {
544
+ total?: ExecutionUsage;
545
+ improver?: ExecutionUsage;
546
+ runner?: ExecutionUsage;
547
+ engine?: ExecutionUsage;
548
+ }
549
+ export interface EvaluationUsageStats {
550
+ total?: ExecutionUsageStats;
551
+ improver?: ExecutionUsageStats;
552
+ runner?: ExecutionUsageStats;
553
+ engine?: ExecutionUsageStats;
554
+ }
555
+ export interface ExecutionUsageStats {
556
+ inputTokens?: MetricStats;
557
+ uncachedInputTokens?: MetricStats;
558
+ cachedInputTokens?: MetricStats;
559
+ cacheCreationInputTokens?: MetricStats;
560
+ cacheReadInputTokens?: MetricStats;
561
+ reasoningOutputTokens?: MetricStats;
562
+ outputTokens?: MetricStats;
563
+ totalTokens?: MetricStats;
564
+ costUsd?: MetricStats;
565
+ }
779
566
  export interface WorkbenchResult {
780
567
  score: number;
781
568
  metrics?: Record<string, number>;
@@ -888,6 +675,24 @@ export interface WorkbenchTraceSession {
888
675
  trace: WorkbenchExecutionTrace;
889
676
  metadata?: Record<string, Json>;
890
677
  }
678
+ export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
679
+ export type RemoteWorkbenchJobKind = "execute";
680
+ export interface RemoteWorkbenchJob {
681
+ id: string;
682
+ projectId: string;
683
+ runId: string;
684
+ versionId?: string;
685
+ kind: RemoteWorkbenchJobKind;
686
+ status: RemoteWorkbenchJobStatus;
687
+ attempt: number;
688
+ createdAt: string;
689
+ updatedAt: string;
690
+ startedAt?: string;
691
+ finishedAt?: string;
692
+ input: Json;
693
+ output?: Json;
694
+ error?: string;
695
+ }
891
696
  export interface WorkbenchExecutionEvidence {
892
697
  id: string;
893
698
  kind: string;
@@ -896,7 +701,7 @@ export interface WorkbenchExecutionEvidence {
896
701
  status: RemoteWorkbenchJobStatus;
897
702
  jobIds: string[];
898
703
  executionIds: string[];
899
- candidateId?: string;
704
+ versionId?: string;
900
705
  caseId?: string;
901
706
  sampleIndex?: number;
902
707
  attemptIndex?: number;
@@ -908,44 +713,6 @@ export interface WorkbenchExecutionTraceDetail {
908
713
  runId: string;
909
714
  executions: WorkbenchExecutionEvidence[];
910
715
  }
911
- export interface AuthoredWorkbenchCaseSummary {
912
- id: string;
913
- slug: string;
914
- path: string;
915
- name: string;
916
- split?: string;
917
- fileCount: number;
918
- }
919
- export interface AuthoredWorkbenchSourceFile {
920
- path: string;
921
- content: string;
922
- }
923
- export interface AuthoredWorkbenchSourceDocument {
924
- path: string;
925
- exists: boolean;
926
- source_yaml: string;
927
- source_files: AuthoredWorkbenchSourceFile[];
928
- spec: AuthoredWorkbenchSourceSpec | null;
929
- cases: AuthoredWorkbenchCaseSummary[];
930
- }
931
- export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
932
- export type RemoteWorkbenchJobKind = "execute";
933
- export interface RemoteWorkbenchJob {
934
- id: string;
935
- projectId: string;
936
- runId: string;
937
- candidateId?: string;
938
- kind: RemoteWorkbenchJobKind;
939
- status: RemoteWorkbenchJobStatus;
940
- attempt: number;
941
- createdAt: string;
942
- updatedAt: string;
943
- startedAt?: string;
944
- finishedAt?: string;
945
- input: Json;
946
- output?: Json;
947
- error?: string;
948
- }
949
716
  export interface WorkbenchRemoteJobClaimRequest {
950
717
  schema: "workbench.remote.job.claim_request.v1";
951
718
  ownerUserId: string;
@@ -1015,32 +782,6 @@ export interface WorkbenchRemoteJobRetry {
1015
782
  leaseToken: string;
1016
783
  reason: string;
1017
784
  }
1018
- export interface RemoteWorkbenchRun extends WorkbenchRuntimeRun {
1019
- projectId: string;
1020
- environmentVersionId?: string;
1021
- specVersionId: string;
1022
- candidateId: string | null;
1023
- activeCandidateId?: string | null;
1024
- outputCandidateId?: string | null;
1025
- input: {
1026
- benchmarkFingerprint: string;
1027
- candidateFingerprint: string;
1028
- baseCandidateId: string | null;
1029
- payerUserId?: string;
1030
- candidateOwnerUserId?: string;
1031
- candidateOwnerUsername?: string;
1032
- preserveActiveCandidateId?: string | null;
1033
- selectedSamples?: Array<{
1034
- caseId: string;
1035
- sampleIndex: number;
1036
- }>;
1037
- sourceYaml?: string;
1038
- candidateSourceFiles?: SurfaceSnapshotFile[];
1039
- baseFiles: SurfaceSnapshotFile[];
1040
- engineResolveFiles: SurfaceSnapshotFile[];
1041
- };
1042
- jobCount: number;
1043
- completedJobCount: number;
1044
- failedJobCount: number;
1045
- }
785
+ export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
786
+ export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
1046
787
  //# sourceMappingURL=index.d.ts.map