@workbench-ai/workbench-contract 0.0.66 → 0.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +365 -702
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +38 -201
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,683 +1,317 @@
|
|
|
1
1
|
export type Json = null | boolean | number | string | Json[] | {
|
|
2
2
|
[key: string]: Json;
|
|
3
3
|
};
|
|
4
|
-
export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
|
|
5
|
-
export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
|
|
6
|
-
export interface RemoteWorkbenchProject {
|
|
7
|
-
id: string;
|
|
8
|
-
ownerUserId: string;
|
|
9
|
-
ownerUsername: string;
|
|
10
|
-
visibility: "private" | "public";
|
|
11
|
-
createdAt: string;
|
|
12
|
-
updatedAt: string;
|
|
13
|
-
activeEnvironmentVersionId: string;
|
|
14
|
-
currentSpecVersionId: string;
|
|
15
|
-
activeCandidateId?: string | null;
|
|
16
|
-
sourceFingerprint?: string;
|
|
17
|
-
starCount: number;
|
|
18
|
-
}
|
|
19
|
-
export interface RemoteWorkbenchProjectSummary {
|
|
20
|
-
id: string;
|
|
21
|
-
ownerUsername: string;
|
|
22
|
-
name: string;
|
|
23
|
-
description: string;
|
|
24
|
-
visibility: "private" | "public";
|
|
25
|
-
updatedAt: string;
|
|
26
|
-
currentSpecVersionId: string;
|
|
27
|
-
activeEnvironmentVersionId: string;
|
|
28
|
-
activeCandidateId?: string | null;
|
|
29
|
-
candidateCount: number;
|
|
30
|
-
evaluationCount: number;
|
|
31
|
-
runCount: number;
|
|
32
|
-
starCount: number;
|
|
33
|
-
viewerHasStarred?: boolean;
|
|
34
|
-
}
|
|
35
|
-
export interface WorkbenchSpecValidation {
|
|
36
|
-
ok: boolean;
|
|
37
|
-
errors: string[];
|
|
38
|
-
warnings: string[];
|
|
39
|
-
}
|
|
40
|
-
export interface RemoteWorkbenchSpecVersion {
|
|
41
|
-
id: string;
|
|
42
|
-
projectId: string;
|
|
43
|
-
ordinal: number;
|
|
44
|
-
sourceYaml: string;
|
|
45
|
-
createdAt: string;
|
|
46
|
-
updatedAt: string;
|
|
47
|
-
validation: WorkbenchSpecValidation;
|
|
48
|
-
}
|
|
49
|
-
export interface RemoteWorkbenchEnvironment {
|
|
50
|
-
id: string;
|
|
51
|
-
name: string;
|
|
52
|
-
description: string;
|
|
53
|
-
currentVersionId: string;
|
|
54
|
-
builtIn: boolean;
|
|
55
|
-
createdAt: string;
|
|
56
|
-
updatedAt: string;
|
|
57
|
-
}
|
|
58
|
-
export interface RemoteWorkbenchEnvironmentVersion {
|
|
59
|
-
id: string;
|
|
60
|
-
environmentId: string;
|
|
61
|
-
name: string;
|
|
62
|
-
spec: RemoteWorkbenchEnvironmentSpec;
|
|
63
|
-
imageRef: string;
|
|
64
|
-
sourceHash: string;
|
|
65
|
-
sourceType: "builtin" | "dockerfile";
|
|
66
|
-
build?: {
|
|
67
|
-
dockerfileRef?: BlobObjectRef;
|
|
68
|
-
logRef?: BlobObjectRef;
|
|
69
|
-
error?: string;
|
|
70
|
-
startedAt?: string;
|
|
71
|
-
finishedAt?: string;
|
|
72
|
-
};
|
|
73
|
-
status: "ready" | "building" | "failed";
|
|
74
|
-
createdAt: string;
|
|
75
|
-
updatedAt: string;
|
|
76
|
-
}
|
|
77
|
-
export interface RemoteWorkbenchEnvironmentSpec {
|
|
78
|
-
base: string;
|
|
79
|
-
resources: {
|
|
80
|
-
cpu: number;
|
|
81
|
-
memoryGb: number;
|
|
82
|
-
diskGb: number;
|
|
83
|
-
timeoutMinutes: number;
|
|
84
|
-
};
|
|
85
|
-
network: "off" | "on";
|
|
86
|
-
}
|
|
87
|
-
export interface BlobObjectRef {
|
|
88
|
-
bucket: string;
|
|
89
|
-
key: string;
|
|
90
|
-
byteLength: number;
|
|
91
|
-
sha256: string;
|
|
92
|
-
}
|
|
93
|
-
export type RemoteWorkbenchSnapshotKind = "candidate" | "engineResolve" | "adapters" | "runtime";
|
|
94
|
-
export type WorkspaceWriteEncoding = "utf8" | "base64";
|
|
95
4
|
export interface SurfaceSnapshotFile {
|
|
96
5
|
path: string;
|
|
97
|
-
kind
|
|
98
|
-
encoding
|
|
6
|
+
kind?: "text" | "binary";
|
|
7
|
+
encoding?: "utf8" | "base64";
|
|
99
8
|
content: string;
|
|
100
|
-
executable
|
|
101
|
-
contentRedacted?: boolean;
|
|
102
|
-
}
|
|
103
|
-
export interface WorkbenchEngineCaseFiles {
|
|
104
|
-
public?: SurfaceSnapshotFile[];
|
|
105
|
-
private?: SurfaceSnapshotFile[];
|
|
106
|
-
source?: SurfaceSnapshotFile[];
|
|
107
|
-
}
|
|
108
|
-
export interface SurfaceSnapshot {
|
|
109
|
-
files: SurfaceSnapshotFile[];
|
|
9
|
+
executable?: boolean;
|
|
110
10
|
}
|
|
111
|
-
export
|
|
11
|
+
export type WorkbenchInspectionFileOwnerKind = "version" | "trace" | "artifact";
|
|
12
|
+
export interface WorkbenchInspectionFileContent {
|
|
112
13
|
path: string;
|
|
113
|
-
|
|
114
|
-
encoding?:
|
|
14
|
+
kind?: SurfaceSnapshotFile["kind"];
|
|
15
|
+
encoding?: SurfaceSnapshotFile["encoding"];
|
|
115
16
|
executable?: boolean;
|
|
17
|
+
content?: string;
|
|
18
|
+
unavailableReason?: string;
|
|
116
19
|
}
|
|
117
|
-
export
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
20
|
+
export declare function workbenchInspectionFileContentUnavailableReason(file: Pick<SurfaceSnapshotFile, "kind" | "encoding">): string | null;
|
|
21
|
+
export declare function workbenchInspectionFileContent(file: SurfaceSnapshotFile): WorkbenchInspectionFileContent;
|
|
22
|
+
export declare function workbenchInspectionFileManifest(file: SurfaceSnapshotFile): SurfaceSnapshotFile;
|
|
23
|
+
export interface WorkbenchAgent {
|
|
24
|
+
name: string;
|
|
25
|
+
adapter: string;
|
|
26
|
+
model?: string;
|
|
27
|
+
config: Record<string, Json>;
|
|
123
28
|
}
|
|
124
|
-
export
|
|
29
|
+
export type WorkbenchSkillSourceKind = "local" | "remote";
|
|
30
|
+
export interface WorkbenchSkillInclude {
|
|
31
|
+
name: string;
|
|
32
|
+
kind: WorkbenchSkillSourceKind;
|
|
33
|
+
path?: string;
|
|
34
|
+
from?: string;
|
|
35
|
+
ref?: string;
|
|
36
|
+
resolvedRef?: string;
|
|
37
|
+
hash?: string;
|
|
38
|
+
files?: SurfaceSnapshotFile[];
|
|
39
|
+
}
|
|
40
|
+
export interface WorkbenchSkillSource {
|
|
41
|
+
name: string;
|
|
42
|
+
kind: WorkbenchSkillSourceKind;
|
|
43
|
+
path?: string;
|
|
44
|
+
from?: string;
|
|
45
|
+
ref?: string;
|
|
46
|
+
resolvedRef?: string;
|
|
47
|
+
hash?: string;
|
|
48
|
+
includes?: WorkbenchSkillInclude[];
|
|
49
|
+
}
|
|
50
|
+
export interface WorkbenchSkillBundleSnapshot {
|
|
51
|
+
hash: string;
|
|
52
|
+
skillName: string;
|
|
53
|
+
entryName: string;
|
|
54
|
+
source: WorkbenchSkillSource;
|
|
125
55
|
files: SurfaceSnapshotFile[];
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
export interface RemoteWorkbenchEngineResolveSnapshot extends RemoteWorkbenchSnapshotBase {
|
|
129
|
-
kind: "engineResolve";
|
|
130
|
-
engineResolveBinding: EngineResolveBinding;
|
|
131
|
-
}
|
|
132
|
-
export interface RemoteWorkbenchStandardSnapshot extends RemoteWorkbenchSnapshotBase {
|
|
133
|
-
kind: Exclude<RemoteWorkbenchSnapshotKind, "engineResolve">;
|
|
134
|
-
}
|
|
135
|
-
export type RemoteWorkbenchSnapshot = RemoteWorkbenchEngineResolveSnapshot | RemoteWorkbenchStandardSnapshot;
|
|
136
|
-
export type CandidateStatus = "running" | "evaluated" | "repair_exhausted" | "eval_error" | "agent_error";
|
|
137
|
-
export interface MetricStats {
|
|
138
|
-
count: number;
|
|
139
|
-
mean: number;
|
|
140
|
-
variance: number;
|
|
141
|
-
stddev: number;
|
|
142
|
-
min: number;
|
|
143
|
-
max: number;
|
|
144
|
-
}
|
|
145
|
-
export type EvalCaseStatus = "completed" | "error";
|
|
146
|
-
export type EvalCaseSource = Record<string, Json>;
|
|
147
|
-
export interface CandidateCaseCriterionScore {
|
|
148
|
-
criterion_id: string;
|
|
149
|
-
label: string;
|
|
150
|
-
score: number;
|
|
151
|
-
pass: boolean;
|
|
152
|
-
errors?: string[];
|
|
153
|
-
rationale?: string;
|
|
56
|
+
includedSkills: WorkbenchSkillInclude[];
|
|
57
|
+
createdAt: string;
|
|
154
58
|
}
|
|
155
|
-
export interface
|
|
59
|
+
export interface WorkbenchVersion {
|
|
156
60
|
id: string;
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
source?: EvalCaseSource;
|
|
163
|
-
feedback?: Json;
|
|
164
|
-
criteria?: CandidateCaseCriterionScore[];
|
|
165
|
-
}
|
|
166
|
-
export type ExecutionRole = "improver" | "runner" | "engine";
|
|
167
|
-
export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
|
|
168
|
-
export interface ExecutionUsage {
|
|
169
|
-
provider?: string;
|
|
170
|
-
model?: string;
|
|
171
|
-
inputTokens?: number;
|
|
172
|
-
uncachedInputTokens?: number;
|
|
173
|
-
cachedInputTokens?: number;
|
|
174
|
-
cacheCreationInputTokens?: number;
|
|
175
|
-
cacheReadInputTokens?: number;
|
|
176
|
-
outputTokens?: number;
|
|
177
|
-
reasoningOutputTokens?: number;
|
|
178
|
-
totalTokens?: number;
|
|
179
|
-
costUsd?: number;
|
|
180
|
-
costSource?: ExecutionUsageCostSource;
|
|
181
|
-
pricingSource?: string;
|
|
182
|
-
}
|
|
183
|
-
export interface UsageSummary {
|
|
184
|
-
total?: ExecutionUsage;
|
|
185
|
-
improver?: ExecutionUsage;
|
|
186
|
-
runner?: ExecutionUsage;
|
|
187
|
-
engine?: ExecutionUsage;
|
|
61
|
+
hash: string;
|
|
62
|
+
message: string;
|
|
63
|
+
parentIds: string[];
|
|
64
|
+
createdAt: string;
|
|
65
|
+
files: SurfaceSnapshotFile[];
|
|
188
66
|
}
|
|
189
|
-
export interface
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
67
|
+
export interface WorkbenchEvalSnapshot {
|
|
68
|
+
hash: string;
|
|
69
|
+
files: SurfaceSnapshotFile[];
|
|
70
|
+
caseCount: number;
|
|
193
71
|
}
|
|
194
|
-
export type
|
|
195
|
-
export type
|
|
196
|
-
export
|
|
72
|
+
export type WorkbenchRunKind = "eval" | "improve" | "compare" | "retry";
|
|
73
|
+
export type WorkbenchRunStatus = "running" | "succeeded" | "failed" | "canceled";
|
|
74
|
+
export type WorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "canceled";
|
|
75
|
+
export type WorkbenchArtifactKind = "file" | "directory" | "log" | "scorecard";
|
|
76
|
+
export interface WorkbenchRun {
|
|
197
77
|
id: string;
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
78
|
+
kind: WorkbenchRunKind;
|
|
79
|
+
versionId: string;
|
|
80
|
+
skillName: string;
|
|
81
|
+
skillBundleHash: string;
|
|
82
|
+
evalHash: string;
|
|
83
|
+
agentName: string;
|
|
84
|
+
agentHash: string;
|
|
85
|
+
status: WorkbenchRunStatus;
|
|
86
|
+
score?: number;
|
|
87
|
+
costUsd?: number;
|
|
88
|
+
latencyMs?: number;
|
|
89
|
+
jobIds?: string[];
|
|
90
|
+
traceIds: string[];
|
|
91
|
+
createdAt: string;
|
|
202
92
|
finishedAt?: string;
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
usage?: UsageSummary;
|
|
93
|
+
parentRunId?: string;
|
|
94
|
+
outputVersionId?: string;
|
|
206
95
|
error?: string;
|
|
207
|
-
cases?: EvalCaseResult[];
|
|
208
|
-
feedback?: Json;
|
|
209
|
-
}
|
|
210
|
-
export interface EvaluationCaseStats {
|
|
211
|
-
id: string;
|
|
212
|
-
label?: string;
|
|
213
|
-
split?: string;
|
|
214
|
-
status?: EvalCaseStatus;
|
|
215
|
-
sampleCount: number;
|
|
216
|
-
metrics: Record<string, MetricStats>;
|
|
217
|
-
durationMs?: MetricStats;
|
|
218
96
|
}
|
|
219
|
-
export
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
cachedInputTokens?: MetricStats;
|
|
229
|
-
cacheCreationInputTokens?: MetricStats;
|
|
230
|
-
cacheReadInputTokens?: MetricStats;
|
|
231
|
-
outputTokens?: MetricStats;
|
|
232
|
-
reasoningOutputTokens?: MetricStats;
|
|
233
|
-
totalTokens?: MetricStats;
|
|
234
|
-
costUsd?: MetricStats;
|
|
97
|
+
export type WorkbenchAutomationReadinessLevel = "insufficient" | "assist" | "review" | "automate";
|
|
98
|
+
export interface WorkbenchAutomationReadiness {
|
|
99
|
+
level: WorkbenchAutomationReadinessLevel;
|
|
100
|
+
label: string;
|
|
101
|
+
reason: string;
|
|
102
|
+
runId?: string;
|
|
103
|
+
score?: number;
|
|
104
|
+
caseCount?: number;
|
|
105
|
+
jobCount?: number;
|
|
235
106
|
}
|
|
236
|
-
export interface
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
107
|
+
export interface WorkbenchJob {
|
|
108
|
+
id: string;
|
|
109
|
+
runId: string;
|
|
110
|
+
kind: WorkbenchRunKind;
|
|
111
|
+
versionId: string;
|
|
112
|
+
skillName: string;
|
|
113
|
+
skillBundleHash: string;
|
|
114
|
+
evalHash: string;
|
|
115
|
+
agentName: string;
|
|
116
|
+
agentHash: string;
|
|
117
|
+
caseId: string;
|
|
118
|
+
sample: number;
|
|
119
|
+
status: WorkbenchJobStatus;
|
|
120
|
+
score?: number;
|
|
121
|
+
command?: string;
|
|
122
|
+
dockerImage?: string;
|
|
123
|
+
exitCode?: number;
|
|
124
|
+
artifactIds: string[];
|
|
125
|
+
traceIds: string[];
|
|
126
|
+
createdAt: string;
|
|
242
127
|
startedAt?: string;
|
|
243
128
|
finishedAt?: string;
|
|
244
|
-
|
|
245
|
-
durationMs?: MetricStats;
|
|
246
|
-
usage?: EvaluationUsageStats;
|
|
247
|
-
cases?: EvaluationCaseStats[];
|
|
248
|
-
samples: EvaluationSampleRecord[];
|
|
129
|
+
durationMs?: number;
|
|
249
130
|
error?: string;
|
|
250
131
|
}
|
|
251
|
-
export interface
|
|
132
|
+
export interface WorkbenchArtifact {
|
|
252
133
|
id: string;
|
|
253
134
|
runId: string;
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
candidateName?: string;
|
|
258
|
-
candidateVersion: number;
|
|
259
|
-
candidateRunId?: string;
|
|
260
|
-
candidateRunName?: string;
|
|
135
|
+
jobId: string;
|
|
136
|
+
kind: WorkbenchArtifactKind;
|
|
137
|
+
path: string;
|
|
261
138
|
createdAt: string;
|
|
262
|
-
|
|
263
|
-
status: EvaluationStatus;
|
|
264
|
-
sampleCount: number;
|
|
265
|
-
completedSampleCount: number;
|
|
266
|
-
errorSampleCount: number;
|
|
267
|
-
metrics?: Record<string, MetricStats>;
|
|
268
|
-
selectionMetric?: string;
|
|
269
|
-
selectionLabel?: string;
|
|
270
|
-
selectionScore?: MetricStats;
|
|
271
|
-
durationMs?: MetricStats;
|
|
272
|
-
usage?: EvaluationUsageStats;
|
|
273
|
-
error?: string;
|
|
274
|
-
}
|
|
275
|
-
export interface EvaluationScorecard extends EvaluationSummary {
|
|
276
|
-
evaluation: EvaluationRecord;
|
|
139
|
+
files: SurfaceSnapshotFile[];
|
|
277
140
|
}
|
|
278
|
-
export interface
|
|
141
|
+
export interface WorkbenchTrace {
|
|
279
142
|
id: string;
|
|
280
|
-
label: string;
|
|
281
|
-
direction: "higher" | "lower";
|
|
282
|
-
kind: "number" | "duration_ms" | "currency_usd";
|
|
283
|
-
group: "metric" | "execution" | "usage" | "other";
|
|
284
|
-
primary: boolean;
|
|
285
|
-
semanticRole?: "performance" | "speed" | "cost";
|
|
286
|
-
}
|
|
287
|
-
export interface WorkbenchEvaluationComparisonRow {
|
|
288
|
-
evaluationId: string;
|
|
289
143
|
runId: string;
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
metrics: Record<string, number>;
|
|
144
|
+
jobId?: string;
|
|
145
|
+
versionId: string;
|
|
146
|
+
skillName: string;
|
|
147
|
+
skillBundleHash: string;
|
|
148
|
+
agentName: string;
|
|
296
149
|
createdAt: string;
|
|
297
|
-
|
|
298
|
-
|
|
150
|
+
request: Json;
|
|
151
|
+
result: Json;
|
|
152
|
+
files: SurfaceSnapshotFile[];
|
|
299
153
|
}
|
|
300
|
-
export interface
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
scoredEvaluationCount: number;
|
|
306
|
-
bestEvaluationId: string | null;
|
|
307
|
-
bestScore: number | null;
|
|
308
|
-
meanScore: number | null;
|
|
309
|
-
}
|
|
310
|
-
export interface WorkbenchEvaluationComparison {
|
|
311
|
-
evaluations: EvaluationSummary[];
|
|
312
|
-
rows: WorkbenchEvaluationComparisonRow[];
|
|
313
|
-
candidates: WorkbenchCandidateEvaluationRollup[];
|
|
314
|
-
metrics: WorkbenchEvaluationMetricDescriptor[];
|
|
315
|
-
}
|
|
316
|
-
export interface CandidateSummary {
|
|
317
|
-
id: string;
|
|
318
|
-
name?: string;
|
|
319
|
-
version: number;
|
|
320
|
-
ordinal: number;
|
|
321
|
-
benchmarkFingerprint: string;
|
|
322
|
-
candidateFingerprint: string;
|
|
323
|
-
ownerUserId?: string;
|
|
324
|
-
ownerUsername?: string;
|
|
325
|
-
visibility?: "private" | "public";
|
|
154
|
+
export interface WorkbenchLineageEdge {
|
|
155
|
+
parentId: string;
|
|
156
|
+
childId: string;
|
|
157
|
+
runId?: string;
|
|
158
|
+
reason: "version" | "improve" | "switch" | "publish";
|
|
326
159
|
createdAt: string;
|
|
327
|
-
|
|
328
|
-
referenceIds: string[];
|
|
329
|
-
status: CandidateStatus;
|
|
330
|
-
fileChanges: string[];
|
|
331
|
-
usage?: UsageSummary;
|
|
332
|
-
}
|
|
333
|
-
export interface CandidateRecord extends CandidateSummary {
|
|
334
|
-
eval?: EvaluationRecord;
|
|
335
|
-
prompt?: string;
|
|
336
|
-
meta?: Json;
|
|
160
|
+
message?: string;
|
|
337
161
|
}
|
|
338
|
-
export interface
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
162
|
+
export interface WorkbenchRemote {
|
|
163
|
+
name: string;
|
|
164
|
+
url: string;
|
|
165
|
+
type: "workbench";
|
|
342
166
|
}
|
|
343
|
-
export interface
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
sourceId: string;
|
|
347
|
-
targetId: string;
|
|
348
|
-
}
|
|
349
|
-
export interface CandidateLineageGraph {
|
|
350
|
-
activeId: string | null;
|
|
351
|
-
nodes: CandidateLineageNode[];
|
|
352
|
-
edges: CandidateLineageEdge[];
|
|
353
|
-
}
|
|
354
|
-
export declare function buildCandidateLineage(args: {
|
|
355
|
-
summaries: readonly CandidateSummary[];
|
|
356
|
-
activeId: string | null;
|
|
357
|
-
}): CandidateLineageGraph;
|
|
358
|
-
export declare function buildWorkbenchEvaluationComparison(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationComparison;
|
|
359
|
-
export declare function buildWorkbenchEvaluationMetricDescriptors(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationMetricDescriptor[];
|
|
360
|
-
export declare function readEvaluationScore(evaluation: EvaluationSummary): number | null;
|
|
361
|
-
export declare function isCompleteEvaluationSummary(evaluation: Pick<EvaluationSummary, "status" | "sampleCount" | "completedSampleCount" | "errorSampleCount">): boolean;
|
|
362
|
-
export declare function formatEvaluationConfigurationLabel(evaluation: Pick<EvaluationSummary, "candidateRunName" | "candidateRunId">): string;
|
|
363
|
-
export type CandidatePreviewMode = "diff" | "raw" | "rendered";
|
|
364
|
-
export type CandidatePreviewKind = "text" | "markdown" | "table" | "spreadsheet" | "image" | "pdf" | "unsupported";
|
|
365
|
-
export type CandidatePreviewSourceEncoding = "utf8" | "base64";
|
|
366
|
-
export type CandidateFileStatus = "added" | "modified" | "unchanged";
|
|
367
|
-
export interface CandidateFileSummary {
|
|
368
|
-
path: string;
|
|
369
|
-
old_path: string | null;
|
|
370
|
-
status: CandidateFileStatus;
|
|
371
|
-
mime_type: string | null;
|
|
372
|
-
preview_kind: CandidatePreviewKind;
|
|
373
|
-
additions: number;
|
|
374
|
-
deletions: number;
|
|
375
|
-
}
|
|
376
|
-
export interface CandidateFilePreviewSource {
|
|
377
|
-
content: string;
|
|
378
|
-
encoding: CandidatePreviewSourceEncoding;
|
|
167
|
+
export interface WorkbenchRefs {
|
|
168
|
+
current?: string;
|
|
169
|
+
[name: string]: string | undefined;
|
|
379
170
|
}
|
|
380
|
-
export interface
|
|
171
|
+
export interface WorkbenchProjectState {
|
|
172
|
+
schema: "workbench.skill.state.v1";
|
|
173
|
+
root: string;
|
|
174
|
+
currentVersionId?: string;
|
|
175
|
+
refs: WorkbenchRefs;
|
|
176
|
+
remotes: Record<string, WorkbenchRemote>;
|
|
177
|
+
defaultSkill?: string;
|
|
178
|
+
defaultAgent?: string;
|
|
179
|
+
versions: WorkbenchVersion[];
|
|
180
|
+
skillSources: WorkbenchSkillSource[];
|
|
181
|
+
skillBundles: WorkbenchSkillBundleSnapshot[];
|
|
182
|
+
evals: WorkbenchEvalSnapshot[];
|
|
183
|
+
agents: WorkbenchAgent[];
|
|
184
|
+
runs: WorkbenchRun[];
|
|
185
|
+
jobs: WorkbenchJob[];
|
|
186
|
+
traces: WorkbenchTrace[];
|
|
187
|
+
artifacts: WorkbenchArtifact[];
|
|
188
|
+
lineage: WorkbenchLineageEdge[];
|
|
189
|
+
}
|
|
190
|
+
export interface WorkbenchStatus {
|
|
191
|
+
root: string;
|
|
192
|
+
initialized: boolean;
|
|
193
|
+
currentSkillHash?: string;
|
|
194
|
+
currentVersionId?: string;
|
|
195
|
+
hasUnversionedChanges: boolean;
|
|
196
|
+
defaultSkill?: string;
|
|
197
|
+
defaultAgent?: string;
|
|
198
|
+
versionCount: number;
|
|
199
|
+
skillCount: number;
|
|
200
|
+
agentCount: number;
|
|
201
|
+
runCount: number;
|
|
202
|
+
remoteCount: number;
|
|
203
|
+
pendingSyncCount?: number;
|
|
204
|
+
lastScore?: number;
|
|
205
|
+
automationReadiness?: WorkbenchAutomationReadiness;
|
|
206
|
+
}
|
|
207
|
+
export interface WorkbenchComparisonCell {
|
|
208
|
+
versionId: string;
|
|
209
|
+
skillName: string;
|
|
210
|
+
skillBundleHash: string;
|
|
211
|
+
evalHash: string;
|
|
212
|
+
agentName: string;
|
|
213
|
+
runId?: string;
|
|
214
|
+
score?: number;
|
|
215
|
+
costUsd?: number;
|
|
216
|
+
latencyMs?: number;
|
|
217
|
+
automationReadiness?: WorkbenchAutomationReadiness;
|
|
218
|
+
}
|
|
219
|
+
export interface WorkbenchComparison {
|
|
220
|
+
evalHash?: string;
|
|
221
|
+
versions: WorkbenchVersion[];
|
|
222
|
+
skills: WorkbenchSkillBundleSnapshot[];
|
|
223
|
+
agents: WorkbenchAgent[];
|
|
224
|
+
cells: WorkbenchComparisonCell[];
|
|
225
|
+
}
|
|
226
|
+
export interface WorkbenchInspectionSnapshot {
|
|
227
|
+
root: string;
|
|
228
|
+
status: WorkbenchStatus;
|
|
229
|
+
versions: WorkbenchVersion[];
|
|
230
|
+
skillSources: WorkbenchSkillSource[];
|
|
231
|
+
skillBundles: WorkbenchSkillBundleSnapshot[];
|
|
232
|
+
agents: WorkbenchAgent[];
|
|
233
|
+
runs: WorkbenchRun[];
|
|
234
|
+
jobs: WorkbenchJob[];
|
|
235
|
+
traces: WorkbenchTrace[];
|
|
236
|
+
artifacts: WorkbenchArtifact[];
|
|
237
|
+
lineage: WorkbenchLineageEdge[];
|
|
238
|
+
remotes: WorkbenchRemote[];
|
|
239
|
+
refs: WorkbenchRefs;
|
|
240
|
+
publication?: WorkbenchPublication;
|
|
241
|
+
}
|
|
242
|
+
export interface WorkbenchPublication {
|
|
243
|
+
versionId: string;
|
|
244
|
+
installUrl: string;
|
|
245
|
+
pinnedInstallUrl: string;
|
|
246
|
+
}
|
|
247
|
+
export interface WorkbenchObjectPack {
|
|
248
|
+
schema: "workbench.object-pack.v1";
|
|
249
|
+
createdAt: string;
|
|
250
|
+
refs: WorkbenchRefs;
|
|
251
|
+
defaultSkill?: string;
|
|
252
|
+
defaultAgent?: string;
|
|
253
|
+
versions: WorkbenchVersion[];
|
|
254
|
+
skillSources: WorkbenchSkillSource[];
|
|
255
|
+
skillBundles: WorkbenchSkillBundleSnapshot[];
|
|
256
|
+
evals: WorkbenchEvalSnapshot[];
|
|
257
|
+
agents: WorkbenchAgent[];
|
|
258
|
+
runs: WorkbenchRun[];
|
|
259
|
+
jobs: WorkbenchJob[];
|
|
260
|
+
traces: WorkbenchTrace[];
|
|
261
|
+
artifacts: WorkbenchArtifact[];
|
|
262
|
+
lineage: WorkbenchLineageEdge[];
|
|
263
|
+
}
|
|
264
|
+
export interface WorkbenchFilePreview {
|
|
381
265
|
path: string;
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
diff: string | null;
|
|
386
|
-
source: CandidateFilePreviewSource | null;
|
|
387
|
-
rendered_html: string | null;
|
|
388
|
-
}
|
|
389
|
-
export interface CandidateCaseCriterionResult {
|
|
390
|
-
criterion_id: string;
|
|
391
|
-
pass: boolean;
|
|
392
|
-
score: number;
|
|
393
|
-
errors: string[];
|
|
394
|
-
rationale?: string;
|
|
266
|
+
source?: SurfaceSnapshotFile;
|
|
267
|
+
renderedText?: string;
|
|
268
|
+
diff?: string;
|
|
395
269
|
}
|
|
396
|
-
export interface
|
|
397
|
-
runId: string;
|
|
398
|
-
kind: string;
|
|
399
|
-
role: WorkbenchExecutionEventRole;
|
|
400
|
-
status: RemoteWorkbenchJobStatus;
|
|
401
|
-
jobIds: string[];
|
|
402
|
-
executionIds: string[];
|
|
403
|
-
createdAt?: string;
|
|
404
|
-
startedAt?: string;
|
|
405
|
-
finishedAt?: string;
|
|
406
|
-
durationMs?: number;
|
|
407
|
-
caseId?: string;
|
|
408
|
-
sampleIndex?: number;
|
|
409
|
-
attemptIndex?: number;
|
|
410
|
-
}
|
|
411
|
-
export interface CandidateCaseReview {
|
|
412
|
-
candidateId: string;
|
|
413
|
-
caseId: string;
|
|
414
|
-
caseLabel: string;
|
|
415
|
-
sampleId?: string;
|
|
416
|
-
sampleIndex?: number;
|
|
417
|
-
status?: EvalCaseStatus | RemoteWorkbenchJobStatus;
|
|
418
|
-
metrics: Record<string, number>;
|
|
419
|
-
durationMs?: number;
|
|
420
|
-
source?: EvalCaseSource;
|
|
421
|
-
feedback?: Json;
|
|
422
|
-
executions: CandidateCaseExecutionRef[];
|
|
423
|
-
criteria_results: CandidateCaseCriterionResult[];
|
|
424
|
-
}
|
|
425
|
-
export type RunStatus = "queued" | "running" | "finished";
|
|
426
|
-
export type RunOutcome = "ok" | "error" | "cancelled";
|
|
427
|
-
export type RemoteRunWorkflow = "eval" | "improve";
|
|
428
|
-
export interface RunSummary {
|
|
429
|
-
id: string;
|
|
430
|
-
workflow: RemoteRunWorkflow;
|
|
431
|
-
benchmarkFingerprint: string;
|
|
432
|
-
status: RunStatus;
|
|
433
|
-
candidateId?: string | null;
|
|
434
|
-
candidateRunId?: string;
|
|
435
|
-
candidateRunName?: string;
|
|
436
|
-
startedAt: string;
|
|
437
|
-
finishedAt?: string;
|
|
438
|
-
durationMs?: number;
|
|
439
|
-
improver: string;
|
|
440
|
-
engineRun: string;
|
|
441
|
-
strategy: string;
|
|
442
|
-
optimizeOn?: string;
|
|
443
|
-
selectBy?: string;
|
|
444
|
-
budget: number;
|
|
445
|
-
repairBudget: number;
|
|
446
|
-
attemptsRequested: number;
|
|
447
|
-
attemptsExecuted: number;
|
|
448
|
-
samples: number;
|
|
449
|
-
executionFingerprint?: string;
|
|
450
|
-
stoppedReason?: "budget_exhausted" | "completed" | "dry_run" | "cancelled";
|
|
451
|
-
outcome?: RunOutcome;
|
|
452
|
-
error?: string;
|
|
453
|
-
activeCandidateId?: string | null;
|
|
454
|
-
outputCandidateId?: string | null;
|
|
455
|
-
}
|
|
456
|
-
export interface WorkbenchRuntimeRun extends RunSummary {
|
|
457
|
-
jobCount?: number;
|
|
458
|
-
completedJobCount?: number;
|
|
459
|
-
failedJobCount?: number;
|
|
460
|
-
}
|
|
461
|
-
export interface RuntimeEvent {
|
|
462
|
-
id: string;
|
|
463
|
-
at: string;
|
|
464
|
-
type: "run_started" | "job_queued" | "job_started" | "job_progress" | "sandbox_allocated" | "sandbox_stopped" | "candidate_created" | "candidate_evaluated" | "active_changed" | "run_finished";
|
|
465
|
-
runId?: string;
|
|
466
|
-
jobId?: string;
|
|
467
|
-
candidateId?: string;
|
|
468
|
-
baseId?: string;
|
|
469
|
-
activeId?: string;
|
|
470
|
-
status?: CandidateStatus | RemoteWorkbenchJobStatus;
|
|
471
|
-
metrics?: Record<string, number>;
|
|
472
|
-
detail?: Record<string, Json>;
|
|
473
|
-
}
|
|
474
|
-
export interface RuntimeSnapshot {
|
|
475
|
-
workspaceRoot: string;
|
|
476
|
-
activeId: string | null;
|
|
477
|
-
currentBenchmarkFingerprint: string | null;
|
|
478
|
-
summaries: CandidateSummary[];
|
|
479
|
-
evaluations: EvaluationSummary[];
|
|
480
|
-
runs: RunSummary[];
|
|
481
|
-
}
|
|
482
|
-
export interface WorkbenchRuntimeCandidateFiles {
|
|
483
|
-
candidateId: string;
|
|
270
|
+
export interface WorkbenchFileSurface {
|
|
484
271
|
files: SurfaceSnapshotFile[];
|
|
272
|
+
preview: WorkbenchFilePreview | null;
|
|
485
273
|
}
|
|
486
|
-
export interface
|
|
487
|
-
|
|
488
|
-
|
|
274
|
+
export interface WorkbenchSpecValidation {
|
|
275
|
+
ok: boolean;
|
|
276
|
+
errors: string[];
|
|
277
|
+
warnings: string[];
|
|
489
278
|
}
|
|
490
|
-
export interface
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
executionFiles: WorkbenchRuntimeExecutionFiles[];
|
|
499
|
-
events: RuntimeEvent[];
|
|
500
|
-
}
|
|
501
|
-
export interface WorkbenchRuntimeBundleStats {
|
|
502
|
-
candidates: number;
|
|
503
|
-
candidateFiles: number;
|
|
504
|
-
evaluations: number;
|
|
505
|
-
runs: number;
|
|
506
|
-
jobs: number;
|
|
507
|
-
executionFiles: number;
|
|
508
|
-
events: number;
|
|
509
|
-
activeId: string | null;
|
|
510
|
-
}
|
|
511
|
-
export interface WorkbenchRuntimeImportResult {
|
|
512
|
-
changed: boolean;
|
|
513
|
-
stats: WorkbenchRuntimeBundleStats;
|
|
514
|
-
}
|
|
515
|
-
export interface WorkbenchProjectSourceResources {
|
|
516
|
-
cpu?: number;
|
|
517
|
-
memoryGb?: number;
|
|
518
|
-
diskGb?: number;
|
|
519
|
-
timeoutMinutes?: number;
|
|
520
|
-
}
|
|
521
|
-
export interface WorkbenchProjectStateSource {
|
|
522
|
-
source: string;
|
|
523
|
-
files: SurfaceSnapshotFile[];
|
|
524
|
-
candidateFiles: SurfaceSnapshotFile[];
|
|
525
|
-
engineResolveFiles: SurfaceSnapshotFile[];
|
|
526
|
-
engineResolveBinding: EngineResolveBinding;
|
|
527
|
-
adapterFiles: SurfaceSnapshotFile[];
|
|
528
|
-
dockerfile: string;
|
|
529
|
-
runtimeDockerfile: string;
|
|
530
|
-
runtimeFiles: SurfaceSnapshotFile[];
|
|
279
|
+
export interface RemoteWorkbenchEnvironmentSpec {
|
|
280
|
+
base: string;
|
|
281
|
+
resources: {
|
|
282
|
+
cpu: number;
|
|
283
|
+
memoryGb: number;
|
|
284
|
+
diskGb: number;
|
|
285
|
+
timeoutMinutes: number;
|
|
286
|
+
};
|
|
531
287
|
network: "off" | "on";
|
|
532
|
-
resources: WorkbenchProjectSourceResources;
|
|
533
|
-
revisionId?: string;
|
|
534
|
-
fingerprint?: string;
|
|
535
|
-
}
|
|
536
|
-
export interface WorkbenchProjectStateBase {
|
|
537
|
-
sourceRevisionId?: string;
|
|
538
|
-
sourceFingerprint?: string;
|
|
539
|
-
runtimeFingerprint?: string;
|
|
540
288
|
}
|
|
541
|
-
export interface
|
|
289
|
+
export interface RemoteWorkbenchEnvironmentVersion {
|
|
542
290
|
id: string;
|
|
543
|
-
|
|
544
|
-
ownerUsername: string;
|
|
291
|
+
environmentId: string;
|
|
545
292
|
name: string;
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
changed: boolean;
|
|
557
|
-
source: {
|
|
558
|
-
changed: boolean;
|
|
559
|
-
revisionId?: string;
|
|
560
|
-
fingerprint?: string;
|
|
561
|
-
};
|
|
562
|
-
runtime: WorkbenchRuntimeImportResult;
|
|
563
|
-
state: WorkbenchProjectState;
|
|
564
|
-
}
|
|
565
|
-
export type WorkbenchRemoteContractSchema = "workbench.remote.capabilities.v1" | "workbench.remote.run.request.v1" | "workbench.remote.job.claim_request.v1" | "workbench.remote.job.claim.v1" | "workbench.remote.job.renewal.v1" | "workbench.remote.job.renewal_result.v1" | "workbench.remote.job.progress.v1" | "workbench.remote.job.completion.v1" | "workbench.remote.job.retry.v1";
|
|
566
|
-
export type WorkbenchRemoteProductionSandbox = "firecracker";
|
|
567
|
-
export type WorkbenchRemoteLocalSandbox = "docker";
|
|
568
|
-
export type WorkbenchRemoteNetworkPolicy = "open" | "none";
|
|
569
|
-
export interface WorkbenchRemoteCapabilities {
|
|
570
|
-
schema: "workbench.remote.capabilities.v1";
|
|
571
|
-
contractVersion: 1;
|
|
572
|
-
projectState: {
|
|
573
|
-
schema: WorkbenchProjectState["schema"];
|
|
574
|
-
guardedSourceWrites: true;
|
|
575
|
-
immutableRuntimeFacts: true;
|
|
576
|
-
};
|
|
577
|
-
execution: {
|
|
578
|
-
fencedJobLeases: true;
|
|
579
|
-
idempotentCompletion: true;
|
|
580
|
-
progressIsBestEffort: true;
|
|
581
|
-
maxJobsPerRun: number;
|
|
582
|
-
};
|
|
583
|
-
sandbox: {
|
|
584
|
-
production: WorkbenchRemoteProductionSandbox;
|
|
585
|
-
local: WorkbenchRemoteLocalSandbox;
|
|
586
|
-
networkPolicies: WorkbenchRemoteNetworkPolicy[];
|
|
587
|
-
};
|
|
588
|
-
blobs: {
|
|
589
|
-
contentAddressed: boolean;
|
|
590
|
-
maxUploadBytes: number;
|
|
293
|
+
spec: RemoteWorkbenchEnvironmentSpec;
|
|
294
|
+
imageRef: string;
|
|
295
|
+
sourceHash: string;
|
|
296
|
+
sourceType: "builtin" | "dockerfile";
|
|
297
|
+
build?: {
|
|
298
|
+
dockerfileRef?: BlobObjectRef;
|
|
299
|
+
logRef?: BlobObjectRef;
|
|
300
|
+
error?: string;
|
|
301
|
+
startedAt?: string;
|
|
302
|
+
finishedAt?: string;
|
|
591
303
|
};
|
|
304
|
+
status: "ready" | "building" | "failed";
|
|
305
|
+
createdAt: string;
|
|
306
|
+
updatedAt: string;
|
|
592
307
|
}
|
|
593
|
-
export interface
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
candidateId?: string;
|
|
599
|
-
sourceYaml?: string;
|
|
600
|
-
candidateFiles?: RemoteWorkbenchFileInput[];
|
|
601
|
-
adapterFiles?: RemoteWorkbenchFileInput[];
|
|
602
|
-
selectedSamples?: Array<{
|
|
603
|
-
caseId: string;
|
|
604
|
-
sampleIndex: number;
|
|
605
|
-
}>;
|
|
606
|
-
preserveActive?: boolean;
|
|
607
|
-
rerun?: boolean;
|
|
608
|
-
}
|
|
609
|
-
export interface AuthoredWorkbenchCandidateRunSpec extends WorkbenchAuthoredAdapterSpec {
|
|
610
|
-
name: string;
|
|
611
|
-
}
|
|
612
|
-
export interface WorkbenchCaseSelector {
|
|
613
|
-
all?: true;
|
|
614
|
-
split?: string;
|
|
615
|
-
}
|
|
616
|
-
export interface WorkbenchSelectionSpec {
|
|
617
|
-
metric: string;
|
|
618
|
-
cases?: WorkbenchCaseSelector;
|
|
619
|
-
}
|
|
620
|
-
export interface AuthoredWorkbenchCandidateImproveSpec extends WorkbenchAuthoredAdapterSpec {
|
|
621
|
-
edits: string[];
|
|
622
|
-
optimizeOn?: WorkbenchCaseSelector;
|
|
623
|
-
selectBy?: WorkbenchSelectionSpec;
|
|
624
|
-
}
|
|
625
|
-
export interface AuthoredWorkbenchCandidateSpec {
|
|
626
|
-
name: string;
|
|
627
|
-
description?: string;
|
|
628
|
-
files: WorkbenchPathRef;
|
|
629
|
-
prepare?: WorkbenchCandidatePrepareSpec;
|
|
630
|
-
defaultRun?: string;
|
|
631
|
-
selectedRunId?: string;
|
|
632
|
-
runs: Record<string, AuthoredWorkbenchCandidateRunSpec>;
|
|
633
|
-
improve?: AuthoredWorkbenchCandidateImproveSpec;
|
|
634
|
-
}
|
|
635
|
-
export interface WorkbenchCandidatePrepareSpec {
|
|
636
|
-
command: string;
|
|
637
|
-
}
|
|
638
|
-
export interface WorkbenchPathRef {
|
|
639
|
-
path: string;
|
|
640
|
-
}
|
|
641
|
-
export interface WorkbenchAuthoredAdapterSpec {
|
|
642
|
-
use: string;
|
|
643
|
-
auth?: string | Record<string, string>;
|
|
644
|
-
with?: Record<string, Json>;
|
|
645
|
-
}
|
|
646
|
-
export interface AuthoredWorkbenchRuntimeSpec {
|
|
647
|
-
dockerfile: string;
|
|
648
|
-
resources?: {
|
|
649
|
-
cpu?: number;
|
|
650
|
-
memoryGb?: number;
|
|
651
|
-
diskGb?: number;
|
|
652
|
-
timeoutMinutes?: number;
|
|
653
|
-
};
|
|
654
|
-
network?: {
|
|
655
|
-
egress?: "none" | "open";
|
|
308
|
+
export interface EngineResolveBinding {
|
|
309
|
+
engine: string;
|
|
310
|
+
resolver: {
|
|
311
|
+
use: string;
|
|
312
|
+
withFingerprint: string;
|
|
656
313
|
};
|
|
657
314
|
}
|
|
658
|
-
export type AuthoredWorkbenchImproveSpec = WorkbenchAuthoredAdapterSpec;
|
|
659
|
-
export type AuthoredWorkbenchRunSpec = WorkbenchAuthoredAdapterSpec;
|
|
660
|
-
export type AuthoredWorkbenchScoreSpec = WorkbenchAuthoredAdapterSpec;
|
|
661
|
-
export interface AuthoredWorkbenchEngineConfig {
|
|
662
|
-
tasks?: WorkbenchAuthoredAdapterSpec;
|
|
663
|
-
environment: AuthoredWorkbenchRuntimeSpec;
|
|
664
|
-
score: AuthoredWorkbenchScoreSpec;
|
|
665
|
-
}
|
|
666
|
-
export interface AuthoredWorkbenchEngineSpec {
|
|
667
|
-
use: string;
|
|
668
|
-
auth?: string | Record<string, string>;
|
|
669
|
-
with?: AuthoredWorkbenchEngineConfig | Record<string, Json>;
|
|
670
|
-
}
|
|
671
|
-
export interface AuthoredWorkbenchBenchmarkSpec {
|
|
672
|
-
name: string;
|
|
673
|
-
description: string;
|
|
674
|
-
engine: AuthoredWorkbenchEngineSpec;
|
|
675
|
-
}
|
|
676
|
-
export interface AuthoredWorkbenchSourceSpec {
|
|
677
|
-
version: 4;
|
|
678
|
-
benchmark: AuthoredWorkbenchBenchmarkSpec;
|
|
679
|
-
candidate: AuthoredWorkbenchCandidateSpec;
|
|
680
|
-
}
|
|
681
315
|
export type WorkbenchExecutionPurpose = "improve" | "attempt";
|
|
682
316
|
export type WorkbenchSandboxTemplateKind = "snapshot" | "oci";
|
|
683
317
|
export interface WorkbenchAdapterInvocation {
|
|
@@ -704,11 +338,11 @@ export interface WorkbenchSandboxAllocation {
|
|
|
704
338
|
}
|
|
705
339
|
export interface WorkbenchExecutionCapability {
|
|
706
340
|
executionId: string;
|
|
707
|
-
|
|
341
|
+
skill: {
|
|
708
342
|
tenantId: string;
|
|
709
343
|
projectId: string;
|
|
710
344
|
runId: string;
|
|
711
|
-
|
|
345
|
+
versionId?: string;
|
|
712
346
|
};
|
|
713
347
|
inputs: WorkbenchExecutionInputRef[];
|
|
714
348
|
outputPrefix: string;
|
|
@@ -735,7 +369,7 @@ export interface WorkbenchExecutionInputRef {
|
|
|
735
369
|
mountPath: string;
|
|
736
370
|
writable: boolean;
|
|
737
371
|
}
|
|
738
|
-
export type WorkbenchExecutionOutputSchema = "workbench.
|
|
372
|
+
export type WorkbenchExecutionOutputSchema = "workbench.skill_patch.v1" | "workbench.result.v1" | string;
|
|
739
373
|
export interface WorkbenchExecutionOutputContract {
|
|
740
374
|
name: string;
|
|
741
375
|
schema: WorkbenchExecutionOutputSchema;
|
|
@@ -761,7 +395,7 @@ export interface WorkbenchExecutionSpec {
|
|
|
761
395
|
id: string;
|
|
762
396
|
projectId: string;
|
|
763
397
|
runId: string;
|
|
764
|
-
|
|
398
|
+
versionId?: string;
|
|
765
399
|
purpose: WorkbenchExecutionPurpose;
|
|
766
400
|
adapter: WorkbenchAdapterInvocation;
|
|
767
401
|
sandbox: WorkbenchSandboxTemplate;
|
|
@@ -770,12 +404,87 @@ export interface WorkbenchExecutionSpec {
|
|
|
770
404
|
policy: WorkbenchExecutionPolicy;
|
|
771
405
|
metadata: Record<string, Json>;
|
|
772
406
|
}
|
|
773
|
-
export interface
|
|
407
|
+
export interface BlobObjectRef {
|
|
408
|
+
bucket: string;
|
|
409
|
+
key: string;
|
|
410
|
+
byteLength: number;
|
|
411
|
+
sha256: string;
|
|
412
|
+
}
|
|
413
|
+
export interface WorkbenchSkillPatch {
|
|
774
414
|
files: SurfaceSnapshotFile[];
|
|
775
415
|
fileChanges: string[];
|
|
776
416
|
summary?: string;
|
|
777
417
|
feedback?: Json;
|
|
778
418
|
}
|
|
419
|
+
export interface WorkbenchCaseCriterionScore {
|
|
420
|
+
criterion_id: string;
|
|
421
|
+
label: string;
|
|
422
|
+
score: number;
|
|
423
|
+
pass: boolean;
|
|
424
|
+
errors?: string[];
|
|
425
|
+
rationale?: string;
|
|
426
|
+
}
|
|
427
|
+
export interface MetricStats {
|
|
428
|
+
count: number;
|
|
429
|
+
mean: number;
|
|
430
|
+
variance: number;
|
|
431
|
+
stddev: number;
|
|
432
|
+
min: number;
|
|
433
|
+
max: number;
|
|
434
|
+
}
|
|
435
|
+
export type EvalCaseStatus = "completed" | "error";
|
|
436
|
+
export type EvalCaseSource = Record<string, Json>;
|
|
437
|
+
export interface EvalCaseResult {
|
|
438
|
+
id: string;
|
|
439
|
+
label?: string;
|
|
440
|
+
split?: string;
|
|
441
|
+
status?: EvalCaseStatus;
|
|
442
|
+
durationMs?: number;
|
|
443
|
+
metrics: Record<string, number>;
|
|
444
|
+
source?: EvalCaseSource;
|
|
445
|
+
feedback?: Json;
|
|
446
|
+
criteria?: WorkbenchCaseCriterionScore[];
|
|
447
|
+
}
|
|
448
|
+
export type ExecutionRole = "improver" | "runner" | "engine";
|
|
449
|
+
export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
|
|
450
|
+
export interface ExecutionUsage {
|
|
451
|
+
provider?: string;
|
|
452
|
+
model?: string;
|
|
453
|
+
inputTokens?: number;
|
|
454
|
+
uncachedInputTokens?: number;
|
|
455
|
+
cachedInputTokens?: number;
|
|
456
|
+
cacheCreationInputTokens?: number;
|
|
457
|
+
cacheReadInputTokens?: number;
|
|
458
|
+
outputTokens?: number;
|
|
459
|
+
reasoningOutputTokens?: number;
|
|
460
|
+
totalTokens?: number;
|
|
461
|
+
costUsd?: number;
|
|
462
|
+
costSource?: ExecutionUsageCostSource;
|
|
463
|
+
pricingSource?: string;
|
|
464
|
+
}
|
|
465
|
+
export interface UsageSummary {
|
|
466
|
+
total?: ExecutionUsage;
|
|
467
|
+
improver?: ExecutionUsage;
|
|
468
|
+
runner?: ExecutionUsage;
|
|
469
|
+
engine?: ExecutionUsage;
|
|
470
|
+
}
|
|
471
|
+
export interface EvaluationUsageStats {
|
|
472
|
+
total?: ExecutionUsageStats;
|
|
473
|
+
improver?: ExecutionUsageStats;
|
|
474
|
+
runner?: ExecutionUsageStats;
|
|
475
|
+
engine?: ExecutionUsageStats;
|
|
476
|
+
}
|
|
477
|
+
export interface ExecutionUsageStats {
|
|
478
|
+
inputTokens?: MetricStats;
|
|
479
|
+
uncachedInputTokens?: MetricStats;
|
|
480
|
+
cachedInputTokens?: MetricStats;
|
|
481
|
+
cacheCreationInputTokens?: MetricStats;
|
|
482
|
+
cacheReadInputTokens?: MetricStats;
|
|
483
|
+
reasoningOutputTokens?: MetricStats;
|
|
484
|
+
outputTokens?: MetricStats;
|
|
485
|
+
totalTokens?: MetricStats;
|
|
486
|
+
costUsd?: MetricStats;
|
|
487
|
+
}
|
|
779
488
|
export interface WorkbenchResult {
|
|
780
489
|
score: number;
|
|
781
490
|
metrics?: Record<string, number>;
|
|
@@ -888,6 +597,24 @@ export interface WorkbenchTraceSession {
|
|
|
888
597
|
trace: WorkbenchExecutionTrace;
|
|
889
598
|
metadata?: Record<string, Json>;
|
|
890
599
|
}
|
|
600
|
+
export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
|
|
601
|
+
export type RemoteWorkbenchJobKind = "execute";
|
|
602
|
+
export interface RemoteWorkbenchJob {
|
|
603
|
+
id: string;
|
|
604
|
+
projectId: string;
|
|
605
|
+
runId: string;
|
|
606
|
+
versionId?: string;
|
|
607
|
+
kind: RemoteWorkbenchJobKind;
|
|
608
|
+
status: RemoteWorkbenchJobStatus;
|
|
609
|
+
attempt: number;
|
|
610
|
+
createdAt: string;
|
|
611
|
+
updatedAt: string;
|
|
612
|
+
startedAt?: string;
|
|
613
|
+
finishedAt?: string;
|
|
614
|
+
input: Json;
|
|
615
|
+
output?: Json;
|
|
616
|
+
error?: string;
|
|
617
|
+
}
|
|
891
618
|
export interface WorkbenchExecutionEvidence {
|
|
892
619
|
id: string;
|
|
893
620
|
kind: string;
|
|
@@ -896,7 +623,7 @@ export interface WorkbenchExecutionEvidence {
|
|
|
896
623
|
status: RemoteWorkbenchJobStatus;
|
|
897
624
|
jobIds: string[];
|
|
898
625
|
executionIds: string[];
|
|
899
|
-
|
|
626
|
+
versionId?: string;
|
|
900
627
|
caseId?: string;
|
|
901
628
|
sampleIndex?: number;
|
|
902
629
|
attemptIndex?: number;
|
|
@@ -908,44 +635,6 @@ export interface WorkbenchExecutionTraceDetail {
|
|
|
908
635
|
runId: string;
|
|
909
636
|
executions: WorkbenchExecutionEvidence[];
|
|
910
637
|
}
|
|
911
|
-
export interface AuthoredWorkbenchCaseSummary {
|
|
912
|
-
id: string;
|
|
913
|
-
slug: string;
|
|
914
|
-
path: string;
|
|
915
|
-
name: string;
|
|
916
|
-
split?: string;
|
|
917
|
-
fileCount: number;
|
|
918
|
-
}
|
|
919
|
-
export interface AuthoredWorkbenchSourceFile {
|
|
920
|
-
path: string;
|
|
921
|
-
content: string;
|
|
922
|
-
}
|
|
923
|
-
export interface AuthoredWorkbenchSourceDocument {
|
|
924
|
-
path: string;
|
|
925
|
-
exists: boolean;
|
|
926
|
-
source_yaml: string;
|
|
927
|
-
source_files: AuthoredWorkbenchSourceFile[];
|
|
928
|
-
spec: AuthoredWorkbenchSourceSpec | null;
|
|
929
|
-
cases: AuthoredWorkbenchCaseSummary[];
|
|
930
|
-
}
|
|
931
|
-
export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
|
|
932
|
-
export type RemoteWorkbenchJobKind = "execute";
|
|
933
|
-
export interface RemoteWorkbenchJob {
|
|
934
|
-
id: string;
|
|
935
|
-
projectId: string;
|
|
936
|
-
runId: string;
|
|
937
|
-
candidateId?: string;
|
|
938
|
-
kind: RemoteWorkbenchJobKind;
|
|
939
|
-
status: RemoteWorkbenchJobStatus;
|
|
940
|
-
attempt: number;
|
|
941
|
-
createdAt: string;
|
|
942
|
-
updatedAt: string;
|
|
943
|
-
startedAt?: string;
|
|
944
|
-
finishedAt?: string;
|
|
945
|
-
input: Json;
|
|
946
|
-
output?: Json;
|
|
947
|
-
error?: string;
|
|
948
|
-
}
|
|
949
638
|
export interface WorkbenchRemoteJobClaimRequest {
|
|
950
639
|
schema: "workbench.remote.job.claim_request.v1";
|
|
951
640
|
ownerUserId: string;
|
|
@@ -1015,32 +704,6 @@ export interface WorkbenchRemoteJobRetry {
|
|
|
1015
704
|
leaseToken: string;
|
|
1016
705
|
reason: string;
|
|
1017
706
|
}
|
|
1018
|
-
export
|
|
1019
|
-
|
|
1020
|
-
environmentVersionId?: string;
|
|
1021
|
-
specVersionId: string;
|
|
1022
|
-
candidateId: string | null;
|
|
1023
|
-
activeCandidateId?: string | null;
|
|
1024
|
-
outputCandidateId?: string | null;
|
|
1025
|
-
input: {
|
|
1026
|
-
benchmarkFingerprint: string;
|
|
1027
|
-
candidateFingerprint: string;
|
|
1028
|
-
baseCandidateId: string | null;
|
|
1029
|
-
payerUserId?: string;
|
|
1030
|
-
candidateOwnerUserId?: string;
|
|
1031
|
-
candidateOwnerUsername?: string;
|
|
1032
|
-
preserveActiveCandidateId?: string | null;
|
|
1033
|
-
selectedSamples?: Array<{
|
|
1034
|
-
caseId: string;
|
|
1035
|
-
sampleIndex: number;
|
|
1036
|
-
}>;
|
|
1037
|
-
sourceYaml?: string;
|
|
1038
|
-
candidateSourceFiles?: SurfaceSnapshotFile[];
|
|
1039
|
-
baseFiles: SurfaceSnapshotFile[];
|
|
1040
|
-
engineResolveFiles: SurfaceSnapshotFile[];
|
|
1041
|
-
};
|
|
1042
|
-
jobCount: number;
|
|
1043
|
-
completedJobCount: number;
|
|
1044
|
-
failedJobCount: number;
|
|
1045
|
-
}
|
|
707
|
+
export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
|
|
708
|
+
export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
|
|
1046
709
|
//# sourceMappingURL=index.d.ts.map
|