@workbench-ai/workbench-contract 0.0.67 → 0.0.69
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +439 -698
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +80 -201
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,682 +1,394 @@
|
|
|
1
1
|
export type Json = null | boolean | number | string | Json[] | {
|
|
2
2
|
[key: string]: Json;
|
|
3
3
|
};
|
|
4
|
-
export
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
visibility: "private" | "public";
|
|
11
|
-
createdAt: string;
|
|
12
|
-
updatedAt: string;
|
|
13
|
-
activeEnvironmentVersionId: string;
|
|
14
|
-
currentSpecVersionId: string;
|
|
15
|
-
activeCandidateId?: string | null;
|
|
16
|
-
sourceFingerprint?: string;
|
|
17
|
-
starCount: number;
|
|
4
|
+
export interface SurfaceSnapshotFile {
|
|
5
|
+
path: string;
|
|
6
|
+
kind?: "text" | "binary";
|
|
7
|
+
encoding?: "utf8" | "base64";
|
|
8
|
+
content: string;
|
|
9
|
+
executable?: boolean;
|
|
18
10
|
}
|
|
19
|
-
export
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
runCount: number;
|
|
32
|
-
starCount: number;
|
|
33
|
-
viewerHasStarred?: boolean;
|
|
11
|
+
export declare function normalizeWorkbenchSourcePath(filePath: string): string;
|
|
12
|
+
export declare function normalizeWorkbenchSourceRequestPath(filePath: string): string;
|
|
13
|
+
export declare function normalizeWorkbenchSkillName(value: string): string;
|
|
14
|
+
export declare function isWorkbenchLocalMetadataPath(filePath: string): boolean;
|
|
15
|
+
export type WorkbenchInspectionFileOwnerKind = "version" | "trace" | "artifact";
|
|
16
|
+
export interface WorkbenchInspectionFileContent {
|
|
17
|
+
path: string;
|
|
18
|
+
kind?: SurfaceSnapshotFile["kind"];
|
|
19
|
+
encoding?: SurfaceSnapshotFile["encoding"];
|
|
20
|
+
executable?: boolean;
|
|
21
|
+
content?: string;
|
|
22
|
+
unavailableReason?: string;
|
|
34
23
|
}
|
|
35
|
-
export
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
24
|
+
export declare function workbenchInspectionFileContentUnavailableReason(file: Pick<SurfaceSnapshotFile, "kind" | "encoding">): string | null;
|
|
25
|
+
export declare function workbenchInspectionFileContent(file: SurfaceSnapshotFile): WorkbenchInspectionFileContent;
|
|
26
|
+
export declare function workbenchInspectionFileManifest(file: SurfaceSnapshotFile): SurfaceSnapshotFile;
|
|
27
|
+
export interface WorkbenchAgent {
|
|
28
|
+
name: string;
|
|
29
|
+
adapter: string;
|
|
30
|
+
model?: string;
|
|
31
|
+
config: Record<string, Json>;
|
|
39
32
|
}
|
|
40
|
-
export interface
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
ordinal: number;
|
|
44
|
-
sourceYaml: string;
|
|
45
|
-
createdAt: string;
|
|
46
|
-
updatedAt: string;
|
|
47
|
-
validation: WorkbenchSpecValidation;
|
|
33
|
+
export interface WorkbenchAgentSnapshot {
|
|
34
|
+
hash: string;
|
|
35
|
+
agent: WorkbenchAgent;
|
|
48
36
|
}
|
|
49
|
-
export
|
|
50
|
-
|
|
37
|
+
export type WorkbenchSkillSourceKind = "local" | "remote" | "none";
|
|
38
|
+
export type WorkbenchSkillIncludeKind = Exclude<WorkbenchSkillSourceKind, "none">;
|
|
39
|
+
export interface WorkbenchSkillInclude {
|
|
51
40
|
name: string;
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
41
|
+
kind: WorkbenchSkillIncludeKind;
|
|
42
|
+
path?: string;
|
|
43
|
+
from?: string;
|
|
44
|
+
ref?: string;
|
|
45
|
+
resolvedRef?: string;
|
|
46
|
+
hash?: string;
|
|
47
|
+
files?: SurfaceSnapshotFile[];
|
|
48
|
+
}
|
|
49
|
+
export interface WorkbenchSkillSource {
|
|
50
|
+
name: string;
|
|
51
|
+
kind: WorkbenchSkillSourceKind;
|
|
52
|
+
path?: string;
|
|
53
|
+
from?: string;
|
|
54
|
+
ref?: string;
|
|
55
|
+
resolvedRef?: string;
|
|
56
|
+
hash?: string;
|
|
57
|
+
includes?: WorkbenchSkillInclude[];
|
|
58
|
+
}
|
|
59
|
+
export interface WorkbenchSkillBundleSnapshot {
|
|
60
|
+
hash: string;
|
|
61
|
+
skillName: string;
|
|
62
|
+
entryName: string;
|
|
63
|
+
source: WorkbenchSkillSource;
|
|
64
|
+
files: SurfaceSnapshotFile[];
|
|
65
|
+
includedSkills: WorkbenchSkillInclude[];
|
|
55
66
|
createdAt: string;
|
|
56
|
-
updatedAt: string;
|
|
57
67
|
}
|
|
58
|
-
export interface
|
|
68
|
+
export interface WorkbenchVersion {
|
|
59
69
|
id: string;
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
imageRef: string;
|
|
64
|
-
sourceHash: string;
|
|
65
|
-
sourceType: "builtin" | "dockerfile";
|
|
66
|
-
build?: {
|
|
67
|
-
dockerfileRef?: BlobObjectRef;
|
|
68
|
-
logRef?: BlobObjectRef;
|
|
69
|
-
error?: string;
|
|
70
|
-
startedAt?: string;
|
|
71
|
-
finishedAt?: string;
|
|
72
|
-
};
|
|
73
|
-
status: "ready" | "building" | "failed";
|
|
70
|
+
hash: string;
|
|
71
|
+
message: string;
|
|
72
|
+
parentIds: string[];
|
|
74
73
|
createdAt: string;
|
|
75
|
-
updatedAt: string;
|
|
76
|
-
}
|
|
77
|
-
export interface RemoteWorkbenchEnvironmentSpec {
|
|
78
|
-
base: string;
|
|
79
|
-
resources: {
|
|
80
|
-
cpu: number;
|
|
81
|
-
memoryGb: number;
|
|
82
|
-
diskGb: number;
|
|
83
|
-
timeoutMinutes: number;
|
|
84
|
-
};
|
|
85
|
-
network: "off" | "on";
|
|
86
|
-
}
|
|
87
|
-
export interface BlobObjectRef {
|
|
88
|
-
bucket: string;
|
|
89
|
-
key: string;
|
|
90
|
-
byteLength: number;
|
|
91
|
-
sha256: string;
|
|
92
|
-
}
|
|
93
|
-
export type RemoteWorkbenchSnapshotKind = "candidate" | "engineResolve" | "adapters" | "runtime";
|
|
94
|
-
export type WorkspaceWriteEncoding = "utf8" | "base64";
|
|
95
|
-
export interface SurfaceSnapshotFile {
|
|
96
|
-
path: string;
|
|
97
|
-
kind: "text" | "binary";
|
|
98
|
-
encoding: WorkspaceWriteEncoding;
|
|
99
|
-
content: string;
|
|
100
|
-
executable: boolean;
|
|
101
|
-
contentRedacted?: boolean;
|
|
102
|
-
}
|
|
103
|
-
export interface WorkbenchEngineCaseFiles {
|
|
104
|
-
public?: SurfaceSnapshotFile[];
|
|
105
|
-
private?: SurfaceSnapshotFile[];
|
|
106
|
-
source?: SurfaceSnapshotFile[];
|
|
107
|
-
}
|
|
108
|
-
export interface SurfaceSnapshot {
|
|
109
74
|
files: SurfaceSnapshotFile[];
|
|
110
75
|
}
|
|
111
|
-
export interface
|
|
112
|
-
|
|
113
|
-
content: string;
|
|
114
|
-
encoding?: WorkspaceWriteEncoding;
|
|
115
|
-
executable?: boolean;
|
|
116
|
-
}
|
|
117
|
-
export interface EngineResolveBinding {
|
|
118
|
-
engine: string;
|
|
119
|
-
resolver: {
|
|
120
|
-
use: string;
|
|
121
|
-
withFingerprint: string;
|
|
122
|
-
};
|
|
123
|
-
}
|
|
124
|
-
export interface RemoteWorkbenchSnapshotBase {
|
|
76
|
+
export interface WorkbenchEvalSnapshot {
|
|
77
|
+
hash: string;
|
|
125
78
|
files: SurfaceSnapshotFile[];
|
|
79
|
+
caseCount: number;
|
|
80
|
+
createdAt: string;
|
|
126
81
|
updatedAt: string;
|
|
82
|
+
scoreAdapter: string;
|
|
127
83
|
}
|
|
128
|
-
export
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
export interface
|
|
133
|
-
kind: Exclude<RemoteWorkbenchSnapshotKind, "engineResolve">;
|
|
134
|
-
}
|
|
135
|
-
export type RemoteWorkbenchSnapshot = RemoteWorkbenchEngineResolveSnapshot | RemoteWorkbenchStandardSnapshot;
|
|
136
|
-
export type CandidateStatus = "running" | "evaluated" | "repair_exhausted" | "eval_error" | "agent_error";
|
|
137
|
-
export interface MetricStats {
|
|
138
|
-
count: number;
|
|
139
|
-
mean: number;
|
|
140
|
-
variance: number;
|
|
141
|
-
stddev: number;
|
|
142
|
-
min: number;
|
|
143
|
-
max: number;
|
|
144
|
-
}
|
|
145
|
-
export type EvalCaseStatus = "completed" | "error";
|
|
146
|
-
export type EvalCaseSource = Record<string, Json>;
|
|
147
|
-
export interface CandidateCaseCriterionScore {
|
|
148
|
-
criterion_id: string;
|
|
149
|
-
label: string;
|
|
150
|
-
score: number;
|
|
151
|
-
pass: boolean;
|
|
152
|
-
errors?: string[];
|
|
153
|
-
rationale?: string;
|
|
154
|
-
}
|
|
155
|
-
export interface EvalCaseResult {
|
|
84
|
+
export type WorkbenchRunKind = "eval" | "improve" | "compare";
|
|
85
|
+
export type WorkbenchRunStatus = "running" | "succeeded" | "failed" | "canceled";
|
|
86
|
+
export type WorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "canceled";
|
|
87
|
+
export type WorkbenchArtifactKind = "file" | "directory" | "log" | "scorecard";
|
|
88
|
+
export interface WorkbenchRun {
|
|
156
89
|
id: string;
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
export type ExecutionRole = "improver" | "runner" | "engine";
|
|
167
|
-
export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
|
|
168
|
-
export interface ExecutionUsage {
|
|
169
|
-
provider?: string;
|
|
170
|
-
model?: string;
|
|
171
|
-
inputTokens?: number;
|
|
172
|
-
uncachedInputTokens?: number;
|
|
173
|
-
cachedInputTokens?: number;
|
|
174
|
-
cacheCreationInputTokens?: number;
|
|
175
|
-
cacheReadInputTokens?: number;
|
|
176
|
-
outputTokens?: number;
|
|
177
|
-
reasoningOutputTokens?: number;
|
|
178
|
-
totalTokens?: number;
|
|
90
|
+
kind: WorkbenchRunKind;
|
|
91
|
+
versionId: string;
|
|
92
|
+
skillName: string;
|
|
93
|
+
skillBundleHash: string;
|
|
94
|
+
evalHash: string;
|
|
95
|
+
agentName: string;
|
|
96
|
+
agentHash: string;
|
|
97
|
+
status: WorkbenchRunStatus;
|
|
98
|
+
score?: number;
|
|
179
99
|
costUsd?: number;
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
total?: ExecutionUsage;
|
|
185
|
-
improver?: ExecutionUsage;
|
|
186
|
-
runner?: ExecutionUsage;
|
|
187
|
-
engine?: ExecutionUsage;
|
|
188
|
-
}
|
|
189
|
-
export interface EvaluationCandidateSummary {
|
|
190
|
-
id: string;
|
|
191
|
-
kind: "candidate";
|
|
192
|
-
label?: string;
|
|
193
|
-
}
|
|
194
|
-
export type EvaluationSampleStatus = "planned" | "running" | "completed" | "error";
|
|
195
|
-
export type EvaluationStatus = EvaluationSampleStatus | "partial";
|
|
196
|
-
export interface EvaluationSampleRecord {
|
|
197
|
-
id: string;
|
|
198
|
-
index: number;
|
|
199
|
-
candidate: EvaluationCandidateSummary;
|
|
200
|
-
status: EvaluationSampleStatus;
|
|
201
|
-
startedAt?: string;
|
|
100
|
+
latencyMs?: number;
|
|
101
|
+
jobIds?: string[];
|
|
102
|
+
traceIds: string[];
|
|
103
|
+
createdAt: string;
|
|
202
104
|
finishedAt?: string;
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
usage?: UsageSummary;
|
|
105
|
+
parentRunId?: string;
|
|
106
|
+
outputVersionId?: string;
|
|
206
107
|
error?: string;
|
|
207
|
-
cases?: EvalCaseResult[];
|
|
208
|
-
feedback?: Json;
|
|
209
108
|
}
|
|
210
|
-
export interface
|
|
109
|
+
export interface WorkbenchJob {
|
|
211
110
|
id: string;
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
cacheReadInputTokens?: MetricStats;
|
|
231
|
-
outputTokens?: MetricStats;
|
|
232
|
-
reasoningOutputTokens?: MetricStats;
|
|
233
|
-
totalTokens?: MetricStats;
|
|
234
|
-
costUsd?: MetricStats;
|
|
235
|
-
}
|
|
236
|
-
export interface EvaluationRecord {
|
|
237
|
-
candidate: EvaluationCandidateSummary;
|
|
238
|
-
status: EvaluationStatus;
|
|
239
|
-
sampleCount: number;
|
|
240
|
-
completedSampleCount: number;
|
|
241
|
-
errorSampleCount: number;
|
|
111
|
+
runId: string;
|
|
112
|
+
kind: WorkbenchRunKind;
|
|
113
|
+
versionId: string;
|
|
114
|
+
skillName: string;
|
|
115
|
+
skillBundleHash: string;
|
|
116
|
+
evalHash: string;
|
|
117
|
+
agentName: string;
|
|
118
|
+
agentHash: string;
|
|
119
|
+
caseId: string;
|
|
120
|
+
sample: number;
|
|
121
|
+
status: WorkbenchJobStatus;
|
|
122
|
+
score?: number;
|
|
123
|
+
command?: string;
|
|
124
|
+
dockerImage?: string;
|
|
125
|
+
exitCode?: number;
|
|
126
|
+
artifactIds: string[];
|
|
127
|
+
traceIds: string[];
|
|
128
|
+
createdAt: string;
|
|
242
129
|
startedAt?: string;
|
|
243
130
|
finishedAt?: string;
|
|
244
|
-
|
|
245
|
-
durationMs?: MetricStats;
|
|
246
|
-
usage?: EvaluationUsageStats;
|
|
247
|
-
cases?: EvaluationCaseStats[];
|
|
248
|
-
samples: EvaluationSampleRecord[];
|
|
131
|
+
durationMs?: number;
|
|
249
132
|
error?: string;
|
|
250
133
|
}
|
|
251
|
-
export interface
|
|
134
|
+
export interface WorkbenchArtifact {
|
|
252
135
|
id: string;
|
|
253
136
|
runId: string;
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
candidateName?: string;
|
|
258
|
-
candidateVersion: number;
|
|
259
|
-
candidateRunId?: string;
|
|
260
|
-
candidateRunName?: string;
|
|
137
|
+
jobId: string;
|
|
138
|
+
kind: WorkbenchArtifactKind;
|
|
139
|
+
path: string;
|
|
261
140
|
createdAt: string;
|
|
262
|
-
|
|
263
|
-
status: EvaluationStatus;
|
|
264
|
-
sampleCount: number;
|
|
265
|
-
completedSampleCount: number;
|
|
266
|
-
errorSampleCount: number;
|
|
267
|
-
metrics?: Record<string, MetricStats>;
|
|
268
|
-
selectionMetric?: string;
|
|
269
|
-
selectionLabel?: string;
|
|
270
|
-
selectionScore?: MetricStats;
|
|
271
|
-
durationMs?: MetricStats;
|
|
272
|
-
usage?: EvaluationUsageStats;
|
|
273
|
-
error?: string;
|
|
274
|
-
}
|
|
275
|
-
export interface EvaluationScorecard extends EvaluationSummary {
|
|
276
|
-
evaluation: EvaluationRecord;
|
|
141
|
+
files: SurfaceSnapshotFile[];
|
|
277
142
|
}
|
|
278
|
-
export interface
|
|
143
|
+
export interface WorkbenchTrace {
|
|
279
144
|
id: string;
|
|
280
|
-
label: string;
|
|
281
|
-
direction: "higher" | "lower";
|
|
282
|
-
kind: "number" | "duration_ms" | "currency_usd";
|
|
283
|
-
group: "metric" | "execution" | "usage" | "other";
|
|
284
|
-
primary: boolean;
|
|
285
|
-
semanticRole?: "performance" | "speed" | "cost";
|
|
286
|
-
}
|
|
287
|
-
export interface WorkbenchEvaluationComparisonRow {
|
|
288
|
-
evaluationId: string;
|
|
289
145
|
runId: string;
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
updatedAt: string;
|
|
298
|
-
error?: string;
|
|
299
|
-
}
|
|
300
|
-
export interface WorkbenchCandidateEvaluationRollup {
|
|
301
|
-
candidateId: string;
|
|
302
|
-
candidateLabel: string;
|
|
303
|
-
evaluationCount: number;
|
|
304
|
-
completeEvaluationCount: number;
|
|
305
|
-
scoredEvaluationCount: number;
|
|
306
|
-
bestEvaluationId: string | null;
|
|
307
|
-
bestScore: number | null;
|
|
308
|
-
meanScore: number | null;
|
|
309
|
-
}
|
|
310
|
-
export interface WorkbenchEvaluationComparison {
|
|
311
|
-
evaluations: EvaluationSummary[];
|
|
312
|
-
rows: WorkbenchEvaluationComparisonRow[];
|
|
313
|
-
candidates: WorkbenchCandidateEvaluationRollup[];
|
|
314
|
-
metrics: WorkbenchEvaluationMetricDescriptor[];
|
|
315
|
-
}
|
|
316
|
-
export interface CandidateSummary {
|
|
317
|
-
id: string;
|
|
318
|
-
name?: string;
|
|
319
|
-
version: number;
|
|
320
|
-
ordinal: number;
|
|
321
|
-
benchmarkFingerprint: string;
|
|
322
|
-
candidateFingerprint: string;
|
|
323
|
-
ownerUserId?: string;
|
|
324
|
-
ownerUsername?: string;
|
|
325
|
-
visibility?: "private" | "public";
|
|
146
|
+
jobId?: string;
|
|
147
|
+
versionId: string;
|
|
148
|
+
skillName: string;
|
|
149
|
+
skillBundleHash: string;
|
|
150
|
+
evalHash?: string;
|
|
151
|
+
agentName: string;
|
|
152
|
+
agentHash?: string;
|
|
326
153
|
createdAt: string;
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
fileChanges: string[];
|
|
331
|
-
usage?: UsageSummary;
|
|
332
|
-
}
|
|
333
|
-
export interface CandidateRecord extends CandidateSummary {
|
|
334
|
-
eval?: EvaluationRecord;
|
|
335
|
-
prompt?: string;
|
|
336
|
-
meta?: Json;
|
|
337
|
-
}
|
|
338
|
-
export interface CandidateLineageNode {
|
|
339
|
-
id: string;
|
|
340
|
-
active: boolean;
|
|
341
|
-
summary: CandidateSummary;
|
|
342
|
-
}
|
|
343
|
-
export interface CandidateLineageEdge {
|
|
344
|
-
id: string;
|
|
345
|
-
kind: "anchor";
|
|
346
|
-
sourceId: string;
|
|
347
|
-
targetId: string;
|
|
348
|
-
}
|
|
349
|
-
export interface CandidateLineageGraph {
|
|
350
|
-
activeId: string | null;
|
|
351
|
-
nodes: CandidateLineageNode[];
|
|
352
|
-
edges: CandidateLineageEdge[];
|
|
353
|
-
}
|
|
354
|
-
export declare function buildCandidateLineage(args: {
|
|
355
|
-
summaries: readonly CandidateSummary[];
|
|
356
|
-
activeId: string | null;
|
|
357
|
-
}): CandidateLineageGraph;
|
|
358
|
-
export declare function buildWorkbenchEvaluationComparison(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationComparison;
|
|
359
|
-
export declare function buildWorkbenchEvaluationMetricDescriptors(evaluations: readonly EvaluationSummary[]): WorkbenchEvaluationMetricDescriptor[];
|
|
360
|
-
export declare function readEvaluationScore(evaluation: EvaluationSummary): number | null;
|
|
361
|
-
export declare function isCompleteEvaluationSummary(evaluation: Pick<EvaluationSummary, "status" | "sampleCount" | "completedSampleCount" | "errorSampleCount">): boolean;
|
|
362
|
-
export declare function formatEvaluationConfigurationLabel(evaluation: Pick<EvaluationSummary, "candidateRunName" | "candidateRunId">): string;
|
|
363
|
-
export type CandidatePreviewMode = "diff" | "raw" | "rendered";
|
|
364
|
-
export type CandidatePreviewKind = "text" | "markdown" | "table" | "spreadsheet" | "image" | "pdf" | "unsupported";
|
|
365
|
-
export type CandidatePreviewSourceEncoding = "utf8" | "base64";
|
|
366
|
-
export type CandidateFileStatus = "added" | "modified" | "unchanged";
|
|
367
|
-
export interface CandidateFileSummary {
|
|
368
|
-
path: string;
|
|
369
|
-
old_path: string | null;
|
|
370
|
-
status: CandidateFileStatus;
|
|
371
|
-
mime_type: string | null;
|
|
372
|
-
preview_kind: CandidatePreviewKind;
|
|
373
|
-
additions: number;
|
|
374
|
-
deletions: number;
|
|
375
|
-
}
|
|
376
|
-
export interface CandidateFilePreviewSource {
|
|
377
|
-
content: string;
|
|
378
|
-
encoding: CandidatePreviewSourceEncoding;
|
|
379
|
-
}
|
|
380
|
-
export interface CandidateFilePreview {
|
|
381
|
-
path: string;
|
|
382
|
-
view: CandidatePreviewMode;
|
|
383
|
-
mime_type: string | null;
|
|
384
|
-
preview_kind: CandidatePreviewKind;
|
|
385
|
-
diff: string | null;
|
|
386
|
-
source: CandidateFilePreviewSource | null;
|
|
387
|
-
rendered_html: string | null;
|
|
388
|
-
}
|
|
389
|
-
export interface CandidateCaseCriterionResult {
|
|
390
|
-
criterion_id: string;
|
|
391
|
-
pass: boolean;
|
|
392
|
-
score: number;
|
|
393
|
-
errors: string[];
|
|
394
|
-
rationale?: string;
|
|
395
|
-
}
|
|
396
|
-
export interface CandidateCaseExecutionRef {
|
|
397
|
-
runId: string;
|
|
398
|
-
kind: string;
|
|
399
|
-
role: WorkbenchExecutionEventRole;
|
|
400
|
-
status: RemoteWorkbenchJobStatus;
|
|
401
|
-
jobIds: string[];
|
|
402
|
-
executionIds: string[];
|
|
403
|
-
createdAt?: string;
|
|
404
|
-
startedAt?: string;
|
|
405
|
-
finishedAt?: string;
|
|
406
|
-
durationMs?: number;
|
|
407
|
-
caseId?: string;
|
|
408
|
-
sampleIndex?: number;
|
|
409
|
-
attemptIndex?: number;
|
|
410
|
-
}
|
|
411
|
-
export interface CandidateCaseReview {
|
|
412
|
-
candidateId: string;
|
|
413
|
-
caseId: string;
|
|
414
|
-
caseLabel: string;
|
|
415
|
-
sampleId?: string;
|
|
416
|
-
sampleIndex?: number;
|
|
417
|
-
status?: EvalCaseStatus | RemoteWorkbenchJobStatus;
|
|
418
|
-
metrics: Record<string, number>;
|
|
419
|
-
durationMs?: number;
|
|
420
|
-
source?: EvalCaseSource;
|
|
421
|
-
feedback?: Json;
|
|
422
|
-
executions: CandidateCaseExecutionRef[];
|
|
423
|
-
criteria_results: CandidateCaseCriterionResult[];
|
|
424
|
-
}
|
|
425
|
-
export type RunStatus = "queued" | "running" | "finished";
|
|
426
|
-
export type RunOutcome = "ok" | "error" | "cancelled";
|
|
427
|
-
export type RemoteRunWorkflow = "eval" | "improve";
|
|
428
|
-
export interface RunSummary {
|
|
429
|
-
id: string;
|
|
430
|
-
workflow: RemoteRunWorkflow;
|
|
431
|
-
benchmarkFingerprint: string;
|
|
432
|
-
status: RunStatus;
|
|
433
|
-
candidateId?: string | null;
|
|
434
|
-
candidateRunId?: string;
|
|
435
|
-
candidateRunName?: string;
|
|
436
|
-
startedAt: string;
|
|
437
|
-
finishedAt?: string;
|
|
438
|
-
durationMs?: number;
|
|
439
|
-
improver: string;
|
|
440
|
-
engineRun: string;
|
|
441
|
-
strategy: string;
|
|
442
|
-
optimizeOn?: string;
|
|
443
|
-
selectBy?: string;
|
|
444
|
-
budget: number;
|
|
445
|
-
repairBudget: number;
|
|
446
|
-
attemptsRequested: number;
|
|
447
|
-
attemptsExecuted: number;
|
|
448
|
-
samples: number;
|
|
449
|
-
executionFingerprint?: string;
|
|
450
|
-
stoppedReason?: "budget_exhausted" | "completed" | "dry_run" | "cancelled";
|
|
451
|
-
outcome?: RunOutcome;
|
|
452
|
-
error?: string;
|
|
453
|
-
activeCandidateId?: string | null;
|
|
454
|
-
outputCandidateId?: string | null;
|
|
455
|
-
}
|
|
456
|
-
export interface WorkbenchRuntimeRun extends RunSummary {
|
|
457
|
-
jobCount?: number;
|
|
458
|
-
completedJobCount?: number;
|
|
459
|
-
failedJobCount?: number;
|
|
154
|
+
request: Json;
|
|
155
|
+
result: Json;
|
|
156
|
+
files: SurfaceSnapshotFile[];
|
|
460
157
|
}
|
|
461
|
-
export interface
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
type: "run_started" | "job_queued" | "job_started" | "job_progress" | "sandbox_allocated" | "sandbox_stopped" | "candidate_created" | "candidate_evaluated" | "active_changed" | "run_finished";
|
|
158
|
+
export interface WorkbenchLineageEdge {
|
|
159
|
+
parentId: string;
|
|
160
|
+
childId: string;
|
|
465
161
|
runId?: string;
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
activeId?: string;
|
|
470
|
-
status?: CandidateStatus | RemoteWorkbenchJobStatus;
|
|
471
|
-
metrics?: Record<string, number>;
|
|
472
|
-
detail?: Record<string, Json>;
|
|
473
|
-
}
|
|
474
|
-
export interface RuntimeSnapshot {
|
|
475
|
-
workspaceRoot: string;
|
|
476
|
-
activeId: string | null;
|
|
477
|
-
currentBenchmarkFingerprint: string | null;
|
|
478
|
-
summaries: CandidateSummary[];
|
|
479
|
-
evaluations: EvaluationSummary[];
|
|
480
|
-
runs: RunSummary[];
|
|
481
|
-
}
|
|
482
|
-
export interface WorkbenchRuntimeCandidateFiles {
|
|
483
|
-
candidateId: string;
|
|
484
|
-
files: SurfaceSnapshotFile[];
|
|
162
|
+
reason: "version" | "improve";
|
|
163
|
+
createdAt: string;
|
|
164
|
+
message?: string;
|
|
485
165
|
}
|
|
486
|
-
export
|
|
487
|
-
|
|
488
|
-
|
|
166
|
+
export type WorkbenchRemoteKind = "workbench-cloud" | "file";
|
|
167
|
+
export interface WorkbenchRemote {
|
|
168
|
+
name: string;
|
|
169
|
+
url: string;
|
|
170
|
+
kind: WorkbenchRemoteKind;
|
|
489
171
|
}
|
|
490
|
-
export interface
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
candidates: CandidateRecord[];
|
|
494
|
-
candidateFiles: WorkbenchRuntimeCandidateFiles[];
|
|
495
|
-
evaluations: EvaluationScorecard[];
|
|
496
|
-
runs: WorkbenchRuntimeRun[];
|
|
497
|
-
jobs: RemoteWorkbenchJob[];
|
|
498
|
-
executionFiles: WorkbenchRuntimeExecutionFiles[];
|
|
499
|
-
events: RuntimeEvent[];
|
|
500
|
-
}
|
|
501
|
-
export interface WorkbenchRuntimeBundleStats {
|
|
502
|
-
candidates: number;
|
|
503
|
-
candidateFiles: number;
|
|
504
|
-
evaluations: number;
|
|
505
|
-
runs: number;
|
|
506
|
-
jobs: number;
|
|
507
|
-
executionFiles: number;
|
|
508
|
-
events: number;
|
|
509
|
-
activeId: string | null;
|
|
510
|
-
}
|
|
511
|
-
export interface WorkbenchRuntimeImportResult {
|
|
512
|
-
changed: boolean;
|
|
513
|
-
stats: WorkbenchRuntimeBundleStats;
|
|
514
|
-
}
|
|
515
|
-
export interface WorkbenchProjectSourceResources {
|
|
516
|
-
cpu?: number;
|
|
517
|
-
memoryGb?: number;
|
|
518
|
-
diskGb?: number;
|
|
519
|
-
timeoutMinutes?: number;
|
|
520
|
-
}
|
|
521
|
-
export interface WorkbenchProjectStateSource {
|
|
522
|
-
source: string;
|
|
523
|
-
files: SurfaceSnapshotFile[];
|
|
524
|
-
candidateFiles: SurfaceSnapshotFile[];
|
|
525
|
-
engineResolveFiles: SurfaceSnapshotFile[];
|
|
526
|
-
engineResolveBinding: EngineResolveBinding;
|
|
527
|
-
adapterFiles: SurfaceSnapshotFile[];
|
|
528
|
-
dockerfile: string;
|
|
529
|
-
runtimeDockerfile: string;
|
|
530
|
-
runtimeFiles: SurfaceSnapshotFile[];
|
|
531
|
-
network: "off" | "on";
|
|
532
|
-
resources: WorkbenchProjectSourceResources;
|
|
533
|
-
revisionId?: string;
|
|
534
|
-
fingerprint?: string;
|
|
172
|
+
export interface WorkbenchRefs {
|
|
173
|
+
current?: string;
|
|
174
|
+
[name: string]: string | undefined;
|
|
535
175
|
}
|
|
536
|
-
export interface
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
176
|
+
export interface WorkbenchProjectState {
|
|
177
|
+
schema: "workbench.skill.state.v1";
|
|
178
|
+
root: string;
|
|
179
|
+
refs: WorkbenchRefs;
|
|
180
|
+
remotes: Record<string, WorkbenchRemote>;
|
|
181
|
+
versions: WorkbenchVersion[];
|
|
182
|
+
skillSources: WorkbenchSkillSource[];
|
|
183
|
+
skillBundles: WorkbenchSkillBundleSnapshot[];
|
|
184
|
+
evals: WorkbenchEvalSnapshot[];
|
|
185
|
+
agents: WorkbenchAgent[];
|
|
186
|
+
runs: WorkbenchRun[];
|
|
187
|
+
jobs: WorkbenchJob[];
|
|
188
|
+
traces: WorkbenchTrace[];
|
|
189
|
+
executionEvents: WorkbenchExecutionEventBatch[];
|
|
190
|
+
artifacts: WorkbenchArtifact[];
|
|
191
|
+
lineage: WorkbenchLineageEdge[];
|
|
192
|
+
}
|
|
193
|
+
export interface WorkbenchStatus {
|
|
194
|
+
root: string;
|
|
195
|
+
initialized: boolean;
|
|
196
|
+
currentSkillHash?: string;
|
|
197
|
+
currentVersionId?: string;
|
|
198
|
+
defaultSkill?: string;
|
|
199
|
+
defaultAgent?: string;
|
|
200
|
+
versionCount: number;
|
|
201
|
+
skillCount: number;
|
|
202
|
+
agentCount: number;
|
|
203
|
+
runCount: number;
|
|
204
|
+
remoteCount: number;
|
|
205
|
+
pendingSyncCount?: number;
|
|
206
|
+
lastScore?: number;
|
|
540
207
|
}
|
|
541
|
-
export interface
|
|
542
|
-
|
|
208
|
+
export interface WorkbenchRemoteSyncState {
|
|
209
|
+
schema: "workbench.remote-sync-state.v1";
|
|
543
210
|
remote: string;
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
}
|
|
555
|
-
export interface
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
211
|
+
url: string;
|
|
212
|
+
status: "synced" | "error";
|
|
213
|
+
lastSyncedAt?: string;
|
|
214
|
+
lastAttemptAt: string;
|
|
215
|
+
lastError?: {
|
|
216
|
+
code: string;
|
|
217
|
+
message: string;
|
|
218
|
+
} | null;
|
|
219
|
+
pushed?: number;
|
|
220
|
+
pulled?: number;
|
|
221
|
+
}
|
|
222
|
+
export interface WorkbenchStatusSnapshot {
|
|
223
|
+
schema: "workbench.status.v1";
|
|
224
|
+
ok: true;
|
|
225
|
+
project: {
|
|
226
|
+
root: string;
|
|
227
|
+
initialized: boolean;
|
|
228
|
+
currentVersionId?: string;
|
|
229
|
+
defaultSkill?: string;
|
|
230
|
+
defaultAgent?: string;
|
|
561
231
|
};
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
export type WorkbenchRemoteContractSchema = "workbench.remote.capabilities.v1" | "workbench.remote.run.request.v1" | "workbench.remote.job.claim_request.v1" | "workbench.remote.job.claim.v1" | "workbench.remote.job.renewal.v1" | "workbench.remote.job.renewal_result.v1" | "workbench.remote.job.progress.v1" | "workbench.remote.job.completion.v1" | "workbench.remote.job.retry.v1";
|
|
566
|
-
export type WorkbenchRemoteProductionSandbox = "firecracker";
|
|
567
|
-
export type WorkbenchRemoteLocalSandbox = "docker";
|
|
568
|
-
export type WorkbenchRemoteNetworkPolicy = "open" | "none";
|
|
569
|
-
export interface WorkbenchRemoteCapabilities {
|
|
570
|
-
schema: "workbench.remote.capabilities.v1";
|
|
571
|
-
contractVersion: 1;
|
|
572
|
-
projectState: {
|
|
573
|
-
schema: WorkbenchProjectState["schema"];
|
|
574
|
-
guardedSourceWrites: true;
|
|
575
|
-
immutableRuntimeFacts: true;
|
|
232
|
+
worktree: {
|
|
233
|
+
hasUnversionedChanges: boolean;
|
|
234
|
+
latestVersionId?: string;
|
|
576
235
|
};
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
236
|
+
runs: {
|
|
237
|
+
total: number;
|
|
238
|
+
lastRunId?: string;
|
|
239
|
+
lastStatus?: WorkbenchRunStatus;
|
|
240
|
+
lastScore?: number;
|
|
582
241
|
};
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
sampleIndex: number;
|
|
242
|
+
remotes: Array<{
|
|
243
|
+
name: string;
|
|
244
|
+
kind: WorkbenchRemoteKind;
|
|
245
|
+
url: string;
|
|
246
|
+
sync: {
|
|
247
|
+
status: "up_to_date" | "error" | "never";
|
|
248
|
+
lastSyncedAt?: string;
|
|
249
|
+
lastAttemptAt?: string;
|
|
250
|
+
lastError?: {
|
|
251
|
+
code: string;
|
|
252
|
+
message: string;
|
|
253
|
+
} | null;
|
|
254
|
+
nextCommand?: string;
|
|
255
|
+
};
|
|
256
|
+
publication: {
|
|
257
|
+
status: "published" | "unpublished";
|
|
258
|
+
visibility?: string;
|
|
259
|
+
versionId?: string;
|
|
260
|
+
installUrl?: string;
|
|
261
|
+
pinnedInstallUrl?: string;
|
|
262
|
+
};
|
|
605
263
|
}>;
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
}
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
}
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
264
|
+
auth?: {
|
|
265
|
+
workbenchCloud: {
|
|
266
|
+
status: "authenticated" | "not_authenticated";
|
|
267
|
+
baseUrl?: string;
|
|
268
|
+
username?: string;
|
|
269
|
+
};
|
|
270
|
+
adapters: Array<{
|
|
271
|
+
adapter: string;
|
|
272
|
+
slot?: string;
|
|
273
|
+
profile: string;
|
|
274
|
+
status: string;
|
|
275
|
+
method?: string;
|
|
276
|
+
updatedAt?: string;
|
|
277
|
+
}>;
|
|
278
|
+
};
|
|
279
|
+
next: string[];
|
|
280
|
+
}
|
|
281
|
+
export interface WorkbenchComparisonCell {
|
|
282
|
+
versionId: string;
|
|
283
|
+
skillName: string;
|
|
284
|
+
skillBundleHash: string;
|
|
285
|
+
evalHash: string;
|
|
286
|
+
agentName: string;
|
|
287
|
+
agentHash: string;
|
|
288
|
+
runId?: string;
|
|
289
|
+
status?: WorkbenchRunStatus;
|
|
290
|
+
score?: number;
|
|
291
|
+
costUsd?: number;
|
|
292
|
+
latencyMs?: number;
|
|
293
|
+
error?: string;
|
|
624
294
|
}
|
|
625
|
-
export interface
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
295
|
+
export interface WorkbenchComparison {
|
|
296
|
+
evalHash?: string;
|
|
297
|
+
versions: WorkbenchVersion[];
|
|
298
|
+
skills: WorkbenchSkillBundleSnapshot[];
|
|
299
|
+
agents: WorkbenchAgentSnapshot[];
|
|
300
|
+
cells: WorkbenchComparisonCell[];
|
|
301
|
+
}
|
|
302
|
+
export interface WorkbenchInspectionSnapshot {
|
|
303
|
+
root: string;
|
|
304
|
+
status: WorkbenchStatus;
|
|
305
|
+
versions: WorkbenchVersion[];
|
|
306
|
+
skillSources: WorkbenchSkillSource[];
|
|
307
|
+
skillBundles: WorkbenchSkillBundleSnapshot[];
|
|
308
|
+
evals: WorkbenchEvalSnapshot[];
|
|
309
|
+
agents: WorkbenchAgentSnapshot[];
|
|
310
|
+
comparison?: WorkbenchComparison;
|
|
311
|
+
runs: WorkbenchRun[];
|
|
312
|
+
jobs: WorkbenchJob[];
|
|
313
|
+
traces: WorkbenchTrace[];
|
|
314
|
+
executionEvents: WorkbenchExecutionEventBatch[];
|
|
315
|
+
artifacts: WorkbenchArtifact[];
|
|
316
|
+
lineage: WorkbenchLineageEdge[];
|
|
317
|
+
remotes: WorkbenchRemote[];
|
|
318
|
+
refs: WorkbenchRefs;
|
|
319
|
+
publication?: WorkbenchPublication;
|
|
320
|
+
}
|
|
321
|
+
export interface WorkbenchPublication {
|
|
322
|
+
versionId: string;
|
|
323
|
+
installUrl: string;
|
|
324
|
+
pinnedInstallUrl: string;
|
|
325
|
+
}
|
|
326
|
+
export interface WorkbenchObjectPack {
|
|
327
|
+
schema: "workbench.object-pack.v1";
|
|
328
|
+
createdAt: string;
|
|
329
|
+
refs: WorkbenchRefs;
|
|
330
|
+
versions: WorkbenchVersion[];
|
|
331
|
+
skillSources: WorkbenchSkillSource[];
|
|
332
|
+
skillBundles: WorkbenchSkillBundleSnapshot[];
|
|
333
|
+
evals: WorkbenchEvalSnapshot[];
|
|
334
|
+
agents: WorkbenchAgent[];
|
|
335
|
+
runs: WorkbenchRun[];
|
|
336
|
+
jobs: WorkbenchJob[];
|
|
337
|
+
traces: WorkbenchTrace[];
|
|
338
|
+
executionEvents: WorkbenchExecutionEventBatch[];
|
|
339
|
+
artifacts: WorkbenchArtifact[];
|
|
340
|
+
lineage: WorkbenchLineageEdge[];
|
|
341
|
+
}
|
|
342
|
+
export interface WorkbenchFilePreview {
|
|
639
343
|
path: string;
|
|
344
|
+
source?: SurfaceSnapshotFile;
|
|
345
|
+
renderedText?: string;
|
|
346
|
+
diff?: string;
|
|
640
347
|
}
|
|
641
|
-
export interface
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
with?: Record<string, Json>;
|
|
645
|
-
}
|
|
646
|
-
export interface AuthoredWorkbenchRuntimeSpec {
|
|
647
|
-
dockerfile: string;
|
|
648
|
-
resources?: {
|
|
649
|
-
cpu?: number;
|
|
650
|
-
memoryGb?: number;
|
|
651
|
-
diskGb?: number;
|
|
652
|
-
timeoutMinutes?: number;
|
|
653
|
-
};
|
|
654
|
-
network?: {
|
|
655
|
-
egress?: "none" | "open";
|
|
656
|
-
};
|
|
348
|
+
export interface WorkbenchFileSurface {
|
|
349
|
+
files: SurfaceSnapshotFile[];
|
|
350
|
+
preview: WorkbenchFilePreview | null;
|
|
657
351
|
}
|
|
658
|
-
export
|
|
659
|
-
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
tasks?: WorkbenchAuthoredAdapterSpec;
|
|
663
|
-
environment: AuthoredWorkbenchRuntimeSpec;
|
|
664
|
-
score: AuthoredWorkbenchScoreSpec;
|
|
352
|
+
export interface WorkbenchSpecValidation {
|
|
353
|
+
ok: boolean;
|
|
354
|
+
errors: string[];
|
|
355
|
+
warnings: string[];
|
|
665
356
|
}
|
|
666
|
-
export interface
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
357
|
+
export interface RemoteWorkbenchEnvironmentSpec {
|
|
358
|
+
base: string;
|
|
359
|
+
resources: {
|
|
360
|
+
cpu: number;
|
|
361
|
+
memoryGb: number;
|
|
362
|
+
diskGb: number;
|
|
363
|
+
timeoutMinutes: number;
|
|
364
|
+
};
|
|
365
|
+
network: "off" | "on";
|
|
670
366
|
}
|
|
671
|
-
export interface
|
|
367
|
+
export interface RemoteWorkbenchEnvironmentVersion {
|
|
368
|
+
id: string;
|
|
369
|
+
environmentId: string;
|
|
672
370
|
name: string;
|
|
673
|
-
|
|
674
|
-
|
|
371
|
+
spec: RemoteWorkbenchEnvironmentSpec;
|
|
372
|
+
imageRef: string;
|
|
373
|
+
sourceHash: string;
|
|
374
|
+
sourceType: "builtin" | "dockerfile";
|
|
375
|
+
build?: {
|
|
376
|
+
dockerfileRef?: BlobObjectRef;
|
|
377
|
+
logRef?: BlobObjectRef;
|
|
378
|
+
error?: string;
|
|
379
|
+
startedAt?: string;
|
|
380
|
+
finishedAt?: string;
|
|
381
|
+
};
|
|
382
|
+
status: "ready" | "building" | "failed";
|
|
383
|
+
createdAt: string;
|
|
384
|
+
updatedAt: string;
|
|
675
385
|
}
|
|
676
|
-
export interface
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
386
|
+
export interface EngineResolveBinding {
|
|
387
|
+
engine: string;
|
|
388
|
+
resolver: {
|
|
389
|
+
use: string;
|
|
390
|
+
withFingerprint: string;
|
|
391
|
+
};
|
|
680
392
|
}
|
|
681
393
|
export type WorkbenchExecutionPurpose = "improve" | "attempt";
|
|
682
394
|
export type WorkbenchSandboxTemplateKind = "snapshot" | "oci";
|
|
@@ -704,11 +416,11 @@ export interface WorkbenchSandboxAllocation {
|
|
|
704
416
|
}
|
|
705
417
|
export interface WorkbenchExecutionCapability {
|
|
706
418
|
executionId: string;
|
|
707
|
-
|
|
419
|
+
skill: {
|
|
708
420
|
tenantId: string;
|
|
709
421
|
projectId: string;
|
|
710
422
|
runId: string;
|
|
711
|
-
|
|
423
|
+
versionId?: string;
|
|
712
424
|
};
|
|
713
425
|
inputs: WorkbenchExecutionInputRef[];
|
|
714
426
|
outputPrefix: string;
|
|
@@ -735,7 +447,7 @@ export interface WorkbenchExecutionInputRef {
|
|
|
735
447
|
mountPath: string;
|
|
736
448
|
writable: boolean;
|
|
737
449
|
}
|
|
738
|
-
export type WorkbenchExecutionOutputSchema = "workbench.
|
|
450
|
+
export type WorkbenchExecutionOutputSchema = "workbench.skill_patch.v1" | "workbench.result.v1" | string;
|
|
739
451
|
export interface WorkbenchExecutionOutputContract {
|
|
740
452
|
name: string;
|
|
741
453
|
schema: WorkbenchExecutionOutputSchema;
|
|
@@ -761,7 +473,7 @@ export interface WorkbenchExecutionSpec {
|
|
|
761
473
|
id: string;
|
|
762
474
|
projectId: string;
|
|
763
475
|
runId: string;
|
|
764
|
-
|
|
476
|
+
versionId?: string;
|
|
765
477
|
purpose: WorkbenchExecutionPurpose;
|
|
766
478
|
adapter: WorkbenchAdapterInvocation;
|
|
767
479
|
sandbox: WorkbenchSandboxTemplate;
|
|
@@ -770,12 +482,87 @@ export interface WorkbenchExecutionSpec {
|
|
|
770
482
|
policy: WorkbenchExecutionPolicy;
|
|
771
483
|
metadata: Record<string, Json>;
|
|
772
484
|
}
|
|
773
|
-
export interface
|
|
485
|
+
export interface BlobObjectRef {
|
|
486
|
+
bucket: string;
|
|
487
|
+
key: string;
|
|
488
|
+
byteLength: number;
|
|
489
|
+
sha256: string;
|
|
490
|
+
}
|
|
491
|
+
export interface WorkbenchSkillPatch {
|
|
774
492
|
files: SurfaceSnapshotFile[];
|
|
775
493
|
fileChanges: string[];
|
|
776
494
|
summary?: string;
|
|
777
495
|
feedback?: Json;
|
|
778
496
|
}
|
|
497
|
+
export interface WorkbenchCaseCriterionScore {
|
|
498
|
+
criterion_id: string;
|
|
499
|
+
label: string;
|
|
500
|
+
score: number;
|
|
501
|
+
pass: boolean;
|
|
502
|
+
errors?: string[];
|
|
503
|
+
rationale?: string;
|
|
504
|
+
}
|
|
505
|
+
export interface MetricStats {
|
|
506
|
+
count: number;
|
|
507
|
+
mean: number;
|
|
508
|
+
variance: number;
|
|
509
|
+
stddev: number;
|
|
510
|
+
min: number;
|
|
511
|
+
max: number;
|
|
512
|
+
}
|
|
513
|
+
export type EvalCaseStatus = "completed" | "error";
|
|
514
|
+
export type EvalCaseSource = Record<string, Json>;
|
|
515
|
+
export interface EvalCaseResult {
|
|
516
|
+
id: string;
|
|
517
|
+
label?: string;
|
|
518
|
+
split?: string;
|
|
519
|
+
status?: EvalCaseStatus;
|
|
520
|
+
durationMs?: number;
|
|
521
|
+
metrics: Record<string, number>;
|
|
522
|
+
source?: EvalCaseSource;
|
|
523
|
+
feedback?: Json;
|
|
524
|
+
criteria?: WorkbenchCaseCriterionScore[];
|
|
525
|
+
}
|
|
526
|
+
export type ExecutionRole = "improver" | "runner" | "engine";
|
|
527
|
+
export type ExecutionUsageCostSource = "provider" | "estimated" | "mixed";
|
|
528
|
+
export interface ExecutionUsage {
|
|
529
|
+
provider?: string;
|
|
530
|
+
model?: string;
|
|
531
|
+
inputTokens?: number;
|
|
532
|
+
uncachedInputTokens?: number;
|
|
533
|
+
cachedInputTokens?: number;
|
|
534
|
+
cacheCreationInputTokens?: number;
|
|
535
|
+
cacheReadInputTokens?: number;
|
|
536
|
+
outputTokens?: number;
|
|
537
|
+
reasoningOutputTokens?: number;
|
|
538
|
+
totalTokens?: number;
|
|
539
|
+
costUsd?: number;
|
|
540
|
+
costSource?: ExecutionUsageCostSource;
|
|
541
|
+
pricingSource?: string;
|
|
542
|
+
}
|
|
543
|
+
export interface UsageSummary {
|
|
544
|
+
total?: ExecutionUsage;
|
|
545
|
+
improver?: ExecutionUsage;
|
|
546
|
+
runner?: ExecutionUsage;
|
|
547
|
+
engine?: ExecutionUsage;
|
|
548
|
+
}
|
|
549
|
+
export interface EvaluationUsageStats {
|
|
550
|
+
total?: ExecutionUsageStats;
|
|
551
|
+
improver?: ExecutionUsageStats;
|
|
552
|
+
runner?: ExecutionUsageStats;
|
|
553
|
+
engine?: ExecutionUsageStats;
|
|
554
|
+
}
|
|
555
|
+
export interface ExecutionUsageStats {
|
|
556
|
+
inputTokens?: MetricStats;
|
|
557
|
+
uncachedInputTokens?: MetricStats;
|
|
558
|
+
cachedInputTokens?: MetricStats;
|
|
559
|
+
cacheCreationInputTokens?: MetricStats;
|
|
560
|
+
cacheReadInputTokens?: MetricStats;
|
|
561
|
+
reasoningOutputTokens?: MetricStats;
|
|
562
|
+
outputTokens?: MetricStats;
|
|
563
|
+
totalTokens?: MetricStats;
|
|
564
|
+
costUsd?: MetricStats;
|
|
565
|
+
}
|
|
779
566
|
export interface WorkbenchResult {
|
|
780
567
|
score: number;
|
|
781
568
|
metrics?: Record<string, number>;
|
|
@@ -888,6 +675,24 @@ export interface WorkbenchTraceSession {
|
|
|
888
675
|
trace: WorkbenchExecutionTrace;
|
|
889
676
|
metadata?: Record<string, Json>;
|
|
890
677
|
}
|
|
678
|
+
export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
|
|
679
|
+
export type RemoteWorkbenchJobKind = "execute";
|
|
680
|
+
export interface RemoteWorkbenchJob {
|
|
681
|
+
id: string;
|
|
682
|
+
projectId: string;
|
|
683
|
+
runId: string;
|
|
684
|
+
versionId?: string;
|
|
685
|
+
kind: RemoteWorkbenchJobKind;
|
|
686
|
+
status: RemoteWorkbenchJobStatus;
|
|
687
|
+
attempt: number;
|
|
688
|
+
createdAt: string;
|
|
689
|
+
updatedAt: string;
|
|
690
|
+
startedAt?: string;
|
|
691
|
+
finishedAt?: string;
|
|
692
|
+
input: Json;
|
|
693
|
+
output?: Json;
|
|
694
|
+
error?: string;
|
|
695
|
+
}
|
|
891
696
|
export interface WorkbenchExecutionEvidence {
|
|
892
697
|
id: string;
|
|
893
698
|
kind: string;
|
|
@@ -896,7 +701,7 @@ export interface WorkbenchExecutionEvidence {
|
|
|
896
701
|
status: RemoteWorkbenchJobStatus;
|
|
897
702
|
jobIds: string[];
|
|
898
703
|
executionIds: string[];
|
|
899
|
-
|
|
704
|
+
versionId?: string;
|
|
900
705
|
caseId?: string;
|
|
901
706
|
sampleIndex?: number;
|
|
902
707
|
attemptIndex?: number;
|
|
@@ -908,44 +713,6 @@ export interface WorkbenchExecutionTraceDetail {
|
|
|
908
713
|
runId: string;
|
|
909
714
|
executions: WorkbenchExecutionEvidence[];
|
|
910
715
|
}
|
|
911
|
-
export interface AuthoredWorkbenchCaseSummary {
|
|
912
|
-
id: string;
|
|
913
|
-
slug: string;
|
|
914
|
-
path: string;
|
|
915
|
-
name: string;
|
|
916
|
-
split?: string;
|
|
917
|
-
fileCount: number;
|
|
918
|
-
}
|
|
919
|
-
export interface AuthoredWorkbenchSourceFile {
|
|
920
|
-
path: string;
|
|
921
|
-
content: string;
|
|
922
|
-
}
|
|
923
|
-
export interface AuthoredWorkbenchSourceDocument {
|
|
924
|
-
path: string;
|
|
925
|
-
exists: boolean;
|
|
926
|
-
source_yaml: string;
|
|
927
|
-
source_files: AuthoredWorkbenchSourceFile[];
|
|
928
|
-
spec: AuthoredWorkbenchSourceSpec | null;
|
|
929
|
-
cases: AuthoredWorkbenchCaseSummary[];
|
|
930
|
-
}
|
|
931
|
-
export type RemoteWorkbenchJobStatus = "queued" | "running" | "succeeded" | "failed" | "cancelled";
|
|
932
|
-
export type RemoteWorkbenchJobKind = "execute";
|
|
933
|
-
export interface RemoteWorkbenchJob {
|
|
934
|
-
id: string;
|
|
935
|
-
projectId: string;
|
|
936
|
-
runId: string;
|
|
937
|
-
candidateId?: string;
|
|
938
|
-
kind: RemoteWorkbenchJobKind;
|
|
939
|
-
status: RemoteWorkbenchJobStatus;
|
|
940
|
-
attempt: number;
|
|
941
|
-
createdAt: string;
|
|
942
|
-
updatedAt: string;
|
|
943
|
-
startedAt?: string;
|
|
944
|
-
finishedAt?: string;
|
|
945
|
-
input: Json;
|
|
946
|
-
output?: Json;
|
|
947
|
-
error?: string;
|
|
948
|
-
}
|
|
949
716
|
export interface WorkbenchRemoteJobClaimRequest {
|
|
950
717
|
schema: "workbench.remote.job.claim_request.v1";
|
|
951
718
|
ownerUserId: string;
|
|
@@ -1015,32 +782,6 @@ export interface WorkbenchRemoteJobRetry {
|
|
|
1015
782
|
leaseToken: string;
|
|
1016
783
|
reason: string;
|
|
1017
784
|
}
|
|
1018
|
-
export
|
|
1019
|
-
|
|
1020
|
-
environmentVersionId?: string;
|
|
1021
|
-
specVersionId: string;
|
|
1022
|
-
candidateId: string | null;
|
|
1023
|
-
activeCandidateId?: string | null;
|
|
1024
|
-
outputCandidateId?: string | null;
|
|
1025
|
-
input: {
|
|
1026
|
-
benchmarkFingerprint: string;
|
|
1027
|
-
candidateFingerprint: string;
|
|
1028
|
-
baseCandidateId: string | null;
|
|
1029
|
-
payerUserId?: string;
|
|
1030
|
-
candidateOwnerUserId?: string;
|
|
1031
|
-
candidateOwnerUsername?: string;
|
|
1032
|
-
preserveActiveCandidateId?: string | null;
|
|
1033
|
-
selectedSamples?: Array<{
|
|
1034
|
-
caseId: string;
|
|
1035
|
-
sampleIndex: number;
|
|
1036
|
-
}>;
|
|
1037
|
-
sourceYaml?: string;
|
|
1038
|
-
candidateSourceFiles?: SurfaceSnapshotFile[];
|
|
1039
|
-
baseFiles: SurfaceSnapshotFile[];
|
|
1040
|
-
engineResolveFiles: SurfaceSnapshotFile[];
|
|
1041
|
-
};
|
|
1042
|
-
jobCount: number;
|
|
1043
|
-
completedJobCount: number;
|
|
1044
|
-
failedJobCount: number;
|
|
1045
|
-
}
|
|
785
|
+
export declare function isReservedWorkbenchAdapterAuthEnvName(name: string): boolean;
|
|
786
|
+
export declare function assertWorkbenchAdapterAuthEnvNameAllowed(name: string): void;
|
|
1046
787
|
//# sourceMappingURL=index.d.ts.map
|