@workbench-ai/workbench-core 0.0.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapter-auth.d.ts +63 -0
- package/dist/adapter-auth.d.ts.map +1 -0
- package/dist/adapter-auth.js +244 -0
- package/dist/execution-events.d.ts +53 -0
- package/dist/execution-events.d.ts.map +1 -0
- package/dist/execution-events.js +195 -0
- package/dist/execution-graph.d.ts +27 -0
- package/dist/execution-graph.d.ts.map +1 -0
- package/dist/execution-graph.js +126 -0
- package/dist/execution-jobs.d.ts +70 -0
- package/dist/execution-jobs.d.ts.map +1 -0
- package/dist/execution-jobs.js +229 -0
- package/dist/execution-outputs.d.ts +9 -0
- package/dist/execution-outputs.d.ts.map +1 -0
- package/dist/execution-outputs.js +393 -0
- package/dist/execution-phases.d.ts +21 -0
- package/dist/execution-phases.d.ts.map +1 -0
- package/dist/execution-phases.js +262 -0
- package/dist/execution-runtime-types.d.ts +35 -0
- package/dist/execution-runtime-types.d.ts.map +1 -0
- package/dist/execution-runtime-types.js +1 -0
- package/dist/execution-scheduler.d.ts +31 -0
- package/dist/execution-scheduler.d.ts.map +1 -0
- package/dist/execution-scheduler.js +241 -0
- package/dist/execution-traces.d.ts +16 -0
- package/dist/execution-traces.d.ts.map +1 -0
- package/dist/execution-traces.js +164 -0
- package/dist/execution-usage.d.ts +12 -0
- package/dist/execution-usage.d.ts.map +1 -0
- package/dist/execution-usage.js +433 -0
- package/dist/generic-spec.d.ts +113 -0
- package/dist/generic-spec.d.ts.map +1 -0
- package/dist/generic-spec.js +656 -0
- package/dist/index.d.ts +160 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +2858 -0
- package/dist/model-prices-litellm.d.ts +9674 -0
- package/dist/model-prices-litellm.d.ts.map +1 -0
- package/dist/model-prices-litellm.js +9668 -0
- package/dist/runtime-utils.d.ts +18 -0
- package/dist/runtime-utils.d.ts.map +1 -0
- package/dist/runtime-utils.js +108 -0
- package/dist/sandbox-backends/docker.d.ts +5 -0
- package/dist/sandbox-backends/docker.d.ts.map +1 -0
- package/dist/sandbox-backends/docker.js +568 -0
- package/dist/sandbox-backends/index.d.ts +37 -0
- package/dist/sandbox-backends/index.d.ts.map +1 -0
- package/dist/sandbox-backends/index.js +79 -0
- package/dist/sandbox-backends/names.d.ts +6 -0
- package/dist/sandbox-backends/names.d.ts.map +1 -0
- package/dist/sandbox-backends/names.js +14 -0
- package/dist/sandbox-backends/template-images.d.ts +4 -0
- package/dist/sandbox-backends/template-images.d.ts.map +1 -0
- package/dist/sandbox-backends/template-images.js +48 -0
- package/dist/sandbox-inputs.d.ts +27 -0
- package/dist/sandbox-inputs.d.ts.map +1 -0
- package/dist/sandbox-inputs.js +220 -0
- package/dist/sandbox-plane.d.ts +89 -0
- package/dist/sandbox-plane.d.ts.map +1 -0
- package/dist/sandbox-plane.js +327 -0
- package/dist/subject-patch.d.ts +8 -0
- package/dist/subject-patch.d.ts.map +1 -0
- package/dist/subject-patch.js +63 -0
- package/dist/trace-files.d.ts +18 -0
- package/dist/trace-files.d.ts.map +1 -0
- package/dist/trace-files.js +94 -0
- package/environments/libreoffice-agent/Dockerfile +13 -0
- package/environments/libreoffice-python/Dockerfile +11 -0
- package/environments/node-22/Dockerfile +3 -0
- package/environments/python-3.12/Dockerfile +8 -0
- package/package.json +42 -0
- package/worker/sandbox-adapter-runner.cjs +275 -0
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import { LITELLM_MODEL_PRICES, LITELLM_PRICING_SOURCE, } from "./model-prices-litellm.js";
|
|
2
|
+
import { jsonRecord, numberValue, stringValue, } from "./runtime-utils.js";
|
|
3
|
+
// Camel-case names of every numeric usage field this module sums, scores,
// and computes statistics over (token counters plus `costUsd`).
const NUMERIC_USAGE_FIELDS = [
    "inputTokens",
    "uncachedInputTokens",
    "cachedInputTokens",
    "cacheCreationInputTokens",
    "cacheReadInputTokens",
    "outputTokens",
    "reasoningOutputTokens",
    "totalTokens",
    "costUsd",
];
|
|
14
|
+
// Role keys a usage summary may carry in addition to its `total` entry.
const USAGE_ROLES = [
    "optimizer",
    "runner",
    "engine",
];
|
|
19
|
+
/**
 * Collects usage records from a trace (summaries, usage spans, usage events)
 * and from agent events, normalizes each one, and returns the most detailed
 * record wrapped as a `{ total }` usage summary.
 *
 * @param {unknown} trace - Trace object; read through `jsonRecord`.
 * @param {{ model?: string }} provider - Supplies the default model name.
 * @param {string} providerId - Default provider name for records lacking one.
 * @param {unknown[]} [events=[]] - Agent events to scan for token usage.
 * @returns {{ total: object } | undefined} Summary, or undefined when no
 *   record carries a usage signal.
 */
export function extractExecutionUsageFromTrace(trace, provider, providerId, events = []) {
    const records = [
        usageRecordsFromTraceSummaries(trace),
        usageRecordsFromTraceEntries(jsonRecord(trace).spans, "started_at"),
        usageRecordsFromTraceEntries(jsonRecord(trace).events, "at"),
        usageRecordsFromAgentEvents(events),
    ].flat();
    // Defaults are built per record so `provider` is only dereferenced when
    // there is at least one record to normalize.
    const candidates = records.map((record) => {
        const defaults = {
            provider: providerId,
            model: provider.model,
        };
        return normalizeExecutionUsage(record.usage, defaults);
    });
    const best = selectBestExecutionUsage(candidates);
    if (!best) {
        return undefined;
    }
    return usageSummaryFromExecutionUsage(best);
}
|
|
31
|
+
/**
 * Re-labels a usage summary under a single role.
 *
 * Uses the summary's existing entry for `role`, falling back to its `total`
 * entry. When neither exists the input is returned unchanged.
 *
 * @param {string} role - Role key to assign the usage to.
 * @param {object | undefined} usage - Usage summary.
 * @returns {object | undefined} Completed summary holding only `role`
 *   (plus the derived total), or the original `usage` when it has no data.
 */
export function assignUsageRole(role, usage) {
    const roleUsage = usage?.[role];
    const execution = roleUsage ?? usage?.total;
    if (execution) {
        return completeUsageSummary({ [role]: execution });
    }
    return usage;
}
|
|
40
|
+
/**
 * Wraps a single execution-usage record as a summary with only a `total`.
 *
 * @param {object | undefined} usage - Execution usage record.
 * @returns {{ total: object } | undefined} `{ total: usage }`, or undefined
 *   when `usage` is falsy.
 */
export function usageSummaryFromExecutionUsage(usage) {
    if (!usage) {
        return undefined;
    }
    return { total: usage };
}
|
|
43
|
+
/**
 * Normalizes every role entry of a usage summary and derives its `total`.
 *
 * The total is the merge of the normalized role entries; only when no role
 * yields usable data does the summary's own `total` get normalized and used.
 *
 * @param {object | undefined} usage - Usage summary (possibly partial).
 * @returns {object | undefined} Compacted summary, or undefined when the
 *   input is falsy or nothing usable remains.
 */
export function completeUsageSummary(usage) {
    if (!usage) {
        return undefined;
    }
    const normalizeRole = (entry) => (entry ? normalizeExecutionUsage(entry) : undefined);
    const optimizer = normalizeRole(usage.optimizer);
    const runner = normalizeRole(usage.runner);
    const engine = normalizeRole(usage.engine);
    // Role data wins over any total supplied by the caller.
    const total = mergeExecutionUsage([optimizer, runner, engine])
        ?? normalizeExecutionUsage(usage.total);
    const summary = {};
    if (total) {
        summary.total = total;
    }
    if (optimizer) {
        summary.optimizer = optimizer;
    }
    if (runner) {
        summary.runner = runner;
    }
    if (engine) {
        summary.engine = engine;
    }
    return compactUsageSummary(summary);
}
|
|
63
|
+
/**
 * Builds a completed usage summary from an arbitrary value.
 *
 * Each of `total`, `optimizer`, `runner`, and `engine` is normalized
 * independently; entries that normalize to nothing are left out before the
 * result is passed to `completeUsageSummary`.
 *
 * @param {unknown} value - Candidate summary; read through `jsonRecord`.
 * @returns {object | undefined} Completed summary or undefined.
 */
export function normalizeUsageSummary(value) {
    const record = jsonRecord(value);
    const normalized = {};
    for (const key of ["total", "optimizer", "runner", "engine"]) {
        const entry = normalizeExecutionUsage(record[key]);
        if (entry) {
            normalized[key] = entry;
        }
    }
    return completeUsageSummary(normalized);
}
|
|
76
|
+
/**
 * Merges several usage summaries into one by merging each of `total`,
 * `optimizer`, `runner`, and `engine` across all inputs.
 *
 * @param {Array<object | undefined>} summaries - Summaries to combine;
 *   entries that complete to nothing are ignored.
 * @returns {object | undefined} Merged summary, or undefined when no input
 *   contributes usable data.
 */
export function mergeUsageSummaries(summaries) {
    const entries = summaries
        .map((summary) => completeUsageSummary(summary))
        .filter((entry) => entry !== undefined);
    if (entries.length === 0) {
        return undefined;
    }
    const mergeRole = (role) => mergeExecutionUsage(entries.map((entry) => entry[role]));
    return compactUsageSummary({
        total: mergeRole("total"),
        optimizer: mergeRole("optimizer"),
        runner: mergeRole("runner"),
        engine: mergeRole("engine"),
    });
}
|
|
91
|
+
/**
 * Combines per-role usage summaries into a single completed summary.
 *
 * For each role, the role's own entry inside its summary is preferred and
 * the summary's `total` is used as the fallback.
 *
 * @param {{ optimizer?: object, runner?: object, engine?: object }} roles -
 *   One usage summary per role.
 * @returns {object | undefined} Completed summary or undefined.
 */
export function mergeUsageRoles(roles) {
    const usageForRole = (role) => {
        const summary = completeUsageSummary(roles[role]);
        return summary?.[role] ?? summary?.total;
    };
    return completeUsageSummary({
        optimizer: usageForRole("optimizer"),
        runner: usageForRole("runner"),
        engine: usageForRole("engine"),
    });
}
|
|
101
|
+
/**
 * Computes per-field statistics for `total` and every usage role across a
 * list of usage summaries.
 *
 * Null or undefined entries in `summaries` are skipped instead of raising a
 * TypeError, matching how `mergeUsageSummaries` treats missing summaries.
 *
 * @param {Array<object | null | undefined>} summaries - Usage summaries.
 * @returns {object | undefined} Map from role name to field statistics, or
 *   undefined when no role has any numeric data.
 */
export function usageStats(summaries) {
    const roles = {};
    for (const role of ["total", ...USAGE_ROLES]) {
        // Optional chaining: a missing summary contributes no sample.
        const stats = executionUsageStats(summaries.map((summary) => summary?.[role]));
        if (stats) {
            roles[role] = stats;
        }
    }
    return Object.keys(roles).length > 0 ? roles : undefined;
}
|
|
108
|
+
/**
 * Converts a raw usage record (camelCase or snake_case keys) into the
 * canonical camelCase execution-usage shape.
 *
 * Steps, in order: copy provider/model (falling back to `defaults`), copy
 * each numeric field, fill in `totalTokens` when it can be inferred, then
 * settle the cost: estimate it when absent, otherwise record where it came
 * from.
 *
 * @param {unknown} value - Raw usage record; read through `jsonRecord`.
 * @param {{ provider?: string, model?: string }} [defaults={}] - Identity
 *   used when the record does not name a provider or model.
 * @returns {object | undefined} Normalized usage, or undefined when the
 *   record is empty or carries no numeric usage field.
 */
function normalizeExecutionUsage(value, defaults = {}) {
    const record = jsonRecord(value);
    if (Object.keys(record).length === 0) {
        return undefined;
    }
    const usage = {};
    const providerName = stringValue(record.provider) ?? defaults.provider;
    if (providerName) {
        usage.provider = providerName;
    }
    const model = stringValue(record.model) ?? defaults.model;
    if (model) {
        usage.model = model;
    }
    // Canonical key first, then the accepted aliases in lookup order.
    const numericAliases = [
        ["inputTokens", "input_tokens"],
        ["uncachedInputTokens", "uncached_input_tokens"],
        ["cachedInputTokens", "cached_input_tokens"],
        ["cacheCreationInputTokens", "cache_creation_input_tokens"],
        ["cacheReadInputTokens", "cache_read_input_tokens"],
        ["outputTokens", "output_tokens"],
        ["reasoningOutputTokens", "reasoning_output_tokens"],
        ["totalTokens", "total_tokens"],
        ["costUsd", "total_cost_usd", "totalCostUsd"],
    ];
    for (const [camelKey, ...aliases] of numericAliases) {
        Object.assign(usage, numberField(record, camelKey, ...aliases));
    }
    const totalTokens = usage.totalTokens ?? inferredTotalTokens(usage);
    if (totalTokens !== undefined) {
        usage.totalTokens = totalTokens;
    }
    if (usage.costUsd === undefined) {
        // No cost in the record: try to price it from the model table.
        const estimate = estimateExecutionCost(usage);
        if (estimate) {
            usage.costUsd = estimate.costUsd;
            usage.costSource = "estimated";
            usage.pricingSource = estimate.pricingSource;
        }
    }
    else if (numberValue(record.total_cost_usd) !== undefined) {
        // A `total_cost_usd` value is labelled as provider-reported.
        usage.costSource = "provider";
        usage.pricingSource = stringValue(record.pricing_source)
            ?? stringValue(record.pricingSource)
            ?? "provider";
    }
    else {
        // Cost came in under another key: keep whatever provenance it has.
        const costSource = normalizeCostSource(record.costSource ?? record.cost_source);
        if (costSource) {
            usage.costSource = costSource;
        }
        const pricingSource = stringValue(record.pricingSource) ?? stringValue(record.pricing_source);
        if (pricingSource) {
            usage.pricingSource = pricingSource;
        }
    }
    return hasUsageSignal(usage) ? usage : undefined;
}
|
|
163
|
+
/**
 * Merges several execution-usage records into one.
 *
 * Numeric fields are summed, provider/model are kept only when all inputs
 * agree, `costSource` collapses to "mixed" on disagreement, and
 * `pricingSource` is kept only when unique.
 *
 * @param {Array<unknown>} entries - Usage records; each is normalized first
 *   and dropped when it normalizes to nothing.
 * @returns {object | undefined} Merged usage, or undefined when no entry is
 *   usable or the result has no numeric field.
 */
function mergeExecutionUsage(entries) {
    const usages = entries
        .map((entry) => normalizeExecutionUsage(entry))
        .filter((usage) => usage !== undefined);
    if (usages.length === 0) {
        return undefined;
    }
    const merged = { ...mergeIdentity(usages) };
    for (const field of NUMERIC_USAGE_FIELDS) {
        const sum = sumFinite(usages.map((usage) => usage[field]));
        if (sum !== undefined) {
            merged[field] = sum;
        }
    }
    const costSource = mergeCostSource(usages.map((usage) => usage.costSource));
    if (costSource) {
        merged.costSource = costSource;
    }
    const pricingSource = uniqueString(usages.map((usage) => usage.pricingSource));
    if (pricingSource) {
        merged.pricingSource = pricingSource;
    }
    if (!hasUsageSignal(merged)) {
        return undefined;
    }
    return merged;
}
|
|
188
|
+
/**
 * Computes `metricStats` for every numeric usage field over a list of
 * usage records.
 *
 * @param {Array<unknown>} entries - Usage records; each is normalized first
 *   and dropped when it normalizes to nothing.
 * @returns {object | undefined} Map from field name to its statistics, or
 *   undefined when no field has a finite sample.
 */
function executionUsageStats(entries) {
    const usages = entries
        .map((entry) => normalizeExecutionUsage(entry))
        .filter((usage) => usage !== undefined);
    if (usages.length === 0) {
        return undefined;
    }
    const isFiniteNumber = (value) => typeof value === "number" && Number.isFinite(value);
    const stats = {};
    for (const field of NUMERIC_USAGE_FIELDS) {
        const samples = usages.map((usage) => usage[field]).filter(isFiniteNumber);
        if (samples.length > 0) {
            stats[field] = metricStats(samples);
        }
    }
    return Object.keys(stats).length > 0 ? stats : undefined;
}
|
|
205
|
+
/**
 * Extracts `{ at, usage }` records from the `summaries` array of a trace.
 *
 * A summary contributes a record only when its `usage` object is non-empty.
 * `at` is the summary's `ended_at`, else `started_at`, else "".
 *
 * @param {unknown} trace - Trace object; read through `jsonRecord`.
 * @returns {Array<{ at: string, usage: object }>} Extracted records; empty
 *   when `summaries` is not an array.
 */
function usageRecordsFromTraceSummaries(trace) {
    const { summaries } = jsonRecord(trace);
    if (!Array.isArray(summaries)) {
        return [];
    }
    const records = [];
    summaries.forEach((entry) => {
        const record = jsonRecord(entry);
        const usage = jsonRecord(record.usage);
        if (Object.keys(usage).length === 0) {
            return;
        }
        const at = stringValue(record.ended_at) ?? stringValue(record.started_at) ?? "";
        records.push({ at, usage });
    });
    return records;
}
|
|
221
|
+
/**
 * Extracts `{ at, usage }` records from trace entries whose `kind` is
 * "usage", reading the usage numbers from each entry's `attributes`.
 *
 * @param {unknown} value - Candidate array of trace entries.
 * @param {string} timestampKey - Entry property holding the timestamp.
 * @returns {Array<{ at: string, usage: object }>} Extracted records; empty
 *   when `value` is not an array.
 */
function usageRecordsFromTraceEntries(value, timestampKey) {
    if (!Array.isArray(value)) {
        return [];
    }
    const records = [];
    value.forEach((entry) => {
        const record = jsonRecord(entry);
        if (record.kind !== "usage") {
            return;
        }
        const usage = usageRecordFromAttributes(jsonRecord(record.attributes));
        if (usage) {
            records.push({
                at: stringValue(record[timestampKey]) ?? "",
                usage,
            });
        }
    });
    return records;
}
|
|
239
|
+
/**
 * Extracts `{ at, usage }` records from agent events named (or with method)
 * "thread/tokenUsage/updated", reading `payload.tokenUsage.total`.
 *
 * A non-array `events` value (for example `null`) yields no records rather
 * than throwing, consistent with the trace-entry readers in this module.
 *
 * @param {unknown} events - Candidate array of agent events.
 * @returns {Array<{ at: string, usage: object }>} Records with snake_case
 *   usage keys; events without any numeric token field are skipped.
 */
function usageRecordsFromAgentEvents(events) {
    if (!Array.isArray(events)) {
        return [];
    }
    const eventName = "thread/tokenUsage/updated";
    // Source (camelCase) key -> record (snake_case) key, in output order.
    const fieldMap = [
        ["totalTokens", "total_tokens"],
        ["inputTokens", "input_tokens"],
        ["outputTokens", "output_tokens"],
        ["cachedInputTokens", "cached_input_tokens"],
        ["reasoningOutputTokens", "reasoning_output_tokens"],
    ];
    return events.flatMap((event) => {
        const record = jsonRecord(event);
        if (record.name !== eventName && record.method !== eventName) {
            return [];
        }
        const tokenUsage = jsonRecord(jsonRecord(record.payload).tokenUsage);
        const total = jsonRecord(tokenUsage.total);
        if (Object.keys(total).length === 0) {
            return [];
        }
        const usage = {};
        for (const [sourceKey, targetKey] of fieldMap) {
            const amount = numberValue(total[sourceKey]);
            if (amount !== undefined) {
                usage[targetKey] = amount;
            }
        }
        if (!hasUsageRecordSignal(usage)) {
            return [];
        }
        return [{
                at: stringValue(record.at) ?? "",
                usage,
            }];
    });
}
|
|
270
|
+
/**
 * Copies the recognized snake_case usage fields out of a trace entry's
 * attributes.
 *
 * @param {object} attributes - Attribute record of a usage trace entry.
 * @returns {object | undefined} Record with the numeric fields that parsed
 *   plus non-empty `cost_source` / `pricing_source`, or undefined when no
 *   numeric field is present.
 */
function usageRecordFromAttributes(attributes) {
    const numericKeys = [
        "input_tokens",
        "uncached_input_tokens",
        "cached_input_tokens",
        "cache_creation_input_tokens",
        "cache_read_input_tokens",
        "output_tokens",
        "reasoning_output_tokens",
        "total_tokens",
        "total_cost_usd",
    ];
    const usage = {};
    for (const key of numericKeys) {
        Object.assign(usage, copyNumber(attributes, key));
    }
    const costSource = stringValue(attributes.cost_source);
    if (costSource) {
        usage.cost_source = costSource;
    }
    const pricingSource = stringValue(attributes.pricing_source);
    if (pricingSource) {
        usage.pricing_source = pricingSource;
    }
    if (!hasUsageRecordSignal(usage)) {
        return undefined;
    }
    return usage;
}
|
|
286
|
+
/**
 * Picks the usage record with the highest `usageDetailScore`.
 *
 * Entries are normalized first; those that normalize to nothing are
 * dropped. The list is sorted ascending by score and the last element is
 * returned.
 *
 * @param {Array<unknown>} entries - Candidate usage records.
 * @returns {object | undefined} Best normalized usage, or undefined when
 *   there is no usable candidate.
 */
function selectBestExecutionUsage(entries) {
    const candidates = [];
    entries.forEach((entry) => {
        const normalized = normalizeExecutionUsage(entry);
        if (normalized) {
            candidates.push(normalized);
        }
    });
    candidates.sort((left, right) => usageDetailScore(left) - usageDetailScore(right));
    return candidates[candidates.length - 1];
}
|
|
295
|
+
/**
 * Scores how detailed a usage record is, for ranking candidates.
 *
 * 100 points for having a cost, one point per populated numeric field, and
 * a fractional tie-breaker (at most 1) that grows with `totalTokens`.
 *
 * @param {object} usage - Normalized usage record.
 * @returns {number} Detail score; higher means more informative.
 */
function usageDetailScore(usage) {
    let score = usage.costUsd === undefined ? 0 : 100;
    for (const field of NUMERIC_USAGE_FIELDS) {
        if (usage[field] !== undefined) {
            score += 1;
        }
    }
    const tokens = usage.totalTokens ?? 0;
    const tieBreaker = Math.min(tokens / 1_000_000_000, 1);
    return score + tieBreaker;
}
|
|
301
|
+
/**
 * Estimates the USD cost of a usage record from the LiteLLM price table.
 *
 * Token buckets: cache-creation tokens, cache-read tokens (explicit, or
 * `cachedInputTokens` minus cache creation), uncached input tokens
 * (explicit, or `inputTokens` minus both cache buckets), and output tokens.
 * Cache buckets fall back to the plain input price when the table has no
 * dedicated cache price.
 *
 * @param {object} usage - Normalized usage record; must name a `model`.
 * @returns {{ costUsd: number, pricingSource: string } | undefined}
 *   Estimate, or undefined when the model is unknown or unpriced, there are
 *   no tokens to price, or the result is not a positive finite number.
 */
function estimateExecutionCost(usage) {
    const price = usage.model ? LITELLM_MODEL_PRICES[usage.model] : undefined;
    if (!price) {
        return undefined;
    }
    const { input_cost_per_token: inputPrice, output_cost_per_token: outputPrice } = price;
    const hasAnyPrice = typeof inputPrice === "number" || typeof outputPrice === "number";
    if (!hasAnyPrice) {
        return undefined;
    }
    const cacheCreationTokens = usage.cacheCreationInputTokens ?? 0;
    const derivedCacheRead = usage.cachedInputTokens === undefined
        ? 0
        : Math.max(usage.cachedInputTokens - cacheCreationTokens, 0);
    const cacheReadTokens = usage.cacheReadInputTokens ?? derivedCacheRead;
    const derivedUncached = usage.inputTokens === undefined
        ? 0
        : Math.max(usage.inputTokens - cacheReadTokens - cacheCreationTokens, 0);
    const uncachedInputTokens = usage.uncachedInputTokens ?? derivedUncached;
    const outputTokens = usage.outputTokens ?? 0;
    const buckets = [uncachedInputTokens, cacheReadTokens, cacheCreationTokens, outputTokens];
    if (buckets.every((tokens) => tokens === 0)) {
        return undefined;
    }
    const inputCost = uncachedInputTokens * (inputPrice ?? 0);
    const cacheReadCost = cacheReadTokens * (price.cache_read_input_token_cost ?? inputPrice ?? 0);
    const cacheCreationCost = cacheCreationTokens * (price.cache_creation_input_token_cost ?? inputPrice ?? 0);
    const outputCost = outputTokens * (outputPrice ?? 0);
    const costUsd = inputCost + cacheReadCost + cacheCreationCost + outputCost;
    if (!Number.isFinite(costUsd) || !(costUsd > 0)) {
        return undefined;
    }
    return {
        costUsd: roundUsageNumber(costUsd),
        pricingSource: LITELLM_PRICING_SOURCE,
    };
}
|
|
338
|
+
/**
 * Infers a total token count as input plus output tokens.
 *
 * The input side is `inputTokens`, or `cachedInputTokens` when the former
 * is absent.
 *
 * @param {object} usage - Usage record with camelCase token fields.
 * @returns {number | undefined} Sum of the available finite values, or
 *   undefined when neither side is present.
 */
function inferredTotalTokens(usage) {
    const { outputTokens } = usage;
    const inputTokens = usage.inputTokens ?? usage.cachedInputTokens;
    const hasInput = inputTokens !== undefined;
    const hasOutput = outputTokens !== undefined;
    if (!hasInput && !hasOutput) {
        return undefined;
    }
    return sumFinite([inputTokens, outputTokens]);
}
|
|
348
|
+
/**
 * Reads a numeric field under its camelCase key, or else under the first
 * alias key that yields a number, and returns it as a one-entry object
 * ready to be spread.
 *
 * @param {object} record - Source record.
 * @param {string} camelKey - Preferred key; also the key of the result.
 * @param {...string} snakeKeys - Alias keys tried in order.
 * @returns {object} `{ [camelKey]: value }`, or `{}` when nothing parsed.
 */
function numberField(record, camelKey, ...snakeKeys) {
    let value = numberValue(record[camelKey]);
    if (value == null) {
        value = undefined;
        for (const key of snakeKeys) {
            const candidate = numberValue(record[key]);
            if (candidate !== undefined) {
                value = candidate;
                break;
            }
        }
    }
    if (value === undefined) {
        return {};
    }
    return { [camelKey]: value };
}
|
|
353
|
+
/**
 * Returns `{ [key]: number }` when `record[key]` parses as a number via
 * `numberValue`, otherwise an empty object. Intended for object spreads.
 *
 * @param {object} record - Source record.
 * @param {string} key - Property to copy.
 * @returns {object} One-entry object or `{}`.
 */
function copyNumber(record, key) {
    const parsed = numberValue(record[key]);
    if (parsed === undefined) {
        return {};
    }
    return { [key]: parsed };
}
|
|
357
|
+
/**
 * Tells whether a snake_case usage record holds at least one numeric
 * token or cost field.
 *
 * @param {object} record - Usage record with snake_case keys.
 * @returns {boolean} True when any recognized key parses as a number.
 */
function hasUsageRecordSignal(record) {
    const signalKeys = [
        "input_tokens",
        "uncached_input_tokens",
        "cached_input_tokens",
        "cache_creation_input_tokens",
        "cache_read_input_tokens",
        "output_tokens",
        "reasoning_output_tokens",
        "total_tokens",
        "total_cost_usd",
    ];
    for (const key of signalKeys) {
        if (numberValue(record[key]) !== undefined) {
            return true;
        }
    }
    return false;
}
|
|
370
|
+
/**
 * Tells whether a normalized usage record has at least one of the
 * `NUMERIC_USAGE_FIELDS` defined.
 *
 * @param {object} usage - Normalized (camelCase) usage record.
 * @returns {boolean} True when any numeric usage field is not undefined.
 */
function hasUsageSignal(usage) {
    for (const field of NUMERIC_USAGE_FIELDS) {
        if (usage[field] !== undefined) {
            return true;
        }
    }
    return false;
}
|
|
373
|
+
/**
 * Keeps only the truthy `total` and role entries of a usage summary, in the
 * order `total` followed by `USAGE_ROLES`.
 *
 * @param {object} usage - Usage summary, possibly with undefined entries.
 * @returns {object | undefined} Compacted summary, or undefined when no
 *   entry remains.
 */
function compactUsageSummary(usage) {
    const output = {};
    for (const role of ["total", ...USAGE_ROLES]) {
        const entry = usage[role];
        if (entry) {
            output[role] = entry;
        }
    }
    if (Object.keys(output).length === 0) {
        return undefined;
    }
    return output;
}
|
|
377
|
+
/**
 * Determines the provider and model shared by a set of usage records.
 *
 * Each is included only when `uniqueString` finds exactly one distinct
 * value across the records.
 *
 * @param {Array<object>} usages - Normalized usage records.
 * @returns {{ provider?: string, model?: string }} Shared identity fields.
 */
function mergeIdentity(usages) {
    const identity = {};
    const provider = uniqueString(usages.map((usage) => usage.provider));
    if (provider) {
        identity.provider = provider;
    }
    const model = uniqueString(usages.map((usage) => usage.model));
    if (model) {
        identity.model = model;
    }
    return identity;
}
|
|
385
|
+
/**
 * Collapses several cost-source labels into one.
 *
 * @param {Array<unknown>} values - Labels; falsy entries are ignored.
 * @returns {unknown} Undefined when no label is present, the label itself
 *   when all present labels agree, and "mixed" otherwise.
 */
function mergeCostSource(values) {
    const distinct = new Set();
    for (const value of values) {
        if (value) {
            distinct.add(value);
        }
    }
    if (distinct.size === 0) {
        return undefined;
    }
    if (distinct.size > 1) {
        return "mixed";
    }
    const [only] = distinct;
    return only;
}
|
|
392
|
+
/**
 * Accepts only the known cost-source labels.
 *
 * @param {unknown} value - Candidate label.
 * @returns {"provider" | "estimated" | "mixed" | undefined} The label when
 *   it is one of the three known values, otherwise undefined.
 */
function normalizeCostSource(value) {
    switch (value) {
        case "provider":
        case "estimated":
        case "mixed":
            return value;
        default:
            return undefined;
    }
}
|
|
397
|
+
/**
 * Sums the finite numbers in a list, ignoring everything else, and rounds
 * the result with `roundUsageNumber`.
 *
 * @param {Array<unknown>} values - Candidate values.
 * @returns {number | undefined} Rounded sum, or undefined when the list
 *   contains no finite number.
 */
function sumFinite(values) {
    let total = 0;
    let sawFinite = false;
    values.forEach((value) => {
        if (typeof value === "number" && Number.isFinite(value)) {
            total += value;
            sawFinite = true;
        }
    });
    return sawFinite ? roundUsageNumber(total) : undefined;
}
|
|
404
|
+
/**
 * Rounds a number to at most six decimal places.
 *
 * @param {number} value - Number to round.
 * @returns {number} `value` formatted with six fixed decimals and parsed
 *   back to a number.
 */
function roundUsageNumber(value) {
    const fixed = value.toFixed(6);
    return Number(fixed);
}
|
|
407
|
+
/**
 * Returns the single distinct non-empty string in a list.
 *
 * @param {Array<unknown>} values - Candidate values; non-strings and empty
 *   strings are ignored.
 * @returns {string | undefined} The string when exactly one distinct value
 *   exists, otherwise undefined.
 */
function uniqueString(values) {
    const distinct = new Set();
    values.forEach((value) => {
        if (typeof value === "string" && value.length > 0) {
            distinct.add(value);
        }
    });
    if (distinct.size !== 1) {
        return undefined;
    }
    const [only] = distinct;
    return only;
}
|
|
411
|
+
/**
 * Computes descriptive statistics for a list of numbers.
 *
 * Variance is the population variance (divides by `count`). Minimum and
 * maximum are folded pairwise rather than computed with
 * `Math.min(...values)`: spreading a very large array into a call exceeds
 * the engine's argument limit and throws a RangeError.
 *
 * @param {number[]} values - Samples.
 * @returns {{ count: number, mean: number, variance: number,
 *   stddev: number, min: number, max: number }} Statistics; all zeros for
 *   an empty list.
 */
function metricStats(values) {
    const count = values.length;
    if (count === 0) {
        return {
            count: 0,
            mean: 0,
            variance: 0,
            stddev: 0,
            min: 0,
            max: 0,
        };
    }
    let sum = 0;
    let min = Infinity;
    let max = -Infinity;
    for (const value of values) {
        sum += value;
        min = Math.min(min, value);
        max = Math.max(max, value);
    }
    const mean = sum / count;
    let squaredDeviation = 0;
    for (const value of values) {
        squaredDeviation += (value - mean) ** 2;
    }
    const variance = squaredDeviation / count;
    return {
        count,
        mean,
        variance,
        stddev: Math.sqrt(variance),
        min,
        max,
    };
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import type { EngineResolveBinding, SurfaceSnapshotFile, WorkbenchAdapterInvocation, WorkbenchExecutionNetworkPolicy, WorkbenchExecutionResources, WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
|
|
2
|
+
import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
|
|
3
|
+
/** String constant declared with the literal value "benchmark.yaml". */
export declare const BENCHMARK_SPEC_FILE = "benchmark.yaml";
|
|
4
|
+
/**
 * Runtime settings: a required `dockerfile` plus optional working
 * directory, resource settings, and network policy.
 */
export interface WorkbenchRuntimeSpec {
    // NOTE(review): the declaration does not say whether this is a path or
    // inline Dockerfile content — confirm against the implementation.
    dockerfile: string;
    workdir?: string;
    // Every resource setting is optional; units follow the field names.
    resources?: {
        cpu?: number;
        memoryGb?: number;
        diskGb?: number;
        timeoutMinutes?: number;
    };
    network?: WorkbenchExecutionNetworkPolicy;
}
|
|
15
|
+
/** Reference object holding a single `path` string. */
export interface WorkbenchPathRef {
    path: string;
}
|
|
18
|
+
/**
 * Benchmark spec shape: version literal 3, required name and description,
 * a list of adapter strings, and the `engine` adapter invocation.
 */
export interface AuthoredBenchmarkSpec {
    version: 3;
    name: string;
    description: string;
    adapters: string[];
    engine: WorkbenchAdapterInvocation;
}
|
|
25
|
+
/**
 * Subject manifest shape: version literal 3, a name, optional description,
 * a `files` path reference, adapter strings, and the `run` adapter
 * invocation.
 */
export interface WorkbenchSubjectManifestSpec {
    version: 3;
    name: string;
    description?: string;
    files: WorkbenchPathRef;
    adapters: string[];
    run: WorkbenchAdapterInvocation;
}
|
|
33
|
+
/** Alias of `WorkbenchSubjectManifestSpec`. */
export type ResolvedSubjectSpec = WorkbenchSubjectManifestSpec;
|
|
34
|
+
/**
 * Optimizer spec shape: version literal 3, a name, optional description,
 * `edits` and `adapters` string lists, and the `improve` adapter
 * invocation.
 */
export interface AuthoredOptimizerSpec {
    version: 3;
    name: string;
    description?: string;
    // NOTE(review): presumably paths or patterns the optimizer may edit —
    // confirm; the declaration only shows string[].
    edits: string[];
    adapters: string[];
    improve: WorkbenchAdapterInvocation;
}
|
|
42
|
+
/**
 * Bundle of a benchmark spec, a subject spec, and an optional optimizer
 * spec under version literal 3.
 */
export interface WorkbenchResolvedSource {
    version: 3;
    benchmark: AuthoredBenchmarkSpec;
    subject: ResolvedSubjectSpec;
    optimizer?: AuthoredOptimizerSpec;
}
|
|
48
|
+
/**
 * Flattened run spec: benchmark, subject, and optional optimizer sections
 * together with the runtime environment, adapter list, and one adapter
 * invocation per step (`engine`, `engineResolve`, `improve`, `run`,
 * `engineRun`).
 */
export interface GenericRunSpec {
    version: 3;
    name: string;
    description: string;
    benchmark: {
        name: string;
        description: string;
        engine: WorkbenchAdapterInvocation;
    };
    subject: {
        name: string;
        description?: string;
        files: WorkbenchPathRef;
    };
    optimizer?: {
        name: string;
        description?: string;
        edits: string[];
    };
    environment: WorkbenchRuntimeSpec;
    adapters: string[];
    engine: WorkbenchAdapterInvocation;
    engineResolve: WorkbenchAdapterInvocation;
    // Optional, like the `optimizer` section above.
    improve?: WorkbenchAdapterInvocation;
    run: WorkbenchAdapterInvocation;
    engineRun: WorkbenchAdapterInvocation;
}
|
|
75
|
+
/** Alias of `WorkbenchEngineCaseSpec` from the protocol package. */
export type GenericEngineCaseSpec = WorkbenchEngineCaseSpec;
|
|
76
|
+
export type { WorkbenchEngineCase } from "@workbench-ai/workbench-protocol";
|
|
77
|
+
/**
 * Execution configuration for one engine case: a prompt string, the
 * runtime environment, and the `run` adapter invocation.
 */
export interface ResolvedEngineCaseExecutionConfig {
    prompt: string;
    environment: WorkbenchRuntimeSpec;
    run: WorkbenchAdapterInvocation;
}
|
|
82
|
+
/** Constant of type `WorkbenchExecutionResources`; its values are not visible in this declaration file. */
export declare const DEFAULT_EXECUTION_RESOURCES: WorkbenchExecutionResources;
|
|
83
|
+
/** Takes a source string and returns a `WorkbenchSpecValidation`. */
export declare function validateWorkbenchResolvedSourceYaml(source: string): WorkbenchSpecValidation;
|
|
84
|
+
/** Takes a source string and returns a `GenericRunSpec`. */
export declare function resolveWorkbenchResolvedSourceYaml(source: string): GenericRunSpec;
|
|
85
|
+
/** Takes a source string and returns an `EngineResolveBinding`. */
export declare function engineResolveBindingForSourceYaml(source: string): EngineResolveBinding;
|
|
86
|
+
/** Takes a `GenericRunSpec` and returns an `EngineResolveBinding`. */
export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
|
|
87
|
+
/**
 * Takes benchmark and subject source strings, plus an optional (nullable)
 * optimizer source string, and returns a `GenericRunSpec`.
 */
export declare function resolveWorkbenchSourceFiles(args: {
    benchmarkSource: string;
    subjectSource: string;
    optimizerSource?: string | null;
}): GenericRunSpec;
|
|
92
|
+
/**
 * Takes a benchmark source string, plus optional subject and (nullable)
 * optimizer source strings, and returns a `WorkbenchResolvedSource`.
 */
export declare function parseWorkbenchSourceFiles(args: {
    benchmarkSource: string;
    // Optional here, unlike in `resolveWorkbenchSourceFiles`.
    subjectSource?: string;
    optimizerSource?: string | null;
}): WorkbenchResolvedSource;
|
|
97
|
+
/** Takes a `WorkbenchResolvedSource` and returns a string. */
export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
|
|
98
|
+
/** Takes a file path string and returns a boolean. */
export declare function isWorkbenchSubjectManifestPath(filePath: string): boolean;
|
|
99
|
+
/**
 * Takes a run spec and an engine case spec and returns a
 * `ResolvedEngineCaseExecutionConfig`.
 */
export declare function resolveEngineCaseExecutionConfig(args: {
    spec: GenericRunSpec;
    engineCase: GenericEngineCaseSpec;
}): ResolvedEngineCaseExecutionConfig;
|
|
103
|
+
/** Takes a `GenericRunSpec` and returns a `WorkbenchAdapterInvocation`. */
export declare function engineResolveInvocationForSpec(spec: GenericRunSpec): WorkbenchAdapterInvocation;
|
|
104
|
+
/**
 * Takes a run spec and an engine case and returns a list of
 * `SurfaceSnapshotFile` entries.
 */
export declare function engineCaseFilesForRuntimeInput(args: {
    spec: GenericRunSpec;
    engineCase: WorkbenchEngineCase;
}): SurfaceSnapshotFile[];
|
|
108
|
+
/** Takes an engine case and returns a list of `SurfaceSnapshotFile` entries. */
export declare function engineCaseSubjectVisibleFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
|
|
109
|
+
/** Takes an engine case and returns a list of `SurfaceSnapshotFile` entries. */
export declare function engineCaseEnginePrivateFiles(engineCase: WorkbenchEngineCase): SurfaceSnapshotFile[];
|
|
110
|
+
/** Takes a `WorkbenchRuntimeSpec` and returns `WorkbenchExecutionResources`. */
export declare function runtimeResources(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionResources;
|
|
111
|
+
/** Takes a `WorkbenchRuntimeSpec` and returns a `WorkbenchExecutionNetworkPolicy`. */
export declare function runtimeNetwork(runtime: WorkbenchRuntimeSpec): WorkbenchExecutionNetworkPolicy;
|
|
112
|
+
/** Takes a `WorkbenchRuntimeSpec` and returns a string. */
export declare function runtimeSandboxRef(runtime: WorkbenchRuntimeSpec): string;
|
|
113
|
+
//# sourceMappingURL=generic-spec.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EAEpB,mBAAmB,EACnB,0BAA0B,EAC1B,+BAA+B,EAC/B,2BAA2B,EAC3B,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,mBAAmB,mBAAmB,CAAC;AAEpD,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,4BAA4B;IAC3C,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAED,MAAM,MAAM,mBAAmB,GAAG,4BAA4B,CAAC;AAE/D,MAAM,WAAW,qBAAqB;IACpC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,0BAA0B,CAAC;CACrC;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,SAAS,EAAE,qBAAqB,CAAC;IACjC,OAAO,EAAE,mBAAmB,CAAC;IAC7B,SAAS,CAAC,EAAE,qBAAqB,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE;QACT,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,OAAO,EAAE;QACP,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;KACzB,CAAC;IACF,SAAS,CAAC,EAAE;QACV,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAC
hC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CAsChB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,EAAE,MAAM,CAAC;IACtB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,eAAe,EAAE,MAAM,CAAC;IACxB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CA4B1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,8BAA8B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAIxE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,6BAA6B,CAC3C,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,4BAA4B,CAC1C,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
|