@workbench-ai/workbench-core 0.0.66 → 0.0.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/execution-graph.d.ts +4 -3
- package/dist/execution-graph.d.ts.map +1 -1
- package/dist/execution-graph.js +15 -14
- package/dist/execution-jobs.d.ts +5 -20
- package/dist/execution-jobs.d.ts.map +1 -1
- package/dist/execution-jobs.js +7 -91
- package/dist/execution-outputs.d.ts +2 -2
- package/dist/execution-outputs.d.ts.map +1 -1
- package/dist/execution-outputs.js +10 -10
- package/dist/execution-runtime-types.d.ts +1 -1
- package/dist/execution-runtime-types.d.ts.map +1 -1
- package/dist/execution-scheduler.d.ts.map +1 -1
- package/dist/execution-scheduler.js +4 -1
- package/dist/execution-traces.js +1 -1
- package/dist/generic-spec.d.ts +29 -29
- package/dist/generic-spec.d.ts.map +1 -1
- package/dist/generic-spec.js +94 -92
- package/dist/index.d.ts +325 -220
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +5784 -3856
- package/dist/runtime-dockerfile.d.ts +1 -1
- package/dist/runtime-dockerfile.d.ts.map +1 -1
- package/dist/runtime-dockerfile.js +4 -4
- package/dist/runtime-utils.d.ts +1 -1
- package/dist/runtime-utils.d.ts.map +1 -1
- package/dist/runtime-utils.js +3 -3
- package/dist/sandbox-backends/docker.js +7 -5
- package/dist/sandbox-inputs.js +3 -3
- package/dist/sandbox-plane.d.ts.map +1 -1
- package/dist/sandbox-plane.js +13 -9
- package/dist/skill-patch.d.ts +8 -0
- package/dist/skill-patch.d.ts.map +1 -0
- package/dist/{candidate-patch.js → skill-patch.js} +5 -5
- package/package.json +3 -3
- package/worker/sandbox-adapter-runner.cjs +2 -2
- package/dist/candidate-patch.d.ts +0 -8
- package/dist/candidate-patch.d.ts.map +0 -1
- package/dist/execution-evidence.d.ts +0 -22
- package/dist/execution-evidence.d.ts.map +0 -1
- package/dist/execution-evidence.js +0 -302
- package/dist/inspection.d.ts +0 -111
- package/dist/inspection.d.ts.map +0 -1
- package/dist/inspection.js +0 -217
|
@@ -1,19 +1,20 @@
|
|
|
1
|
-
import type { WorkbenchExecutionSpec } from "@workbench-ai/workbench-contract";
|
|
1
|
+
import type { Json, WorkbenchExecutionSpec } from "@workbench-ai/workbench-contract";
|
|
2
2
|
import { type GenericRunSpec, type GenericEngineCaseSpec } from "./generic-spec.ts";
|
|
3
3
|
export interface CompileExecutionGraphInput {
|
|
4
4
|
ownerUserId: string;
|
|
5
5
|
projectId: string;
|
|
6
6
|
runId: string;
|
|
7
|
-
|
|
7
|
+
versionId: string;
|
|
8
8
|
attemptIndex: number;
|
|
9
9
|
sampleIndex?: number;
|
|
10
10
|
caseId?: string;
|
|
11
11
|
engineCase?: GenericEngineCaseSpec;
|
|
12
12
|
spec: GenericRunSpec;
|
|
13
13
|
workflow?: "eval" | "improve";
|
|
14
|
-
|
|
14
|
+
skillRef?: string;
|
|
15
15
|
caseRef?: string;
|
|
16
16
|
environmentRef?: string;
|
|
17
|
+
metadata?: Record<string, Json>;
|
|
17
18
|
}
|
|
18
19
|
export interface WorkbenchExecutionGraph {
|
|
19
20
|
nodes: WorkbenchExecutionGraphNode[];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execution-graph.d.ts","sourceRoot":"","sources":["../src/execution-graph.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"execution-graph.d.ts","sourceRoot":"","sources":["../src/execution-graph.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,IAAI,EAGJ,sBAAsB,EAEvB,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EAKL,KAAK,cAAc,EACnB,KAAK,qBAAqB,EAE3B,MAAM,mBAAmB,CAAC;AAE3B,MAAM,WAAW,0BAA0B;IACzC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,IAAI,EAAE,cAAc,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;CACjC;AAED,MAAM,WAAW,uBAAuB;IACtC,KAAK,EAAE,2BAA2B,EAAE,CAAC;IACrC,UAAU,EAAE,sBAAsB,EAAE,CAAC;CACtC;AAED,MAAM,WAAW,2BAA2B;IAC1C,SAAS,EAAE,sBAAsB,CAAC;IAClC,SAAS,EAAE,MAAM,EAAE,CAAC;CACrB;AAED,wBAAgB,8BAA8B,CAAC,KAAK,EAAE,0BAA0B,GAAG,uBAAuB,CAqEzG"}
|
package/dist/execution-graph.js
CHANGED
|
@@ -3,8 +3,8 @@ export function compileWorkbenchExecutionGraph(input) {
|
|
|
3
3
|
const workflow = input.workflow ?? "improve";
|
|
4
4
|
const sampleIndex = input.sampleIndex ?? 0;
|
|
5
5
|
const caseId = input.caseId ?? "current";
|
|
6
|
-
const
|
|
7
|
-
const caseRef = input.caseRef ?? `workbench://
|
|
6
|
+
const skillRef = input.skillRef ?? `workbench://skills/${input.projectId}/versions/${input.versionId}`;
|
|
7
|
+
const caseRef = input.caseRef ?? `workbench://skills/${input.projectId}/cases/${caseId}`;
|
|
8
8
|
if (!input.engineCase) {
|
|
9
9
|
throw new Error("Execution graph compilation requires an engine case.");
|
|
10
10
|
}
|
|
@@ -16,44 +16,45 @@ export function compileWorkbenchExecutionGraph(input) {
|
|
|
16
16
|
const nodes = [];
|
|
17
17
|
const executions = [];
|
|
18
18
|
const improveExecutionId = executionId(input, "improve", "current", 0);
|
|
19
|
-
const improveOutputRef = `execution://${improveExecutionId}/candidate_patch`;
|
|
20
19
|
const engineAdapter = input.spec.engineRun;
|
|
21
20
|
if (workflow === "improve") {
|
|
22
|
-
if (!input.spec.
|
|
23
|
-
throw new Error("
|
|
21
|
+
if (!input.spec.skill.improve || !input.spec.improve) {
|
|
22
|
+
throw new Error("Skill improve configuration is required for improve execution graphs.");
|
|
24
23
|
}
|
|
25
24
|
pushExecution(nodes, executions, createExecution({
|
|
26
25
|
input,
|
|
27
26
|
purpose: "improve",
|
|
28
27
|
adapter: input.spec.improve,
|
|
29
28
|
inputs: [
|
|
30
|
-
inputRef("
|
|
31
|
-
inputRef("traces", `workbench://
|
|
29
|
+
inputRef("skill", skillRef, "/workspace", true),
|
|
30
|
+
inputRef("traces", `workbench://skills/${input.projectId}/runs/${input.runId}/traces`, "/workspace/input/traces", false),
|
|
32
31
|
],
|
|
33
|
-
outputs: [outputContract("
|
|
32
|
+
outputs: [outputContract("skill_patch", "workbench.skill_patch.v1")],
|
|
34
33
|
metadata: {
|
|
34
|
+
...(input.metadata ?? {}),
|
|
35
35
|
attemptIndex: input.attemptIndex,
|
|
36
36
|
sampleIndex: 0,
|
|
37
37
|
caseId: "current",
|
|
38
|
-
|
|
39
|
-
edits: input.spec.
|
|
38
|
+
eval: input.spec.eval.name,
|
|
39
|
+
edits: input.spec.skill.improve.edits,
|
|
40
40
|
},
|
|
41
41
|
runtime: input.spec.environment,
|
|
42
42
|
idOverride: improveExecutionId,
|
|
43
43
|
}), []);
|
|
44
|
+
return { nodes, executions };
|
|
44
45
|
}
|
|
45
|
-
const runCandidateRef = workflow === "improve" ? improveOutputRef : candidateRef;
|
|
46
46
|
const attemptExecutionId = executionId(input, "attempt", caseId, sampleIndex);
|
|
47
47
|
pushExecution(nodes, executions, createExecution({
|
|
48
48
|
input,
|
|
49
49
|
purpose: "attempt",
|
|
50
50
|
adapter: engineAdapter,
|
|
51
51
|
inputs: [
|
|
52
|
-
inputRef("
|
|
52
|
+
inputRef("skills", skillRef, "/workspace/input/skills", false),
|
|
53
53
|
inputRef("case", caseRef, "/workspace/input/case", false),
|
|
54
54
|
],
|
|
55
55
|
outputs: [outputContract("result", "workbench.result.v1")],
|
|
56
56
|
metadata: {
|
|
57
|
+
...(input.metadata ?? {}),
|
|
57
58
|
attemptIndex: input.attemptIndex,
|
|
58
59
|
sampleIndex,
|
|
59
60
|
caseId,
|
|
@@ -62,7 +63,7 @@ export function compileWorkbenchExecutionGraph(input) {
|
|
|
62
63
|
},
|
|
63
64
|
runtime: executionConfig.environment,
|
|
64
65
|
idOverride: attemptExecutionId,
|
|
65
|
-
}),
|
|
66
|
+
}), []);
|
|
66
67
|
return { nodes, executions };
|
|
67
68
|
}
|
|
68
69
|
function pushExecution(nodes, executions, execution, dependsOn) {
|
|
@@ -77,7 +78,7 @@ function createExecution(args) {
|
|
|
77
78
|
id: args.idOverride ?? executionId(args.input, args.purpose, args.input.caseId ?? "current", args.input.sampleIndex ?? 0),
|
|
78
79
|
projectId: args.input.projectId,
|
|
79
80
|
runId: args.input.runId,
|
|
80
|
-
|
|
81
|
+
versionId: args.input.versionId,
|
|
81
82
|
purpose: args.purpose,
|
|
82
83
|
adapter: args.adapter,
|
|
83
84
|
sandbox: args.input.environmentRef
|
package/dist/execution-jobs.d.ts
CHANGED
|
@@ -19,7 +19,7 @@ export declare function planWorkbenchExecutionJobsForPurpose(args: {
|
|
|
19
19
|
ownerUserId: string;
|
|
20
20
|
projectId: string;
|
|
21
21
|
runId: string;
|
|
22
|
-
|
|
22
|
+
versionId: string;
|
|
23
23
|
attemptIndex: number;
|
|
24
24
|
samples: number;
|
|
25
25
|
caseIds?: readonly string[];
|
|
@@ -32,15 +32,18 @@ export declare function planWorkbenchExecutionJobsForPurpose(args: {
|
|
|
32
32
|
engineCases: readonly WorkbenchEngineCase[];
|
|
33
33
|
traceFiles?: readonly SurfaceSnapshotFile[];
|
|
34
34
|
environmentRef?: string;
|
|
35
|
+
skillRef?: string;
|
|
36
|
+
caseRef?: string;
|
|
35
37
|
environmentRefsByCase?: ReadonlyMap<string, string>;
|
|
36
38
|
baseId?: string | null;
|
|
39
|
+
metadata?: Record<string, Json>;
|
|
37
40
|
}): RemoteWorkbenchJob[];
|
|
38
41
|
export declare function engineCaseIds(engineCases: readonly WorkbenchEngineCase[]): string[];
|
|
39
42
|
export declare function engineCaseForCase(engineCases: readonly WorkbenchEngineCase[], caseId: string): WorkbenchEngineCase;
|
|
40
43
|
export declare function createWorkbenchExecutionJob(args: {
|
|
41
44
|
projectId: string;
|
|
42
45
|
runId: string;
|
|
43
|
-
|
|
46
|
+
versionId: string;
|
|
44
47
|
execution: WorkbenchExecutionSpec;
|
|
45
48
|
dependsOn: readonly string[];
|
|
46
49
|
now: string;
|
|
@@ -48,24 +51,6 @@ export declare function createWorkbenchExecutionJob(args: {
|
|
|
48
51
|
traceFiles?: readonly SurfaceSnapshotFile[];
|
|
49
52
|
baseId?: string | null;
|
|
50
53
|
}): RemoteWorkbenchJob;
|
|
51
|
-
export declare function createBaselineCandidateExecution(args: {
|
|
52
|
-
ownerUserId: string;
|
|
53
|
-
projectId: string;
|
|
54
|
-
runId: string;
|
|
55
|
-
candidateId: string;
|
|
56
|
-
attemptIndex: number;
|
|
57
|
-
}): WorkbenchExecutionSpec;
|
|
58
|
-
export declare function createBaselineCandidateJob(args: {
|
|
59
|
-
ownerUserId: string;
|
|
60
|
-
projectId: string;
|
|
61
|
-
runId: string;
|
|
62
|
-
candidateId: string;
|
|
63
|
-
files: readonly SurfaceSnapshotFile[];
|
|
64
|
-
now: string;
|
|
65
|
-
baseId: string | null;
|
|
66
|
-
attemptIndex: number;
|
|
67
|
-
fileSet?: Json;
|
|
68
|
-
}): RemoteWorkbenchJob;
|
|
69
54
|
export declare function workbenchExecutionJobId(executionId: string): string;
|
|
70
55
|
export declare function workbenchExecutionJobPurpose(job: RemoteWorkbenchJob): WorkbenchExecutionSpec["purpose"] | null;
|
|
71
56
|
//# sourceMappingURL=execution-jobs.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execution-jobs.d.ts","sourceRoot":"","sources":["../src/execution-jobs.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAClB,IAAI,EACJ,mBAAmB,EACnB,sBAAsB,EACvB,MAAM,kCAAkC,CAAC;AAK1C,OAAO,KAAK,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,mBAAmB,CAAC;AAE3B,MAAM,MAAM,oBAAoB,GAAG,MAAM,GAAG,SAAS,CAAC;AAEtD,eAAO,MAAM,wBAAwB,KAAK,CAAC;AAE3C,wBAAgB,4BAA4B,CAAC,IAAI,EAAE;IACjD,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,CAMT;AAED,wBAAgB,4BAA4B,CAAC,IAAI,EAAE;IACjD,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,GAAG,IAAI,CAchB;AAED,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,cAAc,GAAG,MAAM,CAEvE;AAED,wBAAgB,oCAAoC,CAAC,IAAI,EAAE;IACzD,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,
|
|
1
|
+
{"version":3,"file":"execution-jobs.d.ts","sourceRoot":"","sources":["../src/execution-jobs.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAClB,IAAI,EACJ,mBAAmB,EACnB,sBAAsB,EACvB,MAAM,kCAAkC,CAAC;AAK1C,OAAO,KAAK,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,mBAAmB,CAAC;AAE3B,MAAM,MAAM,oBAAoB,GAAG,MAAM,GAAG,SAAS,CAAC;AAEtD,eAAO,MAAM,wBAAwB,KAAK,CAAC;AAE3C,wBAAgB,4BAA4B,CAAC,IAAI,EAAE;IACjD,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,CAMT;AAED,wBAAgB,4BAA4B,CAAC,IAAI,EAAE;IACjD,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB,GAAG,MAAM,GAAG,IAAI,CAchB;AAED,wBAAgB,yBAAyB,CAAC,KAAK,EAAE,cAAc,GAAG,MAAM,CAEvE;AAED,wBAAgB,oCAAoC,CAAC,IAAI,EAAE;IACzD,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,YAAY,EAAE,MAAM,CAAC;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,SAAS,MAAM,EAAE,CAAC;IAC5B,mBAAmB,CAAC,EAAE,WAAW,CAAC,MAAM,EAAE,SAAS,MAAM,EAAE,CAAC,CAAC;IAC7D,IAAI,EAAE,cAAc,CAAC;IACrB,QAAQ,EAAE,oBAAoB,CAAC;IAC/B,OAAO,EAAE,sBAAsB,CAAC,SAAS,CAAC,CAAC;IAC3C,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC3C,WAAW,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,UAAU,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,qBAAqB,CAAC,EAAE,WAAW,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpD,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACvB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;CACjC,GAAG,kBAAkB,EAAE,CAoDvB;AAmBD,wBAAgB,aAAa,CAAC,WAAW,EAAE,SAAS,mBAAmB,EAAE,GAAG,MAAM,EAAE,CAEnF;AAED,wBAAgB,iBAAiB,CAC/B,WAAW,EAAE,SAAS,mBAAmB,EAAE,EAC3C,MAAM,EAAE,MAAM,GACb,mBAAmB,CAMrB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,SAAS,EAAE,MAAM,CAAC;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,sBAAsB,CAAC;IAClC,SAAS,EAAE,SAAS,MAAM,EAAE,CAAC;IAC7B,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC3C,UAAU,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,MAAM,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACxB,GAAG,kBAAkB,CA0BrB;AAED,wBAAgB,uBAAuB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAEnE;AAED,wBAAgB,4BAA4B,CAAC,GAAG,EAAE,kBAAkB,GAAG,sBAAsB,CAAC,SAAS,CAAC,GAAG,IAAI,CAO9G"}
|
package/dist/execution-jobs.js
CHANGED
|
@@ -46,13 +46,16 @@ export function planWorkbenchExecutionJobsForPurpose(args) {
|
|
|
46
46
|
ownerUserId: args.ownerUserId,
|
|
47
47
|
projectId: args.projectId,
|
|
48
48
|
runId: args.runId,
|
|
49
|
-
|
|
49
|
+
versionId: args.versionId,
|
|
50
50
|
attemptIndex: args.attemptIndex,
|
|
51
51
|
sampleIndex,
|
|
52
52
|
caseId,
|
|
53
53
|
spec: args.spec,
|
|
54
54
|
engineCase: engineCase.case,
|
|
55
55
|
environmentRef: args.environmentRefsByCase?.get(caseId) ?? args.environmentRef,
|
|
56
|
+
skillRef: args.skillRef,
|
|
57
|
+
caseRef: args.caseRef,
|
|
58
|
+
metadata: args.metadata,
|
|
56
59
|
workflow: args.workflow === "improve" ? "improve" : "eval",
|
|
57
60
|
});
|
|
58
61
|
for (const node of graph.nodes) {
|
|
@@ -62,7 +65,7 @@ export function planWorkbenchExecutionJobsForPurpose(args) {
|
|
|
62
65
|
jobs.push(createWorkbenchExecutionJob({
|
|
63
66
|
projectId: args.projectId,
|
|
64
67
|
runId: args.runId,
|
|
65
|
-
|
|
68
|
+
versionId: args.versionId,
|
|
66
69
|
execution: node.execution,
|
|
67
70
|
dependsOn: node.dependsOn,
|
|
68
71
|
now: args.now,
|
|
@@ -103,7 +106,7 @@ export function createWorkbenchExecutionJob(args) {
|
|
|
103
106
|
id: workbenchExecutionJobId(args.execution.id),
|
|
104
107
|
projectId: args.projectId,
|
|
105
108
|
runId: args.runId,
|
|
106
|
-
|
|
109
|
+
versionId: args.versionId,
|
|
107
110
|
kind: "execute",
|
|
108
111
|
status: "queued",
|
|
109
112
|
attempt: 0,
|
|
@@ -112,7 +115,7 @@ export function createWorkbenchExecutionJob(args) {
|
|
|
112
115
|
input: {
|
|
113
116
|
execution: args.execution,
|
|
114
117
|
dependsOn: args.dependsOn.map(workbenchExecutionJobId),
|
|
115
|
-
|
|
118
|
+
versionId: args.versionId,
|
|
116
119
|
attemptIndex,
|
|
117
120
|
sampleIndex,
|
|
118
121
|
caseId,
|
|
@@ -122,93 +125,6 @@ export function createWorkbenchExecutionJob(args) {
|
|
|
122
125
|
},
|
|
123
126
|
};
|
|
124
127
|
}
|
|
125
|
-
export function createBaselineCandidateExecution(args) {
|
|
126
|
-
return {
|
|
127
|
-
id: `exec_${args.runId.replace(/[^a-z0-9_]/giu, "_")}_attempt_${String(args.attemptIndex).padStart(3, "0")}_case_current_sample_000_improve`,
|
|
128
|
-
projectId: args.projectId,
|
|
129
|
-
runId: args.runId,
|
|
130
|
-
candidateId: args.candidateId,
|
|
131
|
-
purpose: "improve",
|
|
132
|
-
adapter: {
|
|
133
|
-
use: "baseline",
|
|
134
|
-
with: {},
|
|
135
|
-
},
|
|
136
|
-
sandbox: {
|
|
137
|
-
kind: "snapshot",
|
|
138
|
-
ref: "workbench/baseline-candidate",
|
|
139
|
-
},
|
|
140
|
-
inputs: [],
|
|
141
|
-
outputs: [{
|
|
142
|
-
name: "candidate_patch",
|
|
143
|
-
schema: "workbench.candidate_patch.v1",
|
|
144
|
-
required: true,
|
|
145
|
-
}],
|
|
146
|
-
policy: {
|
|
147
|
-
tenantId: args.ownerUserId,
|
|
148
|
-
resources: {
|
|
149
|
-
cpu: 1,
|
|
150
|
-
memoryGb: 1,
|
|
151
|
-
diskGb: 1,
|
|
152
|
-
timeoutMinutes: 1,
|
|
153
|
-
},
|
|
154
|
-
network: {
|
|
155
|
-
egress: "none",
|
|
156
|
-
},
|
|
157
|
-
},
|
|
158
|
-
metadata: {
|
|
159
|
-
attemptIndex: args.attemptIndex,
|
|
160
|
-
sampleIndex: 0,
|
|
161
|
-
caseId: "current",
|
|
162
|
-
baseline: true,
|
|
163
|
-
},
|
|
164
|
-
};
|
|
165
|
-
}
|
|
166
|
-
export function createBaselineCandidateJob(args) {
|
|
167
|
-
const execution = createBaselineCandidateExecution({
|
|
168
|
-
ownerUserId: args.ownerUserId,
|
|
169
|
-
projectId: args.projectId,
|
|
170
|
-
runId: args.runId,
|
|
171
|
-
candidateId: args.candidateId,
|
|
172
|
-
attemptIndex: args.attemptIndex,
|
|
173
|
-
});
|
|
174
|
-
const files = args.files.map((file) => ({ ...file }));
|
|
175
|
-
return {
|
|
176
|
-
id: workbenchExecutionJobId(execution.id),
|
|
177
|
-
projectId: args.projectId,
|
|
178
|
-
runId: args.runId,
|
|
179
|
-
candidateId: args.candidateId,
|
|
180
|
-
kind: "execute",
|
|
181
|
-
status: "succeeded",
|
|
182
|
-
attempt: 1,
|
|
183
|
-
createdAt: args.now,
|
|
184
|
-
startedAt: args.now,
|
|
185
|
-
finishedAt: args.now,
|
|
186
|
-
updatedAt: args.now,
|
|
187
|
-
input: {
|
|
188
|
-
execution,
|
|
189
|
-
dependsOn: [],
|
|
190
|
-
candidateId: args.candidateId,
|
|
191
|
-
attemptIndex: args.attemptIndex,
|
|
192
|
-
baseline: true,
|
|
193
|
-
},
|
|
194
|
-
output: {
|
|
195
|
-
ok: true,
|
|
196
|
-
executionId: execution.id,
|
|
197
|
-
purpose: "improve",
|
|
198
|
-
candidateId: args.candidateId,
|
|
199
|
-
attemptIndex: args.attemptIndex,
|
|
200
|
-
baseId: args.baseId,
|
|
201
|
-
candidatePatch: {
|
|
202
|
-
files,
|
|
203
|
-
fileChanges: [],
|
|
204
|
-
},
|
|
205
|
-
fileChanges: [],
|
|
206
|
-
files,
|
|
207
|
-
...(args.fileSet ? { fileSet: args.fileSet } : {}),
|
|
208
|
-
traces: [],
|
|
209
|
-
},
|
|
210
|
-
};
|
|
211
|
-
}
|
|
212
128
|
export function workbenchExecutionJobId(executionId) {
|
|
213
129
|
return `job_${executionId.replace(/[^a-z0-9_]/giu, "_")}`;
|
|
214
130
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import type { Json,
|
|
1
|
+
import type { Json, WorkbenchSkillPatch, WorkbenchExecutionSpec, WorkbenchResult } from "@workbench-ai/workbench-contract";
|
|
2
2
|
export interface WorkbenchExecutionOutputPayloads {
|
|
3
|
-
|
|
3
|
+
skillPatch?: WorkbenchSkillPatch;
|
|
4
4
|
result?: WorkbenchResult;
|
|
5
5
|
}
|
|
6
6
|
export declare function validateWorkbenchExecutionOutputPayloads(execution: WorkbenchExecutionSpec, payloads: Record<string, Json>): WorkbenchExecutionOutputPayloads;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execution-outputs.d.ts","sourceRoot":"","sources":["../src/execution-outputs.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,IAAI,EAEJ,
|
|
1
|
+
{"version":3,"file":"execution-outputs.d.ts","sourceRoot":"","sources":["../src/execution-outputs.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,IAAI,EAEJ,mBAAmB,EAEnB,sBAAsB,EACtB,eAAe,EAChB,MAAM,kCAAkC,CAAC;AAK1C,MAAM,WAAW,gCAAgC;IAC/C,UAAU,CAAC,EAAE,mBAAmB,CAAC;IACjC,MAAM,CAAC,EAAE,eAAe,CAAC;CAC1B;AAED,wBAAgB,wCAAwC,CACtD,SAAS,EAAE,sBAAsB,EACjC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,GAC7B,gCAAgC,CAoClC;AAED,wBAAgB,wCAAwC,CAAC,SAAS,EAAE,sBAAsB,GAAG,MAAM,EAAE,CAuFpG;AAuCD,wBAAgB,iCAAiC,CAAC,SAAS,EAAE,sBAAsB,GAAG,IAAI,CAKzF"}
|
|
@@ -20,8 +20,8 @@ export function validateWorkbenchExecutionOutputPayloads(execution, payloads) {
|
|
|
20
20
|
continue;
|
|
21
21
|
}
|
|
22
22
|
switch (contract.schema) {
|
|
23
|
-
case "workbench.
|
|
24
|
-
validated.
|
|
23
|
+
case "workbench.skill_patch.v1":
|
|
24
|
+
validated.skillPatch = normalizeSkillPatch(payload, execution, contract, issues);
|
|
25
25
|
break;
|
|
26
26
|
case "workbench.result.v1":
|
|
27
27
|
validated.result = normalizeResult(payload, execution, contract, issues);
|
|
@@ -120,25 +120,25 @@ export function collectWorkbenchExecutionIsolationIssues(execution) {
|
|
|
120
120
|
}
|
|
121
121
|
function expectedInputsForPurpose(purpose) {
|
|
122
122
|
if (purpose === "improve") {
|
|
123
|
-
return new Set(["
|
|
123
|
+
return new Set(["skill", "traces"]);
|
|
124
124
|
}
|
|
125
125
|
if (purpose === "attempt") {
|
|
126
|
-
return new Set(["
|
|
126
|
+
return new Set(["skills", "case"]);
|
|
127
127
|
}
|
|
128
128
|
return new Set();
|
|
129
129
|
}
|
|
130
130
|
function expectedInputMountPath(purpose, name) {
|
|
131
|
-
if (purpose === "improve" && name === "
|
|
131
|
+
if (purpose === "improve" && name === "skill") {
|
|
132
132
|
return "/workspace";
|
|
133
133
|
}
|
|
134
134
|
return `/workspace/input/${name}`;
|
|
135
135
|
}
|
|
136
136
|
function expectedInputWritable(purpose, name) {
|
|
137
|
-
return purpose === "improve" && name === "
|
|
137
|
+
return purpose === "improve" && name === "skill";
|
|
138
138
|
}
|
|
139
139
|
function expectedOutputForPurpose(purpose) {
|
|
140
140
|
if (purpose === "improve") {
|
|
141
|
-
return "
|
|
141
|
+
return "skill_patch";
|
|
142
142
|
}
|
|
143
143
|
if (purpose === "attempt") {
|
|
144
144
|
return "result";
|
|
@@ -153,20 +153,20 @@ export function assertWorkbenchExecutionIsolation(execution) {
|
|
|
153
153
|
}
|
|
154
154
|
function outputAllowedForPurpose(purpose, output) {
|
|
155
155
|
if (purpose === "improve") {
|
|
156
|
-
return output.schema === "workbench.
|
|
156
|
+
return output.schema === "workbench.skill_patch.v1";
|
|
157
157
|
}
|
|
158
158
|
if (purpose === "attempt") {
|
|
159
159
|
return output.schema === "workbench.result.v1";
|
|
160
160
|
}
|
|
161
161
|
return false;
|
|
162
162
|
}
|
|
163
|
-
function
|
|
163
|
+
function normalizeSkillPatch(value, execution, contract, issues) {
|
|
164
164
|
const record = readRecord(value, contract.name, issues);
|
|
165
165
|
const files = normalizeSnapshotFiles(record?.files, `${contract.name}.files`, issues);
|
|
166
166
|
const fileChanges = normalizeStringArray(record?.fileChanges, `${contract.name}.fileChanges`, issues);
|
|
167
167
|
const edits = normalizeMetadataStringArray(execution.metadata.edits);
|
|
168
168
|
if (edits.length === 0) {
|
|
169
|
-
issues.push(`Execution ${execution.id}
|
|
169
|
+
issues.push(`Execution ${execution.id} skill patch validation requires metadata.edits.`);
|
|
170
170
|
}
|
|
171
171
|
for (const file of files) {
|
|
172
172
|
if (!isAllowedEditPath(file.path, edits)) {
|
|
@@ -29,7 +29,7 @@ export interface WorkbenchExecutionRuntimeInput {
|
|
|
29
29
|
runtimeControlOperation?: WorkbenchRuntimeControlOperationSequenceRequest;
|
|
30
30
|
}
|
|
31
31
|
export interface WorkbenchWorkloadStepCommand {
|
|
32
|
-
kind: "improver" | "
|
|
32
|
+
kind: "improver" | "skill" | "engine";
|
|
33
33
|
label: string;
|
|
34
34
|
operation: WorkbenchAdapterOperation;
|
|
35
35
|
executor: WorkbenchAdapterOperationExecutor;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execution-runtime-types.d.ts","sourceRoot":"","sources":["../src/execution-runtime-types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,iCAAiC,EACjC,kBAAkB,EAClB,IAAI,EACJ,mBAAmB,EACnB,0BAA0B,EAC3B,MAAM,kCAAkC,CAAC;AAE1C,OAAO,KAAK,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,0BAA0B,EAC3B,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,yBAAyB,EACzB,iCAAiC,EACjC,wBAAwB,EACxB,+CAA+C,EAChD,MAAM,kCAAkC,CAAC;AAE1C,MAAM,WAAW,8BAA8B;IAC7C,GAAG,EAAE,kBAAkB,CAAC;IACxB,IAAI,EAAE,cAAc,CAAC;IACrB,kBAAkB,CAAC,EAAE,IAAI,CAAC,iCAAiC,EAAE,IAAI,GAAG,UAAU,GAAG,YAAY,GAAG,MAAM,CAAC,CAAC;IACxG,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,SAAS,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC1C,kBAAkB,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACnD,WAAW,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,YAAY,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC9C,UAAU,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,mBAAmB,CAAC,EAAE,SAAS,0BAA0B,EAAE,CAAC;IAC5D,qBAAqB,CAAC,EAAE,CAAC,QAAQ,EAAE,SAAS,0BAA0B,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3F,gBAAgB,CAAC,EAAE,SAAS,wBAAwB,EAAE,CAAC;IACvD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC3C,QAAQ,CAAC,EAAE,gCAAgC,CAAC;IAC5C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,+CAA+C,CAAC;CAC3E;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,UAAU,GAAG,
|
|
1
|
+
{"version":3,"file":"execution-runtime-types.d.ts","sourceRoot":"","sources":["../src/execution-runtime-types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,iCAAiC,EACjC,kBAAkB,EAClB,IAAI,EACJ,mBAAmB,EACnB,0BAA0B,EAC3B,MAAM,kCAAkC,CAAC;AAE1C,OAAO,KAAK,EACV,cAAc,EACd,mBAAmB,EACpB,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,gCAAgC,EACjC,MAAM,uBAAuB,CAAC;AAC/B,OAAO,KAAK,EACV,0BAA0B,EAC3B,MAAM,mBAAmB,CAAC;AAC3B,OAAO,KAAK,EACV,yBAAyB,EACzB,iCAAiC,EACjC,wBAAwB,EACxB,+CAA+C,EAChD,MAAM,kCAAkC,CAAC;AAE1C,MAAM,WAAW,8BAA8B;IAC7C,GAAG,EAAE,kBAAkB,CAAC;IACxB,IAAI,EAAE,cAAc,CAAC;IACrB,kBAAkB,CAAC,EAAE,IAAI,CAAC,iCAAiC,EAAE,IAAI,GAAG,UAAU,GAAG,YAAY,GAAG,MAAM,CAAC,CAAC;IACxG,qBAAqB,CAAC,EAAE,MAAM,CAAC;IAC/B,SAAS,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC1C,kBAAkB,EAAE,SAAS,mBAAmB,EAAE,CAAC;IACnD,WAAW,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,YAAY,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC9C,UAAU,CAAC,EAAE,SAAS,mBAAmB,EAAE,CAAC;IAC5C,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,mBAAmB,CAAC,EAAE,SAAS,0BAA0B,EAAE,CAAC;IAC5D,qBAAqB,CAAC,EAAE,CAAC,QAAQ,EAAE,SAAS,0BAA0B,EAAE,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAC3F,gBAAgB,CAAC,EAAE,SAAS,wBAAwB,EAAE,CAAC;IACvD,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,kBAAkB,CAAC,EAAE,IAAI,CAAC;IAC1B,cAAc,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACxC,iBAAiB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC3C,QAAQ,CAAC,EAAE,gCAAgC,CAAC;IAC5C,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uBAAuB,CAAC,EAAE,+CAA+C,CAAC;CAC3E;AAED,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,UAAU,GAAG,OAAO,GAAG,QAAQ,CAAC;IACtC,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,yBAAyB,CAAC;IACrC,QAAQ,EAAE,iCAAiC,CAAC;IAC5C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,WAAW,CAAC,EAAE,MAAM,EAAE,CAAC;CACxB"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"execution-scheduler.d.ts","sourceRoot":"","sources":["../src/execution-scheduler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAEnB,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EAEL,KAAK,sBAAsB,EAC3B,KAAK,gCAAgC,EACrC,KAAK,2BAA2B,EACjC,MAAM,6BAA6B,CAAC;AAErC,MAAM,WAAW,6BAA6B;IAC5C,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,kBAAkB,EAAE,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,6BAA6B;IAC5C,IAAI,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACpC,QAAQ,EAAE,6BAA6B,CAAC;IACxC,cAAc,EAAE,2BAA2B,CAAC;IAC5C,UAAU,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACrE,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,IAAI,CAAC;IAChD,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,IAAI,CAAC;IACjD,aAAa,CAAC,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,IAAI,CAAC;CACnD;AASD,wBAAsB,wBAAwB,CAC5C,IAAI,EAAE,6BAA6B,GAClC,OAAO,CAAC,2BAA2B,CAAC,CAuLtC;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,kBAAkB,GAAG,MAAM,EAAE,CAM1E;AAED,wBAAgB,qBAAqB,CACnC,GAAG,EAAE,kBAAkB,GACtB,gCAAgC,
|
|
1
|
+
{"version":3,"file":"execution-scheduler.d.ts","sourceRoot":"","sources":["../src/execution-scheduler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,kBAAkB,EAEnB,MAAM,kCAAkC,CAAC;AAE1C,OAAO,EAEL,KAAK,sBAAsB,EAC3B,KAAK,gCAAgC,EACrC,KAAK,2BAA2B,EACjC,MAAM,6BAA6B,CAAC;AAErC,MAAM,WAAW,6BAA6B;IAC5C,GAAG,EAAE,MAAM,CAAC;IACZ,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,2BAA2B;IAC1C,IAAI,EAAE,kBAAkB,EAAE,CAAC;IAC3B,cAAc,EAAE,MAAM,CAAC;IACvB,eAAe,EAAE,MAAM,CAAC;IACxB,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,MAAM,WAAW,6BAA6B;IAC5C,IAAI,EAAE,SAAS,kBAAkB,EAAE,CAAC;IACpC,QAAQ,EAAE,6BAA6B,CAAC;IACxC,cAAc,EAAE,2BAA2B,CAAC;IAC5C,UAAU,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,OAAO,CAAC,kBAAkB,CAAC,CAAC;IACrE,GAAG,CAAC,EAAE,MAAM,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,IAAI,CAAC;IAChD,YAAY,CAAC,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,IAAI,CAAC;IACjD,aAAa,CAAC,EAAE,CAAC,GAAG,EAAE,kBAAkB,KAAK,IAAI,CAAC;CACnD;AASD,wBAAsB,wBAAwB,CAC5C,IAAI,EAAE,6BAA6B,GAClC,OAAO,CAAC,2BAA2B,CAAC,CAuLtC;AAED,wBAAgB,wBAAwB,CAAC,GAAG,EAAE,kBAAkB,GAAG,MAAM,EAAE,CAM1E;AAED,wBAAgB,qBAAqB,CACnC,GAAG,EAAE,kBAAkB,GACtB,gCAAgC,CAYlC;AAED,wBAAgB,oBAAoB,CAClC,GAAG,EAAE,kBAAkB,EACvB,OAAO,EAAE,2BAA2B,GACnC,sBAAsB,CAExB;AAED,wBAAgB,WAAW,CACzB,IAAI,EAAE,6BAA6B,EACnC,KAAK,EAAE,6BAA6B,GACnC,6BAA6B,CAM/B;AAED,wBAAgB,gBAAgB,CAC9B,IAAI,EAAE,6BAA6B,EACnC,KAAK,EAAE,6BAA6B,GACnC,6BAA6B,CAM/B;AAED,wBAAgB,YAAY,CAC1B,SAAS,EAAE,6BAA6B,EACxC,IAAI,EAAE,6BAA6B,GAClC,OAAO,CAIT"}
|
|
@@ -175,7 +175,10 @@ export function workbenchJobDependencies(job) {
|
|
|
175
175
|
: [];
|
|
176
176
|
}
|
|
177
177
|
export function workbenchJobResources(job) {
|
|
178
|
-
const
|
|
178
|
+
const input = jsonRecord(job.input);
|
|
179
|
+
const resources = input.kind === "workbench.skill.eval.job.v1"
|
|
180
|
+
? input.resources
|
|
181
|
+
: jsonRecord(jsonRecord(input.execution).policy).resources;
|
|
179
182
|
const record = jsonRecord(resources);
|
|
180
183
|
return {
|
|
181
184
|
cpu: readPositiveResource(record.cpu, job.id, "resources.cpu"),
|
package/dist/execution-traces.js
CHANGED
|
@@ -140,7 +140,7 @@ export function readWorkbenchExecutionTraceFiles(files) {
|
|
|
140
140
|
export function traceSessionLabel(filePath, role) {
|
|
141
141
|
const innerPath = traceSessionInnerPath(filePath);
|
|
142
142
|
if (innerPath === "runner/session") {
|
|
143
|
-
return "
|
|
143
|
+
return "Skill run";
|
|
144
144
|
}
|
|
145
145
|
if (innerPath === "improver/session") {
|
|
146
146
|
return "Improver";
|
package/dist/generic-spec.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { type EngineResolveBinding, type SurfaceSnapshotFile, type WorkbenchAdapterInvocation, type WorkbenchExecutionNetworkPolicy, type WorkbenchExecutionResources, type WorkbenchSpecValidation } from "@workbench-ai/workbench-contract";
|
|
2
2
|
import type { WorkbenchEngineCase, WorkbenchEngineCaseSpec } from "@workbench-ai/workbench-protocol";
|
|
3
|
-
export declare const
|
|
4
|
-
export declare const
|
|
3
|
+
export declare const EVAL_SPEC_FILE = "eval.yaml";
|
|
4
|
+
export declare const SKILL_SPEC_FILE = "skill.yaml";
|
|
5
5
|
export interface WorkbenchRuntimeSpec {
|
|
6
6
|
dockerfile: string;
|
|
7
7
|
workdir?: string;
|
|
@@ -16,17 +16,17 @@ export interface WorkbenchRuntimeSpec {
|
|
|
16
16
|
export interface WorkbenchPathRef {
|
|
17
17
|
path: string;
|
|
18
18
|
}
|
|
19
|
-
export interface
|
|
19
|
+
export interface WorkbenchSkillPrepareSpec {
|
|
20
20
|
command: string;
|
|
21
21
|
}
|
|
22
|
-
export interface
|
|
22
|
+
export interface AuthoredEvalSpec {
|
|
23
23
|
version: 4;
|
|
24
24
|
name: string;
|
|
25
25
|
description: string;
|
|
26
26
|
adapters: string[];
|
|
27
27
|
engine: WorkbenchAdapterInvocation;
|
|
28
28
|
}
|
|
29
|
-
export interface
|
|
29
|
+
export interface WorkbenchSkillAgentSpec extends WorkbenchAdapterInvocation {
|
|
30
30
|
name: string;
|
|
31
31
|
}
|
|
32
32
|
export interface WorkbenchCaseSelector {
|
|
@@ -37,48 +37,48 @@ export interface WorkbenchSelectionSpec {
|
|
|
37
37
|
metric: string;
|
|
38
38
|
cases?: WorkbenchCaseSelector;
|
|
39
39
|
}
|
|
40
|
-
export interface
|
|
40
|
+
export interface WorkbenchSkillImproveSpec extends WorkbenchAdapterInvocation {
|
|
41
41
|
edits: string[];
|
|
42
42
|
optimizeOn?: WorkbenchCaseSelector;
|
|
43
43
|
selectBy?: WorkbenchSelectionSpec;
|
|
44
44
|
}
|
|
45
|
-
export interface
|
|
45
|
+
export interface WorkbenchSkillManifestSpec {
|
|
46
46
|
version: 4;
|
|
47
47
|
name: string;
|
|
48
48
|
description?: string;
|
|
49
49
|
files: WorkbenchPathRef;
|
|
50
|
-
prepare?:
|
|
50
|
+
prepare?: WorkbenchSkillPrepareSpec;
|
|
51
51
|
adapters: string[];
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
improve?:
|
|
52
|
+
defaultAgent?: string;
|
|
53
|
+
agents: Record<string, WorkbenchSkillAgentSpec>;
|
|
54
|
+
improve?: WorkbenchSkillImproveSpec;
|
|
55
55
|
}
|
|
56
|
-
export interface
|
|
57
|
-
|
|
56
|
+
export interface ResolvedSkillSpec extends WorkbenchSkillManifestSpec {
|
|
57
|
+
selectedAgentId: string;
|
|
58
58
|
}
|
|
59
59
|
export interface WorkbenchResolvedSource {
|
|
60
60
|
version: 4;
|
|
61
|
-
|
|
62
|
-
|
|
61
|
+
eval: AuthoredEvalSpec;
|
|
62
|
+
skill: ResolvedSkillSpec;
|
|
63
63
|
}
|
|
64
64
|
export interface GenericRunSpec {
|
|
65
65
|
version: 4;
|
|
66
66
|
name: string;
|
|
67
67
|
description: string;
|
|
68
|
-
|
|
68
|
+
eval: {
|
|
69
69
|
name: string;
|
|
70
70
|
description: string;
|
|
71
71
|
engine: WorkbenchAdapterInvocation;
|
|
72
72
|
};
|
|
73
|
-
|
|
73
|
+
skill: {
|
|
74
74
|
name: string;
|
|
75
75
|
description?: string;
|
|
76
76
|
files: WorkbenchPathRef;
|
|
77
|
-
prepare?:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
77
|
+
prepare?: WorkbenchSkillPrepareSpec;
|
|
78
|
+
defaultAgent: string;
|
|
79
|
+
selectedAgentId: string;
|
|
80
|
+
selectedAgentName: string;
|
|
81
|
+
agents: Record<string, WorkbenchSkillAgentSpec>;
|
|
82
82
|
improve?: {
|
|
83
83
|
edits: string[];
|
|
84
84
|
optimizeOn?: WorkbenchCaseSelector;
|
|
@@ -106,17 +106,17 @@ export declare function resolveWorkbenchResolvedSourceYaml(source: string): Gene
|
|
|
106
106
|
export declare function engineResolveBindingForSourceYaml(source: string): EngineResolveBinding;
|
|
107
107
|
export declare function engineResolveBindingForSpec(spec: GenericRunSpec): EngineResolveBinding;
|
|
108
108
|
export declare function resolveWorkbenchSourceFiles(args: {
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
109
|
+
evalSource: string;
|
|
110
|
+
skillSource: string;
|
|
111
|
+
selectedAgentId?: string | null;
|
|
112
112
|
}): GenericRunSpec;
|
|
113
113
|
export declare function parseWorkbenchSourceFiles(args: {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
114
|
+
evalSource: string;
|
|
115
|
+
skillSource?: string;
|
|
116
|
+
selectedAgentId?: string | null;
|
|
117
117
|
}): WorkbenchResolvedSource;
|
|
118
118
|
export declare function serializeWorkbenchResolvedSourceYaml(source: WorkbenchResolvedSource): string;
|
|
119
|
-
export declare function
|
|
119
|
+
export declare function isWorkbenchSkillManifestPath(filePath: string): boolean;
|
|
120
120
|
export declare function resolveEngineCaseExecutionConfig(args: {
|
|
121
121
|
spec: GenericRunSpec;
|
|
122
122
|
engineCase: GenericEngineCaseSpec;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,
|
|
1
|
+
{"version":3,"file":"generic-spec.d.ts","sourceRoot":"","sources":["../src/generic-spec.ts"],"names":[],"mappings":"AACA,OAAO,EAEL,KAAK,oBAAoB,EAEzB,KAAK,mBAAmB,EACxB,KAAK,0BAA0B,EAC/B,KAAK,+BAA+B,EACpC,KAAK,2BAA2B,EAChC,KAAK,uBAAuB,EAC7B,MAAM,kCAAkC,CAAC;AAC1C,OAAO,KAAK,EACV,mBAAmB,EACnB,uBAAuB,EACxB,MAAM,kCAAkC,CAAC;AAG1C,eAAO,MAAM,cAAc,cAAc,CAAC;AAC1C,eAAO,MAAM,eAAe,eAAe,CAAC;AAE5C,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE;QACV,GAAG,CAAC,EAAE,MAAM,CAAC;QACb,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,cAAc,CAAC,EAAE,MAAM,CAAC;KACzB,CAAC;IACF,OAAO,CAAC,EAAE,+BAA+B,CAAC;CAC3C;AAED,MAAM,WAAW,gBAAgB;IAC/B,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,yBAAyB;IACxC,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,gBAAgB;IAC/B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;CACpC;AAED,MAAM,WAAW,uBAAwB,SAAQ,0BAA0B;IACzE,IAAI,EAAE,MAAM,CAAC;CACd;AAED,MAAM,WAAW,qBAAqB;IACpC,GAAG,CAAC,EAAE,IAAI,CAAC;IACX,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,sBAAsB;IACrC,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,qBAAqB,CAAC;CAC/B;AAED,MAAM,WAAW,yBAA0B,SAAQ,0BAA0B;IAC3E,KAAK,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;IACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;CACnC;AAED,MAAM,WAAW,0BAA0B;IACzC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,EAAE,gBAAgB,CAAC;IACxB,OAAO,CAAC,EAAE,yBAAyB,CAAC;IACpC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;IAChD,OAAO,CAAC,EAAE,yBAAyB,CAAC;CACrC;AAED,MAAM,WAAW,iBAAkB,SAAQ,0BAA0B;IACnE,eAAe,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,gBAAgB,CAAC;IACvB,KAAK,EAAE,iBAAiB,CAAC;CAC1B;AAED,MAAM,WAAW,cAAc;IAC7B,OAAO,EAAE,CAAC,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,IAAI,EAAE;QACJ,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,EAAE,MAAM,CAAC;QACpB,MAAM,EAAE,0BAA0B,CAAC;KACpC,CAAC;IACF,KAAK,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,KAAK,EAAE,gBAAgB,CAAC;QACxB,OAAO,CAAC,EAAE,yBAAyB,CAAC;QACpC,YAAY,EAAE,MAAM,CAAC;QACrB,eAAe,EAAE,MAAM,CAAC;QACxB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,uBAAuB,CAAC,CAAC;QAChD,OAAO,CAAC,EAAE;YACR,KAAK,EAAE,MAAM,EAAE,CAAC;YAChB,UAAU,CAAC,EAAE,qBAAqB,CAAC;YACnC,QAAQ,CAAC,EAAE,sBAAsB,CAAC;SACnC,CAAC;KACH,CAAC;IACF,WAAW,EAAE,oBAAoB,CAAC;IAClC,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,EAAE,0BAA0B,CAAC;IACnC,aAAa,EAAE,0BAA0B,CAAC;IAC1C,OAAO,CAAC,EAAE,0BAA0B,CAAC;IACrC,GAAG,EAAE,0BAA0B,CAAC;IAChC,SAAS,EAAE,0BAA0B,CAAC;CACvC;AAED,MAAM,MAAM,qBAAqB,GAAG,uBAAuB,CAAC;AAC5D,YAAY,EAAE,mBAAmB,EAAE,MAAM,kCAAkC,CAAC;AAE5E,MAAM,WAAW,iCAAiC;IAChD,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,oBAAoB,CAAC;IAClC,GAAG,EAAE,0BAA0B,CAAC;CACjC;AAQD,eAAO,MAAM,2BAA2B,EAAE,2BAKzC,CAAC;AAEF,wBAAgB,mCAAmC,CACjD,MAAM,EAAE,MAAM,GACb,uBAAuB,CAmBzB;AAED,wBAAgB,kCAAkC,CAChD,MAAM,EAAE,MAAM,GACb,cAAc,CA+BhB;AAED,wBAAgB,iCAAiC,CAC/C,MAAM,EAAE,MAAM,GACb,oBAAoB,CAEtB;AAED,wBAAgB,2BAA2B,CACzC,IAAI,EAAE,cAAc,GACnB,oBAAoB,CAStB;AAED,wBAAgB,2BAA2B,CAAC,IAAI,EAAE;IAChD,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,cAAc,CAMjB;AAED,wBAAgB,yBAAyB,CAAC,IAAI,EAAE;IAC9C,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,eAAe,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;CACjC,GAAG,uBAAuB,CAuB1B;AAED,wBAAgB,oCAAoC,CAClD,MAAM,EAAE,uBAAuB,GAC9B,MAAM,CAER;AAED,wBAAgB,4BAA4B,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAItE;AAED,wBAAgB,gCAAgC,CAAC,IAAI,EAAE;IACrD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,qBAAqB,CAAC;CACnC,GAAG,iCAAiC,CAMpC;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE,cAAc,GAAG,0BAA0B,CAE/F;AAED,wBAAgB,8BAA8B,CAAC,IAAI,EAAE;IACnD,IAAI,EAAE,cAAc,CAAC;IACrB,UAAU,EAAE,mBAAmB,CAAC;CACjC,GAAG,mBAAmB,EAAE,CAGxB;AAED,wBAAgB,qBAAqB,CACnC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,sBAAsB,CACpC,UAAU,EAAE,mBAAmB,GAC9B,mBAAmB,EAAE,CAIvB;AAED,wBAAgB,gBAAgB,CAC9B,OAAO,EAAE,oBAAoB,GAC5B,2BAA2B,CAiB7B;AAED,wBAAgB,cAAc,CAC5B,OAAO,EAAE,oBAAoB,GAC5B,+BAA+B,CAEjC;AAED,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAEvE"}
|