@workbench-ai/workbench 0.0.67 → 0.0.69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/dist/dev-open/client.css +387 -287
  2. package/dist/dev-open/client.js +202 -202
  3. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff +0 -0
  4. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-400-normal.woff2 +0 -0
  5. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff +0 -0
  6. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-500-normal.woff2 +0 -0
  7. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff +0 -0
  8. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-600-normal.woff2 +0 -0
  9. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff +0 -0
  10. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-400-normal.woff2 +0 -0
  11. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff +0 -0
  12. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-500-normal.woff2 +0 -0
  13. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff +0 -0
  14. package/dist/dev-open/fonts/ibm-plex-mono-cyrillic-ext-600-normal.woff2 +0 -0
  15. package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff +0 -0
  16. package/dist/dev-open/fonts/ibm-plex-mono-latin-400-normal.woff2 +0 -0
  17. package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff +0 -0
  18. package/dist/dev-open/fonts/ibm-plex-mono-latin-500-normal.woff2 +0 -0
  19. package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff +0 -0
  20. package/dist/dev-open/fonts/ibm-plex-mono-latin-600-normal.woff2 +0 -0
  21. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff +0 -0
  22. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-400-normal.woff2 +0 -0
  23. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff +0 -0
  24. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-500-normal.woff2 +0 -0
  25. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff +0 -0
  26. package/dist/dev-open/fonts/ibm-plex-mono-latin-ext-600-normal.woff2 +0 -0
  27. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff +0 -0
  28. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-400-normal.woff2 +0 -0
  29. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff +0 -0
  30. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-500-normal.woff2 +0 -0
  31. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff +0 -0
  32. package/dist/dev-open/fonts/ibm-plex-mono-vietnamese-600-normal.woff2 +0 -0
  33. package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff +0 -0
  34. package/dist/dev-open/fonts/libre-caslon-display-latin-400-normal.woff2 +0 -0
  35. package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff +0 -0
  36. package/dist/dev-open/fonts/libre-caslon-display-latin-ext-400-normal.woff2 +0 -0
  37. package/dist/index.d.ts +2 -6
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +2035 -5100
  40. package/dist/install-targets.d.ts +35 -0
  41. package/dist/install-targets.d.ts.map +1 -0
  42. package/dist/install-targets.js +188 -0
  43. package/dist/open-server.d.ts +12 -0
  44. package/dist/open-server.d.ts.map +1 -0
  45. package/dist/open-server.js +248 -0
  46. package/dist/output.d.ts +22 -0
  47. package/dist/output.d.ts.map +1 -0
  48. package/dist/output.js +38 -0
  49. package/package.json +5 -5
  50. package/dist/adapter-command-env.d.ts +0 -8
  51. package/dist/adapter-command-env.d.ts.map +0 -1
  52. package/dist/adapter-command-env.js +0 -80
  53. package/dist/adapter-project.d.ts +0 -29
  54. package/dist/adapter-project.d.ts.map +0 -1
  55. package/dist/adapter-project.js +0 -332
  56. package/dist/benchmark-fingerprint.d.ts +0 -6
  57. package/dist/benchmark-fingerprint.d.ts.map +0 -1
  58. package/dist/benchmark-fingerprint.js +0 -42
  59. package/dist/command-model.d.ts +0 -5
  60. package/dist/command-model.d.ts.map +0 -1
  61. package/dist/command-model.js +0 -537
  62. package/dist/dev-open-server.d.ts +0 -18
  63. package/dist/dev-open-server.d.ts.map +0 -1
  64. package/dist/dev-open-server.js +0 -297
  65. package/dist/init-scaffold.d.ts +0 -22
  66. package/dist/init-scaffold.d.ts.map +0 -1
  67. package/dist/init-scaffold.js +0 -30
  68. package/dist/init-template-pack.d.ts +0 -19
  69. package/dist/init-template-pack.d.ts.map +0 -1
  70. package/dist/init-template-pack.js +0 -262
  71. package/dist/local-archive.d.ts +0 -48
  72. package/dist/local-archive.d.ts.map +0 -1
  73. package/dist/local-archive.js +0 -838
  74. package/dist/local-inspection.d.ts +0 -9
  75. package/dist/local-inspection.d.ts.map +0 -1
  76. package/dist/local-inspection.js +0 -354
  77. package/dist/project-source.d.ts +0 -63
  78. package/dist/project-source.d.ts.map +0 -1
  79. package/dist/project-source.js +0 -682
  80. package/dist/workspace-snapshot.d.ts +0 -10
  81. package/dist/workspace-snapshot.d.ts.map +0 -1
  82. package/dist/workspace-snapshot.js +0 -81
@@ -1,297 +0,0 @@
1
- import { promises as fs } from "node:fs";
2
- import http from "node:http";
3
- import path from "node:path";
4
- import { fileURLToPath } from "node:url";
5
- import { createLocalProjectSourceReader, createLocalWorkbenchInspection, } from "./local-inspection.js";
6
/**
 * Error that carries the HTTP status the local server should respond with.
 */
class LocalApiError extends Error {
    status;
    /**
     * @param {string} message - Human-readable description of the failure.
     * @param {number} [status=400] - HTTP status code associated with the failure.
     */
    constructor(message, status = 400) {
        super(message);
        // Without an explicit name, logs and stack traces label this as a plain "Error".
        this.name = "LocalApiError";
        this.status = status;
    }
}
13
- const DEV_OPEN_ASSET_DIR = "dev-open";
14
/**
 * Starts the local Workbench HTTP server for a workspace.
 *
 * @param {object} options
 * @param {string} options.workspace - Workspace directory; resolved to an absolute path.
 * @param {string} [options.assetsRoot] - Directory holding client.js/client.css; defaults to the bundled one.
 * @param {number} options.port - Port passed to `server.listen`.
 * @param {string} options.host - Host passed to `server.listen`.
 * @returns {Promise<{url: string, close: () => Promise<void>}>} Display URL and a shutdown function.
 * @throws {Error} When the browser assets are missing or no TCP port was bound.
 */
export async function startLocalWorkbenchDevServer(options) {
    const workspace = path.resolve(options.workspace);
    const assetsRoot = options.assetsRoot ?? defaultDevOpenAssetsRoot();
    await assertDevOpenAssets(assetsRoot);
    const readProjectSource = createLocalProjectSourceReader(workspace);
    const inspection = createLocalWorkbenchInspection({ workspace, readProjectSource });
    const context = { workspace, assetsRoot, inspection };
    const onRequest = (request, response) => {
        // The handler promise is intentionally detached; failures are turned into error responses.
        void handleLocalWorkbenchRequest({ request, response, context }).catch((error) => {
            sendError(response, error, request.method);
        });
    };
    const server = http.createServer(onRequest);
    // Both server-level timeouts are disabled.
    server.requestTimeout = 0;
    server.timeout = 0;
    await new Promise((resolve, reject) => {
        const onListening = () => {
            // Startup succeeded, so the one-shot startup error listener is no longer needed.
            server.off("error", reject);
            resolve();
        };
        server.once("error", reject);
        server.listen(options.port, options.host, onListening);
    });
    const address = server.address();
    const boundToTcp = Boolean(address) && typeof address !== "string";
    if (!boundToTcp) {
        await closeServer(server);
        throw new Error("Workbench local server did not bind a TCP port.");
    }
    const host = displayHost(options.host);
    const url = `http://${host}:${address.port}/`;
    return {
        url,
        close: () => closeServer(server),
    };
}
56
/**
 * Resolves the bundled browser-asset directory that sits next to this module.
 *
 * @returns {string} Path of the DEV_OPEN_ASSET_DIR directory beside this file.
 */
function defaultDevOpenAssetsRoot() {
    const moduleFile = fileURLToPath(import.meta.url);
    const moduleDirectory = path.dirname(moduleFile);
    return path.join(moduleDirectory, DEV_OPEN_ASSET_DIR);
}
59
/**
 * Verifies that the browser bundle (client.js and client.css) exists under `assetsRoot`.
 *
 * @param {string} assetsRoot - Directory expected to contain the built client assets.
 * @returns {Promise<void>}
 * @throws {Error} When either asset cannot be stat'ed; the underlying filesystem
 *   error is attached as `cause` so ENOENT, EACCES and similar stay distinguishable.
 */
async function assertDevOpenAssets(assetsRoot) {
    const requiredAssets = ["client.js", "client.css"];
    try {
        await Promise.all(requiredAssets.map((name) => fs.stat(path.join(assetsRoot, name))));
    }
    catch (cause) {
        throw new Error(`Workbench local browser assets are missing from ${assetsRoot}. Run pnpm --dir products/workbench/packages/cli build.`, { cause });
    }
}
67
/**
 * Promise wrapper around `server.close`.
 *
 * @param {{close: (callback: (error?: Error) => void) => unknown}} server
 * @returns {Promise<void>} Resolves once the close callback fires without an error.
 */
async function closeServer(server) {
    await new Promise((resolve, reject) => {
        const onClosed = (error) => {
            if (error) {
                reject(error);
                return;
            }
            resolve();
        };
        server.close(onClosed);
    });
}
72
/**
 * Routes one incoming request: rejects non-GET/HEAD methods, forwards `/api/*`
 * to the API handler, serves the bundled assets and fonts, and answers every
 * other path with the HTML shell (404 status for unknown document paths).
 *
 * @param {{request: object, response: object, context: object}} args
 * @returns {Promise<void>}
 * @throws {LocalApiError} 404 for unknown paths under `/assets/`.
 */
async function handleLocalWorkbenchRequest(args) {
    const { request, response } = args;
    const url = new URL(request.url ?? "/", "http://workbench.local");
    const { pathname } = url;
    const { method } = request;
    const isReadOnlyMethod = method === "GET" || method === "HEAD";
    if (!isReadOnlyMethod) {
        sendJson(response, { message: "Workbench local open is read-only." }, 405, method);
        return;
    }
    if (pathname.startsWith("/api/")) {
        await handleApiRequest(request, response, args.context, url);
        return;
    }
    // Fixed bundle files: request path -> [file name under assetsRoot, content type].
    const bundledAssets = new Map([
        ["/assets/client.js", ["client.js", "text/javascript; charset=utf-8"]],
        ["/assets/client.css", ["client.css", "text/css; charset=utf-8"]],
    ]);
    const bundled = bundledAssets.get(pathname);
    if (bundled) {
        const [fileName, contentType] = bundled;
        await sendFile(response, path.join(args.context.assetsRoot, fileName), contentType, method);
        return;
    }
    if (pathname.startsWith("/assets/fonts/")) {
        await sendFontFile(response, args.context.assetsRoot, url, method);
        return;
    }
    if (pathname.startsWith("/assets/")) {
        throw new LocalApiError("Workbench local asset not found.", 404);
    }
    const status = isKnownWorkbenchDocumentPath(pathname) ? 200 : 404;
    await sendHtml(response, method, status);
}
99
/**
 * Reports whether a URL pathname matches one of the recognised document routes:
 *   /, /manifest, /files,
 *   /evaluations, /evaluations/:a, /evaluations/:a/cases/:b[/attempts|/files],
 *   /candidates, /candidates/:a, /candidates/:a/files, /candidates/:a/manifest.
 *
 * @param {string} pathname - Path portion of the request URL.
 * @returns {boolean} True when the path is a known route.
 */
function isKnownWorkbenchDocumentPath(pathname) {
    const segments = pathname.split("/").filter(Boolean).map(decodeDocumentPathSegment);
    const count = segments.length;
    if (count === 0) {
        return true;
    }
    const [root, , third, , fifth] = segments;
    switch (root) {
        case "manifest":
        case "files":
            return count === 1;
        case "evaluations":
            if (count <= 2) {
                return true;
            }
            if (third !== "cases") {
                return false;
            }
            return count === 4 || (count === 5 && (fifth === "attempts" || fifth === "files"));
        case "candidates":
            if (count <= 2) {
                return true;
            }
            return count === 3 && (third === "files" || third === "manifest");
        default:
            return false;
    }
}
128
/**
 * Percent-decodes one path segment, returning it unchanged when it is not
 * valid percent-encoding.
 *
 * @param {string} segment - Raw path segment.
 * @returns {string} Decoded segment, or the input when decoding throws.
 */
function decodeDocumentPathSegment(segment) {
    let decoded = segment;
    try {
        decoded = decodeURIComponent(segment);
    }
    catch {
        // Malformed escape sequence: keep the raw segment.
    }
    return decoded;
}
136
/**
 * Serves the `/api/*` routes by calling the matching method on
 * `context.inspection` and writing its result as JSON with status 200.
 *
 * @param {object} request - Incoming request; only `method` is read here.
 * @param {object} response - Response the JSON payload is written to.
 * @param {{inspection: object}} context - Server context holding the inspection API.
 * @param {URL} url - Parsed request URL; the pathname selects the route, the query supplies arguments.
 * @returns {Promise<void>}
 * @throws {LocalApiError} 404 for an unknown route; 400 when a query parameter is missing or invalid.
 */
async function handleApiRequest(request, response, context, url) {
    const { inspection } = context;
    const query = url.searchParams;
    // Each loader reads its query parameters lazily, in declaration order, so the
    // first missing or invalid parameter is the one that gets reported.
    const loaders = new Map([
        ["/api/snapshot", () => inspection.snapshot()],
        ["/api/spec", () => inspection.spec({
            fingerprint: readOptionalSearchString(query, "fingerprint"),
        })],
        ["/api/source/files", () => inspection.sourceFileSurface({
            fingerprint: readOptionalSearchString(query, "fingerprint"),
            path: readOptionalSearchString(query, "path"),
            view: readPreviewMode(query),
        })],
        ["/api/record", () => inspection.candidate({ id: readSearchString(query, "id") })],
        ["/api/evaluation", () => inspection.evaluation({ id: readSearchString(query, "id") })],
        ["/api/candidate/files", () => inspection.candidateFileSurface({
            id: readSearchString(query, "id"),
            path: readOptionalSearchString(query, "path"),
            view: readPreviewMode(query),
        })],
        ["/api/case-review", () => inspection.caseReview({
            candidateId: readSearchString(query, "id"),
            caseId: readSearchString(query, "case"),
            runId: readSearchString(query, "run"),
        })],
        ["/api/traces", () => inspection.executionTrace({
            runId: readSearchString(query, "run"),
            jobId: readSearchString(query, "job"),
        })],
        ["/api/execution/files", () => inspection.executionFileSurface({
            runId: readSearchString(query, "run"),
            jobId: readSearchString(query, "id"),
            path: readOptionalSearchString(query, "path"),
            view: readPreviewMode(query),
        })],
    ]);
    const load = loaders.get(url.pathname);
    if (!load) {
        throw new LocalApiError(`Unknown Workbench local API route: ${url.pathname}`, 404);
    }
    sendJson(response, await load(), 200, request.method);
}
204
/**
 * Reads a mandatory query parameter.
 *
 * @param {URLSearchParams} params - Query parameters of the request.
 * @param {string} key - Parameter name.
 * @returns {string} The parameter value, exactly as supplied.
 * @throws {LocalApiError} When the parameter is absent or empty.
 */
function readSearchString(params, key) {
    const value = params.get(key);
    if (value) {
        return value;
    }
    throw new LocalApiError(`${key} is required.`);
}
211
/**
 * Reads an optional query parameter, trimming surrounding whitespace.
 *
 * @param {URLSearchParams} params - Query parameters of the request.
 * @param {string} key - Parameter name.
 * @returns {string | null} The trimmed value, or null when absent or blank.
 */
function readOptionalSearchString(params, key) {
    const raw = params.get(key);
    const trimmed = raw?.trim();
    return trimmed || null;
}
215
/**
 * Reads the `view` query parameter, defaulting to "rendered" when it is absent.
 *
 * @param {URLSearchParams} params - Query parameters of the request.
 * @returns {"diff" | "raw" | "rendered"} The requested preview mode.
 * @throws {LocalApiError} When `view` is present but not one of the three modes.
 */
function readPreviewMode(params) {
    const view = params.get("view") ?? "rendered";
    const allowedModes = ["diff", "raw", "rendered"];
    if (!allowedModes.includes(view)) {
        throw new LocalApiError("view must be diff, raw, or rendered.");
    }
    return view;
}
222
/**
 * Reads a file fully into memory and writes it as a 200 response with
 * `cache-control: no-store`. For HEAD requests only the headers are sent.
 *
 * @param {object} response - Response to write to (`writeHead` and `end` are used).
 * @param {string} filePath - File to serve.
 * @param {string} contentType - Value for the `content-type` header.
 * @param {string} [method="GET"] - Request method; "HEAD" suppresses the body.
 * @returns {Promise<void>}
 * @throws {LocalApiError} 404 when the file does not exist; other read errors propagate unchanged.
 */
async function sendFile(response, filePath, contentType, method = "GET") {
    const body = await fs.readFile(filePath).catch((error) => {
        const failure = error.code === "ENOENT"
            ? new LocalApiError("Workbench local asset not found.", 404)
            : error;
        throw failure;
    });
    const headers = {
        "content-type": contentType,
        "content-length": body.byteLength,
        "cache-control": "no-store",
    };
    response.writeHead(200, headers);
    const payload = method === "HEAD" ? undefined : body;
    response.end(payload);
}
240
/**
 * Serves one font file from `<assetsRoot>/fonts`.
 *
 * The file name is taken from the part of the path after `/assets/fonts/`.
 * Names containing a path separator, and the special names "." and "..", are
 * rejected so the lookup cannot leave the fonts directory or hit a directory.
 *
 * @param {object} response - Response to write to.
 * @param {string} assetsRoot - Directory that contains the `fonts` folder.
 * @param {URL} url - Parsed request URL.
 * @param {string} [method="GET"] - Request method; forwarded to sendFile.
 * @returns {Promise<void>}
 * @throws {LocalApiError} 404 when the name is malformed or the file is missing.
 */
async function sendFontFile(response, assetsRoot, url, method = "GET") {
    let fileName;
    try {
        fileName = decodeURIComponent(url.pathname.slice("/assets/fonts/".length));
    }
    catch {
        throw new LocalApiError("Invalid font asset path.", 404);
    }
    const isPlainFileName = Boolean(fileName)
        && fileName !== "."
        && fileName !== ".."
        && !fileName.includes("/")
        && !fileName.includes("\\");
    if (!isPlainFileName) {
        throw new LocalApiError("Invalid font asset path.", 404);
    }
    // The package ships both .woff and .woff2 files; label each with its own media type.
    const contentType = fileName.toLowerCase().endsWith(".woff") ? "font/woff" : "font/woff2";
    await sendFile(response, path.join(assetsRoot, "fonts", fileName), contentType, method);
}
253
- async function sendHtml(response, method = "GET", status = 200) {
254
- const body = `<!doctype html>
255
- <html lang="en">
256
- <head>
257
- <meta charset="utf-8" />
258
- <meta name="viewport" content="width=device-width, initial-scale=1" />
259
- <title>Workbench Local</title>
260
- <link rel="stylesheet" href="/assets/client.css" />
261
- </head>
262
- <body>
263
- <div id="root"></div>
264
- <script type="module" src="/assets/client.js"></script>
265
- </body>
266
- </html>`;
267
- response.writeHead(status, {
268
- "content-type": "text/html; charset=utf-8",
269
- "content-length": Buffer.byteLength(body),
270
- "cache-control": "no-store",
271
- });
272
- response.end(method === "HEAD" ? undefined : body);
273
- }
274
/**
 * Writes `value` as pretty-printed JSON (2-space indent, trailing newline)
 * with `cache-control: no-store`. For HEAD requests only the headers are sent.
 *
 * @param {object} response - Response to write to (`writeHead` and `end` are used).
 * @param {unknown} value - Value to serialise.
 * @param {number} [status=200] - HTTP status code.
 * @param {string} [method="GET"] - Request method; "HEAD" suppresses the body.
 * @returns {void}
 */
function sendJson(response, value, status = 200, method = "GET") {
    const serialized = JSON.stringify(value, null, 2);
    const body = `${serialized}\n`;
    const headers = {
        "content-type": "application/json; charset=utf-8",
        "content-length": Buffer.byteLength(body),
        "cache-control": "no-store",
    };
    response.writeHead(status, headers);
    const payload = method === "HEAD" ? undefined : body;
    response.end(payload);
}
283
/**
 * Converts a thrown value into a JSON error response of the form `{ message }`.
 *
 * Status selection: a LocalApiError supplies its own status, any other value
 * with a numeric `statusCode` uses that, and everything else becomes 500.
 *
 * If the response headers were already sent, no second response is attempted.
 * Calling `writeHead` again would throw inside the detached request handler, so
 * the response is destroyed instead, unless it has already finished.
 *
 * @param {object} response - Response to write to.
 * @param {unknown} error - The thrown value.
 * @param {string} [method="GET"] - Request method; forwarded to sendJson.
 * @returns {void}
 */
function sendError(response, error, method = "GET") {
    if (response.headersSent) {
        if (!response.writableEnded) {
            response.destroy();
        }
        return;
    }
    const message = error instanceof Error ? error.message : String(error);
    let status = 500;
    if (error instanceof LocalApiError) {
        status = error.status;
    }
    else if (typeof error?.statusCode === "number") {
        status = error.statusCode;
    }
    sendJson(response, { message }, status, method);
}
292
/**
 * Turns the host the server was asked to listen on into one usable in a URL.
 *
 * A missing or empty host and the wildcard addresses ("0.0.0.0", "::") are
 * shown as "127.0.0.1". Any other host containing ":" is wrapped in square
 * brackets unless it already starts with "[".
 *
 * @param {string | undefined | null} host - Host passed to `server.listen`.
 * @returns {string} Host text for the authority part of a URL.
 */
function displayHost(host) {
    // `server.listen` accepts an omitted host, so it must not crash here.
    if (host == null || host === "" || host === "0.0.0.0" || host === "::") {
        return "127.0.0.1";
    }
    const needsBrackets = host.includes(":") && !host.startsWith("[");
    return needsBrackets ? `[${host}]` : host;
}
@@ -1,22 +0,0 @@
1
- export type InitCandidateKind = "skill" | "command";
2
- export type InitAgent = string;
3
- export interface WorkbenchInitScaffoldOptions {
4
- kind: InitCandidateKind;
5
- name: string;
6
- agent?: InitAgent;
7
- example: boolean;
8
- }
9
- export interface WorkbenchInitScaffoldFile {
10
- path: string;
11
- content: string;
12
- }
13
- export interface WorkbenchInitScaffold {
14
- kind: InitCandidateKind;
15
- name: string;
16
- candidateRoot: string;
17
- seedFileTarget: string;
18
- seedDirectoryTarget: string;
19
- files: WorkbenchInitScaffoldFile[];
20
- }
21
- export declare function createWorkbenchInitScaffold(options: WorkbenchInitScaffoldOptions): WorkbenchInitScaffold;
22
- //# sourceMappingURL=init-scaffold.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"init-scaffold.d.ts","sourceRoot":"","sources":["../src/init-scaffold.ts"],"names":[],"mappings":"AAEA,MAAM,MAAM,iBAAiB,GAAG,OAAO,GAAG,SAAS,CAAC;AACpD,MAAM,MAAM,SAAS,GAAG,MAAM,CAAC;AAE/B,MAAM,WAAW,4BAA4B;IAC3C,IAAI,EAAE,iBAAiB,CAAC;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,SAAS,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,yBAAyB;IACxC,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,iBAAiB,CAAC;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,aAAa,EAAE,MAAM,CAAC;IACtB,cAAc,EAAE,MAAM,CAAC;IACvB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,KAAK,EAAE,yBAAyB,EAAE,CAAC;CACpC;AAED,wBAAgB,2BAA2B,CAAC,OAAO,EAAE,4BAA4B,GAAG,qBAAqB,CAkBxG"}
@@ -1,30 +0,0 @@
1
- import { defaultWorkbenchInitTemplatePack } from "./init-template-pack.js";
2
/**
 * Builds the scaffold description (target paths plus file contents) for a new
 * Workbench candidate from the template registered for `options.kind`.
 *
 * @param {{kind: string, name: string, agent?: string, example: boolean}} options
 * @returns {{kind: string, name: string, candidateRoot: string, seedFileTarget: string, seedDirectoryTarget: string, files: Array<{path: string, content: string}>}}
 * @throws {Error} When the template requires an agent and `options.agent` is missing or malformed.
 */
export function createWorkbenchInitScaffold(options) {
    const { kind, name, example } = options;
    const template = defaultWorkbenchInitTemplatePack[kind];
    const context = { name, slug: slugify(name) };
    // `agent` is added only for templates that require one, keeping key order name, slug, agent, example.
    if (template.requiresAgent) {
        context.agent = requireAgent(options);
    }
    context.example = example;
    const candidateRoot = template.candidateRoot(context);
    const seedFileTarget = template.seedFileTarget(context);
    const seedDirectoryTarget = template.seedDirectoryTarget(context);
    const files = template.files(context);
    return {
        kind: template.kind,
        name,
        candidateRoot,
        seedFileTarget,
        seedDirectoryTarget,
        files,
    };
}
21
/**
 * Returns `options.agent` after checking that it looks like an adapter id:
 * a lowercase letter followed by lowercase letters, digits or hyphens.
 *
 * @param {{agent?: string, kind: string}} options
 * @returns {string} The validated agent id.
 * @throws {Error} When the agent is missing or does not match the pattern.
 */
function requireAgent(options) {
    const { agent, kind } = options;
    const adapterIdPattern = /^[a-z][a-z0-9-]*$/u;
    const isAdapterId = Boolean(agent) && adapterIdPattern.test(agent);
    if (!isAdapterId) {
        throw new Error(`--agent is required for --${kind} and must be a lowercase adapter id.`);
    }
    return agent;
}
27
/**
 * Derives a lowercase, hyphen-separated ASCII slug from a display name.
 *
 * Accented letters are folded to their base letter (NFKD decomposition, then
 * combining marks are removed) so "Crème Brûlée" becomes "creme-brulee" rather
 * than "cr-me-br-l-e". Every remaining run of characters outside a-z and 0-9
 * becomes a single hyphen, and leading and trailing hyphens are removed.
 *
 * @param {string} value - Human-readable name.
 * @returns {string} The slug, or "workbench-candidate" when nothing usable remains.
 */
function slugify(value) {
    const slug = value
        .normalize("NFKD")
        .replace(/\p{M}+/gu, "")
        .trim()
        .toLowerCase()
        .replace(/[^a-z0-9]+/gu, "-")
        .replace(/^-+|-+$/gu, "");
    return slug || "workbench-candidate";
}
@@ -1,19 +0,0 @@
1
- import type { InitAgent, InitCandidateKind, WorkbenchInitScaffoldFile } from "./init-scaffold.js";
2
- interface WorkbenchInitTemplateContext {
3
- name: string;
4
- slug: string;
5
- agent?: InitAgent;
6
- example: boolean;
7
- }
8
- export interface WorkbenchInitTemplate {
9
- kind: InitCandidateKind;
10
- requiresAgent: boolean;
11
- candidateRoot(context: WorkbenchInitTemplateContext): string;
12
- seedFileTarget(context: WorkbenchInitTemplateContext): string;
13
- seedDirectoryTarget(context: WorkbenchInitTemplateContext): string;
14
- files(context: WorkbenchInitTemplateContext): WorkbenchInitScaffoldFile[];
15
- }
16
- export type WorkbenchInitTemplatePack = Record<InitCandidateKind, WorkbenchInitTemplate>;
17
- export declare const defaultWorkbenchInitTemplatePack: WorkbenchInitTemplatePack;
18
- export {};
19
- //# sourceMappingURL=init-template-pack.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"init-template-pack.d.ts","sourceRoot":"","sources":["../src/init-template-pack.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,SAAS,EACT,iBAAiB,EACjB,yBAAyB,EAC1B,MAAM,oBAAoB,CAAC;AAE5B,UAAU,4BAA4B;IACpC,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,CAAC,EAAE,SAAS,CAAC;IAClB,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,qBAAqB;IACpC,IAAI,EAAE,iBAAiB,CAAC;IACxB,aAAa,EAAE,OAAO,CAAC;IACvB,aAAa,CAAC,OAAO,EAAE,4BAA4B,GAAG,MAAM,CAAC;IAC7D,cAAc,CAAC,OAAO,EAAE,4BAA4B,GAAG,MAAM,CAAC;IAC9D,mBAAmB,CAAC,OAAO,EAAE,4BAA4B,GAAG,MAAM,CAAC;IACnE,KAAK,CAAC,OAAO,EAAE,4BAA4B,GAAG,yBAAyB,EAAE,CAAC;CAC3E;AAED,MAAM,MAAM,yBAAyB,GAAG,MAAM,CAAC,iBAAiB,EAAE,qBAAqB,CAAC,CAAC;AAEzF,eAAO,MAAM,gCAAgC,EAAE,yBA+C9C,CAAC"}
@@ -1,262 +0,0 @@
1
- export const defaultWorkbenchInitTemplatePack = {
2
- skill: {
3
- kind: "skill",
4
- requiresAgent: true,
5
- candidateRoot: () => "candidates/current/files",
6
- seedFileTarget: () => "candidates/current/files/SKILL.md",
7
- seedDirectoryTarget: () => "candidates/current/files",
8
- files: ({ name, slug, agent, example }) => {
9
- const adapter = requiredAgent(agent);
10
- return [
11
- { path: "benchmark.yaml", content: skillBenchmarkSpec(name, adapter) },
12
- { path: "candidates/current/candidate.yaml", content: skillCandidateSpec(name, adapter) },
13
- { path: "candidates/current/files/SKILL.md", content: skillMarkdown(name, slug, example) },
14
- { path: "candidates/current/files/prepare.sh", content: candidatePrepareScript() },
15
- { path: "candidates/current/files/agents/openai.yaml", content: skillOpenAiMetadata(name, slug) },
16
- { path: "environment/Dockerfile", content: nodeDockerfile() },
17
- { path: "tasks/task-001/task.yaml", content: taskYaml(skillCasePrompt(name)) },
18
- { path: "tasks/task-001/tests/rubric.md", content: skillExpectedRubric() },
19
- ...(example ? [
20
- { path: "tasks/task-002/task.yaml", content: taskYaml(`Use ${name} for a second realistic prompt with different constraints.\n`) },
21
- { path: "tasks/task-002/tests/rubric.md", content: skillExpectedRubric() },
22
- ] : []),
23
- ];
24
- },
25
- },
26
- command: {
27
- kind: "command",
28
- requiresAgent: false,
29
- candidateRoot: () => "candidates/current/files",
30
- seedFileTarget: () => "candidates/current/files/run.js",
31
- seedDirectoryTarget: () => "candidates/current/files",
32
- files: ({ name, example }) => [
33
- { path: "benchmark.yaml", content: commandBenchmarkSpec(name) },
34
- { path: "candidates/current/candidate.yaml", content: commandCandidateSpec(name) },
35
- { path: "candidates/current/files/run.js", content: commandRunnerSource() },
36
- { path: "candidates/current/files/prepare.sh", content: candidatePrepareScript() },
37
- { path: "environment/Dockerfile", content: nodeDockerfile() },
38
- { path: "tasks/task-001/task.yaml", content: taskYaml("The command should produce a concise result for this task.\n") },
39
- { path: "tasks/task-001/tests/required-output.txt", content: "command candidate ran\n" },
40
- { path: "tasks/task-001/tests/test.sh", content: commandTestScript() },
41
- ...(example ? [
42
- { path: "tasks/task-002/task.yaml", content: taskYaml("The command should still produce deterministic output for a second task.\n") },
43
- { path: "tasks/task-002/tests/required-output.txt", content: "command candidate ran\n" },
44
- { path: "tasks/task-002/tests/test.sh", content: commandTestScript() },
45
- ] : []),
46
- ],
47
- },
48
- };
49
/**
 * Returns the agent adapter id, failing when none was supplied.
 *
 * @param {string | undefined} agent - Agent adapter id from the template context.
 * @returns {string} The same id.
 * @throws {Error} When `agent` is missing or empty.
 */
function requiredAgent(agent) {
    if (agent) {
        return agent;
    }
    throw new Error("Template requires an agent adapter id.");
}
55
/**
 * Serialises a value with JSON.stringify for use as a quoted YAML scalar in the
 * generated templates.
 *
 * @param {unknown} value - Value to quote.
 * @returns {string} The JSON text of `value`.
 */
function yamlString(value) {
    const quoted = JSON.stringify(value);
    return quoted;
}
58
- function taskYaml(task) {
59
- return [
60
- "version: 3",
61
- "task: |-",
62
- ...task.trimEnd().split("\n").map((line) => ` ${line}`),
63
- "tests:",
64
- " path: tests",
65
- "",
66
- ].join("\n");
67
- }
68
- function skillBenchmarkSpec(name, agent) {
69
- return [
70
- "version: 4",
71
- `name: ${yamlString(name)}`,
72
- `description: ${yamlString(`Evaluate the ${name} skill across representative tasks.`)}`,
73
- "engine:",
74
- " use: workbench",
75
- " with:",
76
- " environment:",
77
- " dockerfile: environment/Dockerfile",
78
- " score:",
79
- " use: rubric",
80
- " with:",
81
- " instructions: Score the completed task from the current working directory and engine-private verifier files. Do not score the candidate guidance by keyword matching.",
82
- " parallelism: 2",
83
- " judge:",
84
- ` use: ${agent}`,
85
- ...agentDefaultWithLines(agent, " "),
86
- " criteria:",
87
- " - id: task_fit",
88
- " description: The response follows the task prompt and uses the skill's workflow.",
89
- " weight: 1",
90
- " - id: output_quality",
91
- " description: The produced output is complete, readable, and directly useful.",
92
- " weight: 1",
93
- "",
94
- ].join("\n");
95
- }
96
- function skillCandidateSpec(name, agent) {
97
- return [
98
- "version: 4",
99
- `name: ${yamlString(name)}`,
100
- "files:",
101
- " path: files",
102
- "prepare:",
103
- " command: sh input/candidate/prepare.sh",
104
- "defaultRun: main",
105
- "runs:",
106
- " main:",
107
- ` name: ${yamlString(`Run with ${agent}`)}`,
108
- ` use: ${agent}`,
109
- ...agentDefaultWithLines(agent, " "),
110
- "improve:",
111
- " edits:",
112
- " - SKILL.md",
113
- ` use: ${agent}`,
114
- ...agentDefaultWithLines(agent, " "),
115
- "",
116
- ].join("\n");
117
- }
118
- function agentDefaultWithLines(agent, indent) {
119
- if (agent !== "codex") {
120
- return [];
121
- }
122
- return [
123
- `${indent}with:`,
124
- `${indent} model: gpt-5.5`,
125
- ];
126
- }
127
- function commandBenchmarkSpec(name) {
128
- return [
129
- "version: 4",
130
- `name: ${yamlString(name)}`,
131
- `description: ${yamlString(`Evaluate the ${name} command implementation across representative tasks.`)}`,
132
- "engine:",
133
- " use: workbench",
134
- " with:",
135
- " environment:",
136
- " dockerfile: environment/Dockerfile",
137
- " score:",
138
- " use: tests",
139
- "",
140
- ].join("\n");
141
- }
142
- function commandCandidateSpec(name) {
143
- const runnerCommand = JSON.stringify("node run.js");
144
- const improveCommand = JSON.stringify("node -e \"const fs=require('fs');const file='run.js';const current=fs.existsSync(file)?fs.readFileSync(file,'utf8'):'';const next=current.replace(/\\s*$/,'')+'\\n// Workbench candidate revision.\\n';fs.writeFileSync(file,next);\"");
145
- return [
146
- "version: 4",
147
- `name: ${yamlString(name)}`,
148
- "files:",
149
- " path: files",
150
- "prepare:",
151
- " command: sh input/candidate/prepare.sh",
152
- "defaultRun: main",
153
- "runs:",
154
- " main:",
155
- " name: Command",
156
- " use: command",
157
- " with:",
158
- ` command: ${runnerCommand}`,
159
- "improve:",
160
- " edits:",
161
- " - run.js",
162
- " use: command",
163
- " with:",
164
- ` command: ${improveCommand}`,
165
- "",
166
- ].join("\n");
167
- }
168
/**
 * Returns the text of the generated `prepare.sh`: a POSIX shell script that
 * copies the contents of `input/candidate` into the current directory.
 *
 * @returns {string} Script text ending with a newline.
 */
function candidatePrepareScript() {
    const scriptLines = ["#!/usr/bin/env sh", "set -eu", "cp -R input/candidate/. ."];
    return `${scriptLines.join("\n")}\n`;
}
176
- function commandTestScript() {
177
- return [
178
- "#!/usr/bin/env sh",
179
- "set -eu",
180
- "expected=$(node -e \"const fs=require('fs'),path=require('path');const r=JSON.parse(fs.readFileSync(process.env.WORKBENCH_ADAPTER_REQUEST,'utf8'));process.stdout.write(fs.readFileSync(path.join(r.paths.enginePrivate,'required-output.txt'),'utf8'));\")",
181
- "verifier_output=${WORKBENCH_TESTS_VERIFIER_DIR:-$(node -e \"const fs=require('fs'),path=require('path');const r=JSON.parse(fs.readFileSync(process.env.WORKBENCH_ADAPTER_REQUEST,'utf8'));process.stdout.write(path.join(r.paths.output,'.workbench','internal','verifier'));\")}",
182
- "actual=$(cat command-output.txt 2>/dev/null || true)",
183
- "mkdir -p \"$verifier_output\"",
184
- "case \"$actual\" in",
185
- " *\"$expected\"*) printf '{\"reward\":1,\"exact\":1}\\n' > \"$verifier_output/reward.json\" ;;",
186
- " *) printf '{\"reward\":0,\"exact\":0}\\n' > \"$verifier_output/reward.json\" ;;",
187
- "esac",
188
- "",
189
- ].join("\n");
190
- }
191
- function nodeDockerfile() {
192
- return [
193
- "FROM node:22-slim",
194
- "",
195
- ...caCertificatesDockerfileStep(),
196
- "",
197
- ].join("\n");
198
- }
199
- function caCertificatesDockerfileStep() {
200
- return [
201
- "RUN apt-get update \\",
202
- " && apt-get install -y --no-install-recommends ca-certificates \\",
203
- " && rm -rf /var/lib/apt/lists/*",
204
- ];
205
- }
206
- function skillMarkdown(name, slug, example) {
207
- return [
208
- "---",
209
- `name: ${slug}`,
210
- `description: ${yamlString(`Use this skill whenever the user asks for ${name} work, related deliverable generation, or iterative improvement of this workflow.`)}`,
211
- "---",
212
- "",
213
- `# ${name}`,
214
- "",
215
- "Use this skill to turn the user's request into a concrete deliverable.",
216
- "",
217
- "## Workflow",
218
- "",
219
- "1. Identify the requested deliverable and success criteria.",
220
- "2. Gather only the source context needed for this deliverable.",
221
- "3. Produce the deliverable in the format the user can use directly.",
222
- "4. Validate the output against the success criteria before returning it.",
223
- "",
224
- ...(example ? [
225
- "## Example",
226
- "",
227
- "When a prompt asks for a concrete deliverable, produce the deliverable first and keep explanation secondary.",
228
- "",
229
- ] : []),
230
- ].join("\n");
231
- }
232
- function skillOpenAiMetadata(name, slug) {
233
- return [
234
- `name: ${slug}`,
235
- `description: ${yamlString(`Generate and improve ${name} deliverables.`)}`,
236
- "",
237
- ].join("\n");
238
- }
239
/**
 * Returns the two-line task prompt used for the first generated skill task.
 *
 * @param {string} name - Display name of the skill.
 * @returns {string} Prompt text ending with a newline.
 */
function skillCasePrompt(name) {
    const request = `Use the ${name} skill to produce a small but complete deliverable for a realistic request.`;
    const deliverable = "Return the deliverable content and a one-sentence validation note.";
    return `${request}\n${deliverable}\n`;
}
246
/**
 * Returns the rubric text written to each generated skill task's `tests/rubric.md`.
 *
 * @returns {string} Rubric text ending with a newline.
 */
function skillExpectedRubric() {
    const reward = "Reward complete, usable deliverables created from the task input.";
    const penalty = "Penalize placeholder output, missing validation, and keyword-only compliance.";
    return `${reward}\n${penalty}\n`;
}
253
/**
 * Returns the source of the generated `run.js`: a CommonJS script that writes
 * "command candidate ran" to `command-output.txt` in the working directory and
 * logs the same text.
 *
 * @returns {string} Script source ending with a newline.
 */
function commandRunnerSource() {
    const requires = ["const fs = require('fs');", "const path = require('path');"];
    const actions = [
        "fs.writeFileSync(path.join(process.cwd(), 'command-output.txt'), 'command candidate ran\\n');",
        "console.log('command candidate ran');",
    ];
    // A blank line separates the requires from the actions; the final "" yields the trailing newline.
    return [...requires, "", ...actions, ""].join("\n");
}