@vibe-agent-toolkit/agent-skills 0.1.39-rc.7 → 0.1.39-rc.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/skill-test/build-hook.d.ts +58 -0
- package/dist/skill-test/build-hook.d.ts.map +1 -0
- package/dist/skill-test/build-hook.js +63 -0
- package/dist/skill-test/build-hook.js.map +1 -0
- package/dist/skill-test/exit-codes.d.ts +3 -2
- package/dist/skill-test/exit-codes.d.ts.map +1 -1
- package/dist/skill-test/exit-codes.js +5 -2
- package/dist/skill-test/exit-codes.js.map +1 -1
- package/dist/skill-test/experimenter-prompt.d.ts.map +1 -1
- package/dist/skill-test/experimenter-prompt.js +18 -2
- package/dist/skill-test/experimenter-prompt.js.map +1 -1
- package/dist/skill-test/grading-adapter.d.ts +6 -1
- package/dist/skill-test/grading-adapter.d.ts.map +1 -1
- package/dist/skill-test/grading-adapter.js +25 -18
- package/dist/skill-test/grading-adapter.js.map +1 -1
- package/dist/skill-test/grading-schema.d.ts +171 -0
- package/dist/skill-test/grading-schema.d.ts.map +1 -0
- package/dist/skill-test/grading-schema.js +65 -0
- package/dist/skill-test/grading-schema.js.map +1 -0
- package/dist/skill-test/harness-location.d.ts +13 -0
- package/dist/skill-test/harness-location.d.ts.map +1 -1
- package/dist/skill-test/harness-location.js +31 -1
- package/dist/skill-test/harness-location.js.map +1 -1
- package/dist/skill-test/index.d.ts +1 -0
- package/dist/skill-test/index.d.ts.map +1 -1
- package/dist/skill-test/index.js +1 -0
- package/dist/skill-test/index.js.map +1 -1
- package/dist/skill-test/lock.js +1 -1
- package/dist/skill-test/lock.js.map +1 -1
- package/dist/skill-test/plugin-env.d.ts +20 -0
- package/dist/skill-test/plugin-env.d.ts.map +1 -0
- package/dist/skill-test/plugin-env.js +24 -0
- package/dist/skill-test/plugin-env.js.map +1 -0
- package/dist/skill-test/plugin-layout.d.ts +41 -0
- package/dist/skill-test/plugin-layout.d.ts.map +1 -0
- package/dist/skill-test/plugin-layout.js +49 -0
- package/dist/skill-test/plugin-layout.js.map +1 -0
- package/dist/skill-test/run-harness.d.ts +12 -0
- package/dist/skill-test/run-harness.d.ts.map +1 -1
- package/dist/skill-test/run-harness.js +60 -14
- package/dist/skill-test/run-harness.js.map +1 -1
- package/dist/skill-test/staging.d.ts +20 -1
- package/dist/skill-test/staging.d.ts.map +1 -1
- package/dist/skill-test/staging.js +56 -13
- package/dist/skill-test/staging.js.map +1 -1
- package/package.json +5 -5
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* build-hook.ts — optional pre-stage build step for `vat skill test run`.
|
|
3
|
+
*
|
|
4
|
+
* When the test config includes a `build:` field, this module runs that shell
|
|
5
|
+
* command ONCE before staging so that generated artifacts (e.g. bundled scripts
|
|
6
|
+
* not committed to source) are present in the source tree for staging to copy.
|
|
7
|
+
*
|
|
8
|
+
* The command runs with cwd = the CONFIG ROOT (the directory containing
|
|
9
|
+
* vibe-agent-toolkit.config.yaml), because real build commands are root-level
|
|
10
|
+
* package scripts (e.g. `pnpm bundle:report`).
|
|
11
|
+
*
|
|
12
|
+
* Security note: the `build:` field is a developer-authored value from the
|
|
13
|
+
* project's own vibe-agent-toolkit.config.yaml — a trusted source under the
|
|
14
|
+
* adopter's source control. The command is passed directly to the OS shell
|
|
15
|
+
* (shell: true) intentionally, because build commands frequently include shell
|
|
16
|
+
* syntax (npm script chaining, env vars, etc.). This is equivalent to running
|
|
17
|
+
* `npm run build` or `pnpm bundle:report` from the terminal; it is NOT arbitrary
|
|
18
|
+
* user input. The adopter is already executing skill code via this command
|
|
19
|
+
* (`vat skill test run` requires --i-understand-this-runs-skill-code).
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Inputs accepted by the pre-stage build hook.
 *
 * `spawnFn` exists so unit tests can substitute a mock (e.g. `vi.fn()`);
 * production callers omit it and the hook falls back to
 * `node:child_process` `spawnSync`.
 */
export interface BuildHookOptions {
    /**
     * Shell command to execute, taken from `test.build` in
     * vibe-agent-toolkit.config.yaml. When `undefined`, the hook is a no-op.
     */
    buildCommand: string | undefined;
    /** Absolute path of the config root; used as the build command's cwd. */
    configRoot: string;
    /**
     * Optional spawn implementation, injectable for unit testing.
     * When not provided, `node:child_process` `spawnSync` is used.
     */
    spawnFn?: (
        cmd: string,
        opts: { shell: boolean; cwd: string; stdio: 'inherit' },
    ) => { status: number | null };
}
|
|
44
|
+
/**
 * Error raised when the pre-stage build command exits with a non-zero code.
 * Maps to the preflight exit code (2).
 */
export declare class BuildHookError extends Error {
    /** Exit code reported for the failed build command. */
    readonly buildExitCode: number;
    /** Process exit code this error maps to (preflight). */
    readonly exitCode: 2;
    constructor(message: string, buildExitCode: number);
}
|
|
50
|
+
/**
 * Execute the pre-stage build hook when one is configured.
 *
 * `opts.buildCommand` is run in a shell with `cwd = opts.configRoot`.
 * A non-zero exit makes the function throw `BuildHookError`, whose message
 * names the command and its exit code. With `buildCommand` undefined the call
 * is a no-op (behavior unchanged).
 */
export declare function runPreStageBuild(
    opts: BuildHookOptions,
): void;
|
|
58
|
+
//# sourceMappingURL=build-hook.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"build-hook.d.ts","sourceRoot":"","sources":["../../src/skill-test/build-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAIH;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAC/B,kFAAkF;IAClF,YAAY,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,oEAAoE;IACpE,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,SAAS,CAAA;KAAE,KAAK;QAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,CAAC;CAC/G;AAED,sGAAsG;AACtG,qBAAa,cAAe,SAAQ,KAAK;aAEM,aAAa,EAAE,MAAM;IADlE,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM,EAAkB,aAAa,EAAE,MAAM;CAInE;AAeD;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,IAAI,CAe7D"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* build-hook.ts — optional pre-stage build step for `vat skill test run`.
|
|
3
|
+
*
|
|
4
|
+
* When the test config includes a `build:` field, this module runs that shell
|
|
5
|
+
* command ONCE before staging so that generated artifacts (e.g. bundled scripts
|
|
6
|
+
* not committed to source) are present in the source tree for staging to copy.
|
|
7
|
+
*
|
|
8
|
+
* The command runs with cwd = the CONFIG ROOT (the directory containing
|
|
9
|
+
* vibe-agent-toolkit.config.yaml), because real build commands are root-level
|
|
10
|
+
* package scripts (e.g. `pnpm bundle:report`).
|
|
11
|
+
*
|
|
12
|
+
* Security note: the `build:` field is a developer-authored value from the
|
|
13
|
+
* project's own vibe-agent-toolkit.config.yaml — a trusted source under the
|
|
14
|
+
* adopter's source control. The command is passed directly to the OS shell
|
|
15
|
+
* (shell: true) intentionally, because build commands frequently include shell
|
|
16
|
+
* syntax (npm script chaining, env vars, etc.). This is equivalent to running
|
|
17
|
+
* `npm run build` or `pnpm bundle:report` from the terminal; it is NOT arbitrary
|
|
18
|
+
* user input. The adopter is already executing skill code via this command
|
|
19
|
+
* (`vat skill test run` requires --i-understand-this-runs-skill-code).
|
|
20
|
+
*/
|
|
21
|
+
import { spawnSync } from 'node:child_process';
|
|
22
|
+
/**
 * Signals that the pre-stage build command ended with a non-zero exit code.
 * Carries `exitCode = 2`, the value used for preflight failures.
 */
export class BuildHookError extends Error {
    /** Exit code reported for the failed build command. */
    buildExitCode;
    /** Process exit code associated with this error (preflight). */
    exitCode = 2;
    constructor(message, buildExitCode) {
        super(message);
        // Same assignment order as separate statements: buildExitCode, then name.
        Object.assign(this, { buildExitCode, name: 'BuildHookError' });
    }
}
|
|
32
|
+
/**
 * Built-in spawn implementation used when no `spawnFn` is injected.
 * Forwards `opts` to `spawnSync` and always forces `shell: true`.
 *
 * The command is the developer-authored `build:` value from the adopter's own
 * vibe-agent-toolkit.config.yaml — comparable to typing `pnpm bundle:report`
 * in a terminal, NOT arbitrary user input. The adopter has already opted in to
 * running skill code via --i-understand-this-runs-skill-code.
 */
function defaultSpawn(cmd, opts) {
    // Caller-supplied options first, then `shell: true` so it always wins.
    const shellOptions = { ...opts, shell: true };
    // eslint-disable-next-line sonarjs/os-command -- developer-authored build command from trusted project config; equivalent to running pnpm/npm build manually
    return spawnSync(cmd, shellOptions);
}
|
|
44
|
+
/**
 * Run the optional pre-stage build hook.
 *
 * No-op when `opts.buildCommand` is `undefined`. Otherwise the command is run
 * once through `opts.spawnFn` (or `defaultSpawn` when none is injected) in a
 * shell, with `cwd = opts.configRoot` and inherited stdio.
 *
 * @param opts - Build command, config root and optional injectable spawn function.
 * @throws BuildHookError when the command does not finish with exit status 0.
 *   `buildExitCode` is the reported status, or -1 when no status was reported.
 */
export function runPreStageBuild(opts) {
    const { buildCommand, configRoot } = opts;
    if (buildCommand === undefined)
        return;
    const spawn = opts.spawnFn ?? defaultSpawn;
    const result = spawn(buildCommand, { shell: true, cwd: configRoot, stdio: 'inherit' });
    if (result.status === 0)
        return;
    const status = result.status ?? -1;
    let outcome = `exited with code ${status}`;
    if (result.status === null || result.status === undefined) {
        // spawnSync reports `status: null` when the process never started
        // (`error` is set) or was killed by a signal (`signal` is set). Name the
        // real cause instead of a misleading "exited with code -1".
        if (result.error instanceof Error) {
            outcome = `could not be run (${result.error.message})`;
        }
        else if (typeof result.signal === 'string') {
            outcome = `was terminated by signal ${result.signal}`;
        }
    }
    throw new BuildHookError(`Pre-stage build hook failed: command "${buildCommand}" ${outcome}. ` +
        `Resolve the build error before running vat skill test run.`, status);
}
|
|
63
|
+
//# sourceMappingURL=build-hook.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"build-hook.js","sourceRoot":"","sources":["../../src/skill-test/build-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAoB/C,sGAAsG;AACtG,MAAM,OAAO,cAAe,SAAQ,KAAK;IAEM;IADpC,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAAY,OAAe,EAAkB,aAAqB;QAChE,KAAK,CAAC,OAAO,CAAC,CAAC;QAD4B,kBAAa,GAAb,aAAa,CAAQ;QAEhE,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED;;;;;;;GAOG;AACH,SAAS,YAAY,CAAC,GAAW,EAAE,IAAuD;IACxF,6JAA6J;IAC7J,OAAO,SAAS,CAAC,GAAG,EAAE,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;AAClD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAsB;IACrD,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC1C,IAAI,YAAY,KAAK,SAAS;QAAE,OAAO;IAEvC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,IAAI,YAAY,CAAC;IAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,YAAY,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IACvF,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAEnC,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;QACjB,MAAM,IAAI,cAAc,CACtB,yCAAyC,YAAY,sBAAsB,MAAM,IAAI;YACnF,4DAA4D,EAC9D,MAAM,CACP,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -25,8 +25,9 @@ export declare class InternalHarnessError extends Error {
|
|
|
25
25
|
* Map any thrown error to the process exit code. Errors that carry their own
|
|
26
26
|
* `exitCode` (Bootstrap/Auth/HarnessLocation/Internal) are authoritative;
|
|
27
27
|
* a PromptInvariantError is a user-correctable preflight problem (a supplied
|
|
28
|
-
* prompt override is missing a required safety instruction) → 2;
|
|
29
|
-
* is a
|
|
28
|
+
* prompt override is missing a required safety instruction) → 2; a BuildHookError
|
|
29
|
+
* is a pre-stage build failure → 2; GradingSkewError is a parse failure → 1;
|
|
30
|
+
* everything unknown → 1.
|
|
30
31
|
*/
|
|
31
32
|
export declare function mapErrorToExitCode(err: unknown): number;
|
|
32
33
|
//# sourceMappingURL=exit-codes.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exit-codes.d.ts","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"exit-codes.d.ts","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAOA,kDAAkD;AAClD,eAAO,MAAM,iBAAiB;;;;;CAKpB,CAAC;AAEX,MAAM,MAAM,sBAAsB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;AAEhG;;;;GAIG;AACH,qBAAa,oBAAqB,SAAQ,KAAK;aAEjB,YAAY,EAAE,MAAM;IADhD,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACH,YAAY,EAAE,MAAM;CAIjD;AAED,gGAAgG;AAChG,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAYvD"}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { AuthPreflightError } from '@vibe-agent-toolkit/utils';
|
|
2
|
+
import { BuildHookError } from './build-hook.js';
|
|
2
3
|
import { PromptInvariantError } from './experimenter-prompt.js';
|
|
3
4
|
import { GradingSkewError } from './grading-adapter.js';
|
|
4
5
|
import { HarnessLocationError } from './harness-location.js';
|
|
@@ -35,13 +36,15 @@ export class InternalHarnessError extends Error {
|
|
|
35
36
|
* Map any thrown error to the process exit code. Errors that carry their own
|
|
36
37
|
* `exitCode` (Bootstrap/Auth/HarnessLocation/Internal) are authoritative;
|
|
37
38
|
* a PromptInvariantError is a user-correctable preflight problem (a supplied
|
|
38
|
-
* prompt override is missing a required safety instruction) → 2;
|
|
39
|
-
* is a
|
|
39
|
+
* prompt override is missing a required safety instruction) → 2; a BuildHookError
|
|
40
|
+
* is a pre-stage build failure → 2; GradingSkewError is a parse failure → 1;
|
|
41
|
+
* everything unknown → 1.
|
|
40
42
|
*/
|
|
41
43
|
export function mapErrorToExitCode(err) {
|
|
42
44
|
if (err instanceof BootstrapNeededError)
|
|
43
45
|
return SkillTestExitCode.Bootstrap;
|
|
44
46
|
if (err instanceof AuthPreflightError ||
|
|
47
|
+
err instanceof BuildHookError ||
|
|
45
48
|
err instanceof HarnessLocationError ||
|
|
46
49
|
err instanceof PromptInvariantError) {
|
|
47
50
|
return SkillTestExitCode.Preflight;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"exit-codes.js","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,0BAA0B,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAE7D,kDAAkD;AAClD,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,EAAE,EAAE,CAAC;IACL,QAAQ,EAAE,CAAC;IACX,SAAS,EAAE,CAAC;IACZ,SAAS,EAAE,CAAC;CACJ,CAAC;AAIX;;;;GAIG;AACH,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAEjB;IADnB,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAA4B,YAAoB;QAC9C,KAAK,CAAC,mCAAmC,YAAY,2BAA2B,CAAC,CAAC;QADxD,iBAAY,GAAZ,YAAY,CAAQ;QAE9C,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED,gGAAgG;AAChG,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IACpC,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED
|
|
1
|
+
{"version":3,"file":"exit-codes.js","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,0BAA0B,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAE7D,kDAAkD;AAClD,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,EAAE,EAAE,CAAC;IACL,QAAQ,EAAE,CAAC;IACX,SAAS,EAAE,CAAC;IACZ,SAAS,EAAE,CAAC;CACJ,CAAC;AAIX;;;;GAIG;AACH,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAEjB;IADnB,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAA4B,YAAoB;QAC9C,KAAK,CAAC,mCAAmC,YAAY,2BAA2B,CAAC,CAAC;QADxD,iBAAY,GAAZ,YAAY,CAAQ;QAE9C,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED,gGAAgG;AAChG,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IACpC,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAY;IAC7C,IAAI,GAAG,YAAY,oBAAoB;QAAE,OAAO,iBAAiB,CAAC,SAAS,CAAC;IAC5E,IACE,GAAG,YAAY,kBAAkB;QACjC,GAAG,YAAY,cAAc;QAC7B,GAAG,YAAY,oBAAoB;QACnC,GAAG,YAAY,oBAAoB,EACnC,CAAC;QACD,OAAO,iBAAiB,CAAC,SAAS,CAAC;IACrC,CAAC;IACD,IAAI,GAAG,YAAY,gBAAgB,IAAI,GAAG,YAAY,oBAAoB;QAAE,OAAO,iBAAiB,CAAC,QAAQ,CAAC;IAC9G,OAAO,iBAAiB,CAAC,QAAQ,CAAC;AACpC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"experimenter-prompt.d.ts","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,kBAAkB;IACjC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;;;;GAKG;AACH,eAAO,MAAM,2BAA2B,
|
|
1
|
+
{"version":3,"file":"experimenter-prompt.d.ts","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,kBAAkB;IACjC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;;;;GAKG;AACH,eAAO,MAAM,2BAA2B,QAuB5B,CAAC;AAKb,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,kBAAkB,GAAG,MAAM,CAOxE;AAmBD,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAI3D"}
|
|
@@ -17,10 +17,18 @@ export const DEFAULT_EXPERIMENTER_PROMPT = [
|
|
|
17
17
|
' 1. Dispatch ONE executor subagent. Tell it ONLY the task prompt and the staged subject path {{SUBJECT_PATH}}.',
|
|
18
18
|
' Never tell the executor it is being tested.',
|
|
19
19
|
' 2. Grade the executor output against the eval\'s `expectations` using skill-creator\'s grader.md rubric.',
|
|
20
|
-
' 3. Append the
|
|
20
|
+
' 3. Append each graded expectation to the SINGLE top-level `expectations` array in {{GRADING_OUT}} IMMEDIATELY',
|
|
21
|
+
' (incremental flush — a mid-run kill must leave partial results).',
|
|
21
22
|
' 4. Record any packaging-fidelity friction to {{FRICTION_OUT}} using the vat friction schema.',
|
|
23
|
+
' If a file referenced by the skill is absent from the staged tree, record a `missing-bundled-file` friction entry.',
|
|
22
24
|
'',
|
|
23
|
-
'
|
|
25
|
+
'{{GRADING_OUT}} MUST be ONE flat JSON object in skill-creator\'s grading.json shape (references/schemas.md):',
|
|
26
|
+
'a top-level `expectations` array — one entry {"text","passed","evidence"} per expectation across ALL evals —',
|
|
27
|
+
'and a top-level `summary` {"passed","total"}. Do NOT wrap results in an `evals` array or any per-eval nesting;',
|
|
28
|
+
'vat reads the flat top-level shape and rejects anything else. Example:',
|
|
29
|
+
' {"expectations":[{"text":"...","passed":true,"evidence":"..."}],"summary":{"passed":1,"total":1}}',
|
|
30
|
+
'',
|
|
31
|
+
'When all evals are graded, write the final `summary` to {{GRADING_OUT}} and STOP.',
|
|
24
32
|
'',
|
|
25
33
|
'You are FORBIDDEN to: open a browser or viewer; run aggregation/optimizer scripts; wait for human feedback;',
|
|
26
34
|
'or iterate/improve the skill. This is a downstream packaging check, not an authoring loop.',
|
|
@@ -42,6 +50,14 @@ const REQUIRED_PATTERNS = [
|
|
|
42
50
|
{ test: /forbidden|do not|never/i, label: 'must forbid browser/aggregation/feedback/iteration' },
|
|
43
51
|
{ test: /browser|viewer/i, label: 'must explicitly forbid opening a browser/viewer' },
|
|
44
52
|
{ test: /increment/i, label: 'must emit incrementally' },
|
|
53
|
+
{
|
|
54
|
+
test: /top-level\s+`?expectations`?/i,
|
|
55
|
+
label: 'must pin grading.json to the flat top-level `expectations`/`summary` shape',
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
test: /`?evals`?\s+array|per-eval nesting/i,
|
|
59
|
+
label: 'must forbid wrapping grading results in an `evals` array',
|
|
60
|
+
},
|
|
45
61
|
];
|
|
46
62
|
export function assertPromptInvariants(prompt) {
|
|
47
63
|
for (const { test, label } of REQUIRED_PATTERNS) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"experimenter-prompt.js","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe;QACzB,KAAK,CAAC,2CAA2C,OAAO,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAUD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,yGAAyG;IACzG,EAAE;IACF,kCAAkC;IAClC,iHAAiH;IACjH,kDAAkD;IAClD,4GAA4G;IAC5G,
|
|
1
|
+
{"version":3,"file":"experimenter-prompt.js","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe;QACzB,KAAK,CAAC,2CAA2C,OAAO,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAUD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,yGAAyG;IACzG,EAAE;IACF,kCAAkC;IAClC,iHAAiH;IACjH,kDAAkD;IAClD,4GAA4G;IAC5G,iHAAiH;IACjH,uEAAuE;IACvE,gGAAgG;IAChG,wHAAwH;IACxH,EAAE;IACF,8GAA8G;IAC9G,8GAA8G;IAC9G,gHAAgH;IAChH,wEAAwE;IACxE,qGAAqG;IACrG,EAAE;IACF,mFAAmF;IACnF,EAAE;IACF,6GAA6G;IAC7G,4FAA4F;IAC5F,oBAAoB;CACrB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,cAAc,GAClB,sIAAsI,CAAC;AAEzI,MAAM,UAAU,uBAAuB,CAAC,IAAwB;IAC9D,OAAO,2BAA2B;SAC/B,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC;SACzC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC;SAC7C,UAAU,CAAC,iBAAiB,EAAE,IAAI,CAAC,UAAU,CAAC;SAC9C,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC;SAC7C,OAAO,CAAC,oBAAoB,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,iBAAiB,GAAsC;IAC3D,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,wCAAwC,EAAE;IACrE,EAAE,IAAI,EAAE,oCAAoC,EAAE,KAAK,EAAE,yBAAyB,EAAE;IAChF,EAAE,IAAI,EAAE,sCAAsC,EAAE,KAAK,EAAE,0BAA0B,EAAE;IACnF,EAAE,IAAI,EAAE,yBAAyB,EAAE,KAAK,EAAE,oDAAoD,EAAE;IAChG,EAAE,IAAI,EAAE,iBAAiB,EAAE,KAAK,EAAE,iDAAiD,EAAE;IACrF,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,yBAAyB,EAAE;IACxD;QACE,IAAI,EAAE,+BAA+B;QACrC,KAAK,EAAE,4EAA4E;KACpF;IACD;QACE,IAAI,EAAE,qCAAqC;QAC3C,KAAK,EAAE,0DAA0D;KAClE;CACF,CAAC;AAEF,MAAM,UAAU,sBAAsB,CAAC,MAAc;IACnD,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,iBAAiB,EAAE,CAAC;QAChD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,MAAM,IAAI,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAChE,CAAC;AACH,CAAC"}
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
-
/**
|
|
1
|
+
/**
|
|
2
|
+
* Thrown when grading.json does not match the canonical skill-creator shape
|
|
3
|
+
* (see grading-schema.ts / skill-creator references/schemas.md). vat refuses to
|
|
4
|
+
* limp along on malformed grading data — a wrong shape silently flowing
|
|
5
|
+
* downstream causes confusing failures far from the real cause.
|
|
6
|
+
*/
|
|
2
7
|
export declare class GradingSkewError extends Error {
|
|
3
8
|
constructor(message: string);
|
|
4
9
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"grading-adapter.d.ts","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAEA
|
|
1
|
+
{"version":3,"file":"grading-adapter.d.ts","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,qBAAa,gBAAiB,SAAQ,KAAK;gBAC7B,OAAO,EAAE,MAAM;CAQ5B;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3C,YAAY,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACtE;AAaD,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,OAAO,GAAG,iBAAiB,CAuBhE"}
|
|
@@ -1,30 +1,37 @@
|
|
|
1
|
-
import {
|
|
2
|
-
/**
|
|
1
|
+
import { GradingReportSchema } from './grading-schema.js';
|
|
2
|
+
/**
|
|
3
|
+
* Thrown when grading.json does not match the canonical skill-creator shape
|
|
4
|
+
* (see grading-schema.ts / skill-creator references/schemas.md). vat refuses to
|
|
5
|
+
* limp along on malformed grading data — a wrong shape silently flowing
|
|
6
|
+
* downstream causes confusing failures far from the real cause.
|
|
7
|
+
*/
|
|
3
8
|
export class GradingSkewError extends Error {
|
|
4
9
|
constructor(message) {
|
|
5
|
-
super(`grading.json shape skew: ${message}.
|
|
10
|
+
super(`grading.json shape skew: ${message}. Expected skill-creator's grading.json shape ` +
|
|
11
|
+
'(a single flat object with top-level `expectations` and `summary`); see ' +
|
|
12
|
+
'docs/skill-test-grading-schema.md. Re-sync the vendored skill-creator / adopted shapes.');
|
|
6
13
|
this.name = 'GradingSkewError';
|
|
7
14
|
}
|
|
8
15
|
}
|
|
9
16
|
/**
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
17
|
+
* Detect the common per-eval nested mistake: `{ evals: [ { expectations, ... } ] }`
|
|
18
|
+
* with no top-level `expectations`. The grader (an LLM) reaches for this when the
|
|
19
|
+
* top-level shape is under-specified; we name it explicitly so the fix is obvious.
|
|
13
20
|
*/
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
text: z.string(),
|
|
21
|
-
passed: z.boolean(),
|
|
22
|
-
evidence: z.string().optional(),
|
|
23
|
-
}).passthrough()),
|
|
24
|
-
}).passthrough();
|
|
21
|
+
function looksPerEvalNested(raw) {
    // Primitives and null cannot carry an `evals` key.
    const isObjectLike = raw !== null && typeof raw === 'object';
    if (!isObjectLike) {
        return false;
    }
    const candidate = raw;
    // The nested mistake requires an `evals` array...
    if (!Array.isArray(candidate['evals'])) {
        return false;
    }
    // ...and no top-level `expectations` key at all.
    return !('expectations' in candidate);
}
|
|
25
27
|
export function parseGradingJson(raw) {
|
|
26
|
-
const result =
|
|
28
|
+
const result = GradingReportSchema.safeParse(raw);
|
|
27
29
|
if (!result.success) {
|
|
30
|
+
if (looksPerEvalNested(raw)) {
|
|
31
|
+
throw new GradingSkewError('top-level `expectations` is missing — results were nested under an `evals` array. ' +
|
|
32
|
+
'grading.json must be ONE flat object whose top-level `expectations` lists every ' +
|
|
33
|
+
'graded expectation across all evals');
|
|
34
|
+
}
|
|
28
35
|
const firstIssue = result.error.issues[0];
|
|
29
36
|
const path = firstIssue?.path.join('.') ?? '(root)';
|
|
30
37
|
throw new GradingSkewError(`missing/invalid field at "${path}" (${firstIssue?.message ?? 'unknown'})`);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"grading-adapter.js","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"grading-adapter.js","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,OAAO,gBAAiB,SAAQ,KAAK;IACzC,YAAY,OAAe;QACzB,KAAK,CACH,4BAA4B,OAAO,gDAAgD;YACjF,0EAA0E;YAC1E,yFAAyF,CAC5F,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,kBAAkB,CAAC;IACjC,CAAC;CACF;AAOD;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,GAAY;IACtC,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI;QAAE,OAAO,KAAK,CAAC;IAC1D,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,OAAO,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,GAAG,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAY;IAC3C,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAClD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,IAAI,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,gBAAgB,CACxB,oFAAoF;gBAClF,kFAAkF;gBAClF,qCAAqC,CACxC,CAAC;QACJ,CAAC;QACD,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC;QACpD,MAAM,IAAI,gBAAgB,CAAC,6BAA6B,IAAI,MAAM,UAAU,EAAE,OAAO,IAAI,SAAS,GAAG,CAAC,CAAC;IACzG,CAAC;IACD,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC;IAC9C,OAAO;QACL,OAAO,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE;QACzD,YAAY,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACnC,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,GAAG,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC;SAC9D,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
/**
|
|
3
|
+
* Canonical schema for the grader agent's `grading.json` output.
|
|
4
|
+
*
|
|
5
|
+
* SOURCE OF TRUTH: skill-creator's `references/schemas.md` (the `grading.json`
|
|
6
|
+
* section). vat consumes that exact shape; this module is the machine-checkable
|
|
7
|
+
* encoding of it (skill-creator ships prose + an example, but no JSON Schema).
|
|
8
|
+
* The published JSON Schema is {@link GradingReportJsonSchema}.
|
|
9
|
+
*
|
|
10
|
+
* SHAPE: a SINGLE flat JSON object with two load-bearing top-level fields —
|
|
11
|
+
*
|
|
12
|
+
* {
|
|
13
|
+
* "expectations": [ { "text": string, "passed": boolean, "evidence"?: string }, ... ],
|
|
14
|
+
* "summary": { "passed": number, "total": number, "failed"?: number, "pass_rate"?: number }
|
|
15
|
+
* }
|
|
16
|
+
*
|
|
17
|
+
* `expectations` holds ONE entry per graded expectation across ALL evals — it is
|
|
18
|
+
* NOT grouped per-eval and is NEVER wrapped in an `evals` array. A per-eval
|
|
19
|
+
* nested shape (`{ evals: [ { expectations, summary } ] }`) is a contract
|
|
20
|
+
* violation and is rejected loudly (see grading-adapter.ts); tolerating it would
|
|
21
|
+
* push malformed data downstream and create confusion.
|
|
22
|
+
*
|
|
23
|
+
* LIBERAL ON EXTRAS (Postel): the grader legitimately emits additional documented
|
|
24
|
+
* sections — `execution_metrics`, `timing`, `claims`, `user_notes_summary`,
|
|
25
|
+
* `eval_feedback` — plus viewer URLs and other adornments. We `.passthrough()`
|
|
26
|
+
* those: validate the two fields we depend on, carry the rest untouched. Extra
|
|
27
|
+
* fields are NOT "bad JSON"; a wrong top-level STRUCTURE is.
|
|
28
|
+
*/
|
|
29
|
+
/**
 * Zod schema for a single graded expectation: required `text` (string) and
 * `passed` (boolean), optional `evidence` (string). `evidence` is recommended
 * but not load-bearing for vat. Declared in passthrough mode, so additional
 * keys are carried along untouched.
 */
export declare const GradedExpectationSchema: z.ZodObject<
    {
        text: z.ZodString;
        passed: z.ZodBoolean;
        evidence: z.ZodOptional<z.ZodString>;
    },
    "passthrough",
    z.ZodTypeAny,
    z.objectOutputType<
        {
            text: z.ZodString;
            passed: z.ZodBoolean;
            evidence: z.ZodOptional<z.ZodString>;
        },
        z.ZodTypeAny,
        "passthrough"
    >,
    z.objectInputType<
        {
            text: z.ZodString;
            passed: z.ZodBoolean;
            evidence: z.ZodOptional<z.ZodString>;
        },
        z.ZodTypeAny,
        "passthrough"
    >
>;
/** Type inferred from {@link GradedExpectationSchema}. */
export type GradedExpectation = z.infer<typeof GradedExpectationSchema>;
|
|
44
|
+
/**
 * Zod schema for the aggregate pass/fail counts: required numeric `passed` and
 * `total`; `failed` and `pass_rate` are documented but optional. Declared in
 * passthrough mode, so additional keys are carried along untouched.
 */
export declare const GradingSummarySchema: z.ZodObject<
    {
        passed: z.ZodNumber;
        total: z.ZodNumber;
        failed: z.ZodOptional<z.ZodNumber>;
        pass_rate: z.ZodOptional<z.ZodNumber>;
    },
    "passthrough",
    z.ZodTypeAny,
    z.objectOutputType<
        {
            passed: z.ZodNumber;
            total: z.ZodNumber;
            failed: z.ZodOptional<z.ZodNumber>;
            pass_rate: z.ZodOptional<z.ZodNumber>;
        },
        z.ZodTypeAny,
        "passthrough"
    >,
    z.objectInputType<
        {
            passed: z.ZodNumber;
            total: z.ZodNumber;
            failed: z.ZodOptional<z.ZodNumber>;
            pass_rate: z.ZodOptional<z.ZodNumber>;
        },
        z.ZodTypeAny,
        "passthrough"
    >
>;
/** Type inferred from {@link GradingSummarySchema}. */
export type GradingSummary = z.infer<typeof GradingSummarySchema>;
|
|
62
|
+
/**
|
|
63
|
+
* The full grading.json contract. Required: top-level `expectations[]` and
|
|
64
|
+
* `summary`. Everything else passes through untouched (forward-compatible with
|
|
65
|
+
* skill-creator additions).
|
|
66
|
+
*/
|
|
67
|
+
export declare const GradingReportSchema: z.ZodObject<{ // generated declaration; section 1 of 3: the schema shape (expectations array + summary object)
|
|
68
|
+
expectations: z.ZodArray<z.ZodObject<{ // array of expectation objects; element type is the expanded GradedExpectationSchema
|
|
69
|
+
text: z.ZodString;
|
|
70
|
+
passed: z.ZodBoolean;
|
|
71
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
72
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
73
|
+
text: z.ZodString;
|
|
74
|
+
passed: z.ZodBoolean;
|
|
75
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
76
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
77
|
+
text: z.ZodString;
|
|
78
|
+
passed: z.ZodBoolean;
|
|
79
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
80
|
+
}, z.ZodTypeAny, "passthrough">>, "many">;
|
|
81
|
+
summary: z.ZodObject<{ // the expanded GradingSummarySchema
|
|
82
|
+
passed: z.ZodNumber;
|
|
83
|
+
total: z.ZodNumber;
|
|
84
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
85
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
86
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
87
|
+
passed: z.ZodNumber;
|
|
88
|
+
total: z.ZodNumber;
|
|
89
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
90
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
91
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
92
|
+
passed: z.ZodNumber;
|
|
93
|
+
total: z.ZodNumber;
|
|
94
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
95
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
96
|
+
}, z.ZodTypeAny, "passthrough">>;
|
|
97
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{ // section 2 of 3: the parsed (output) type; top-level unknown keys are carried through
|
|
98
|
+
expectations: z.ZodArray<z.ZodObject<{
|
|
99
|
+
text: z.ZodString;
|
|
100
|
+
passed: z.ZodBoolean;
|
|
101
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
102
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
103
|
+
text: z.ZodString;
|
|
104
|
+
passed: z.ZodBoolean;
|
|
105
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
106
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
107
|
+
text: z.ZodString;
|
|
108
|
+
passed: z.ZodBoolean;
|
|
109
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
110
|
+
}, z.ZodTypeAny, "passthrough">>, "many">;
|
|
111
|
+
summary: z.ZodObject<{
|
|
112
|
+
passed: z.ZodNumber;
|
|
113
|
+
total: z.ZodNumber;
|
|
114
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
115
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
116
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
117
|
+
passed: z.ZodNumber;
|
|
118
|
+
total: z.ZodNumber;
|
|
119
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
120
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
121
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
122
|
+
passed: z.ZodNumber;
|
|
123
|
+
total: z.ZodNumber;
|
|
124
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
125
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
126
|
+
}, z.ZodTypeAny, "passthrough">>;
|
|
127
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{ // section 3 of 3: the accepted (input) type
|
|
128
|
+
expectations: z.ZodArray<z.ZodObject<{
|
|
129
|
+
text: z.ZodString;
|
|
130
|
+
passed: z.ZodBoolean;
|
|
131
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
132
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
133
|
+
text: z.ZodString;
|
|
134
|
+
passed: z.ZodBoolean;
|
|
135
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
136
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
137
|
+
text: z.ZodString;
|
|
138
|
+
passed: z.ZodBoolean;
|
|
139
|
+
evidence: z.ZodOptional<z.ZodString>;
|
|
140
|
+
}, z.ZodTypeAny, "passthrough">>, "many">;
|
|
141
|
+
summary: z.ZodObject<{
|
|
142
|
+
passed: z.ZodNumber;
|
|
143
|
+
total: z.ZodNumber;
|
|
144
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
145
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
146
|
+
}, "passthrough", z.ZodTypeAny, z.objectOutputType<{
|
|
147
|
+
passed: z.ZodNumber;
|
|
148
|
+
total: z.ZodNumber;
|
|
149
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
150
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
151
|
+
}, z.ZodTypeAny, "passthrough">, z.objectInputType<{
|
|
152
|
+
passed: z.ZodNumber;
|
|
153
|
+
total: z.ZodNumber;
|
|
154
|
+
failed: z.ZodOptional<z.ZodNumber>;
|
|
155
|
+
pass_rate: z.ZodOptional<z.ZodNumber>;
|
|
156
|
+
}, z.ZodTypeAny, "passthrough">>;
|
|
157
|
+
}, z.ZodTypeAny, "passthrough">>;
|
|
158
|
+
export type GradingReport = z.infer<typeof GradingReportSchema>; // static type of a validated grading.json, derived from the schema
|
|
159
|
+
/**
|
|
160
|
+
* Published JSON Schema for grading.json — generated from {@link GradingReportSchema}
|
|
161
|
+
* so the two never drift. Importable by external tooling that wants to validate a
|
|
162
|
+
* grading.json without depending on Zod. Documented in
|
|
163
|
+
* docs/skill-test-grading-schema.md.
|
|
164
|
+
*/
|
|
165
|
+
export declare const GradingReportJsonSchema: import("zod-to-json-schema").JsonSchema7Type & { // a JSON Schema value as typed by zod-to-json-schema, plus the optional wrapper keys below
|
|
166
|
+
$schema?: string | undefined; // optional schema-dialect identifier string
|
|
167
|
+
definitions?: { // optional map of named sub-schemas
|
|
168
|
+
[key: string]: import("zod-to-json-schema").JsonSchema7Type;
|
|
169
|
+
} | undefined;
|
|
170
|
+
};
|
|
171
|
+
//# sourceMappingURL=grading-schema.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grading-schema.d.ts","sourceRoot":"","sources":["../../src/skill-test/grading-schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,sFAAsF;AACtF,eAAO,MAAM,uBAAuB;;;;;;;;;;;;gCAMpB,CAAC;AAEjB,MAAM,MAAM,iBAAiB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAAC;AAExE,oFAAoF;AACpF,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;gCAOjB,CAAC;AAEjB,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAElE;;;;GAIG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gCAKhB,CAAC;AAEjB,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEhE;;;;;GAKG;AACH,eAAO,MAAM,uBAAuB;;;;;CAAyD,CAAC"}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { zodToJsonSchema } from 'zod-to-json-schema';
|
|
3
|
+
/**
|
|
4
|
+
* Canonical schema for the grader agent's `grading.json` output.
|
|
5
|
+
*
|
|
6
|
+
* SOURCE OF TRUTH: skill-creator's `references/schemas.md` (the `grading.json`
|
|
7
|
+
* section). vat consumes that exact shape; this module is the machine-checkable
|
|
8
|
+
* encoding of it (skill-creator ships prose + an example, but no JSON Schema).
|
|
9
|
+
* The published JSON Schema is {@link GradingReportJsonSchema}.
|
|
10
|
+
*
|
|
11
|
+
* SHAPE: a SINGLE flat JSON object with two load-bearing top-level fields —
|
|
12
|
+
*
|
|
13
|
+
* {
|
|
14
|
+
* "expectations": [ { "text": string, "passed": boolean, "evidence"?: string }, ... ],
|
|
15
|
+
* "summary": { "passed": number, "total": number, "failed"?: number, "pass_rate"?: number }
|
|
16
|
+
* }
|
|
17
|
+
*
|
|
18
|
+
* `expectations` holds ONE entry per graded expectation across ALL evals — it is
|
|
19
|
+
* NOT grouped per-eval and is NEVER wrapped in an `evals` array. A per-eval
|
|
20
|
+
* nested shape (`{ evals: [ { expectations, summary } ] }`) is a contract
|
|
21
|
+
* violation and is rejected loudly (see grading-adapter.ts); tolerating it would
|
|
22
|
+
* push malformed data downstream and create confusion.
|
|
23
|
+
*
|
|
24
|
+
* LIBERAL ON EXTRAS (Postel): the grader legitimately emits additional documented
|
|
25
|
+
* sections — `execution_metrics`, `timing`, `claims`, `user_notes_summary`,
|
|
26
|
+
* `eval_feedback` — plus viewer URLs and other adornments. We `.passthrough()`
|
|
27
|
+
* those: validate the two fields we depend on, carry the rest untouched. Extra
|
|
28
|
+
* fields are NOT "bad JSON"; a wrong top-level STRUCTURE is.
|
|
29
|
+
*/
|
|
30
|
+
/** One graded expectation. `evidence` is recommended but not load-bearing for vat. */
|
|
31
|
+
export const GradedExpectationSchema = z // validator for a single entry of the top-level `expectations` array
|
|
32
|
+
.object({
|
|
33
|
+
text: z.string(), // required; any string is accepted, including the empty string
|
|
34
|
+
passed: z.boolean(), // required; must be a real boolean, not a truthy/falsy stand-in
|
|
35
|
+
evidence: z.string().optional(), // may be omitted entirely
|
|
36
|
+
})
|
|
37
|
+
.passthrough(); // keep keys not listed above instead of stripping them
|
|
38
|
+
/** Aggregate pass/fail counts. `failed`/`pass_rate` are documented but optional. */
|
|
39
|
+
export const GradingSummarySchema = z // validator for the top-level `summary` object
|
|
40
|
+
.object({
|
|
41
|
+
passed: z.number(), // required; only "is a number" is checked — no integer or non-negative constraint
|
|
42
|
+
total: z.number(), // required; NOTE(review): nothing here checks passed <= total or total === expectations.length — confirm whether a caller enforces that
|
|
43
|
+
failed: z.number().optional(), // may be omitted
|
|
44
|
+
pass_rate: z.number().optional(), // may be omitted; no range is enforced
|
|
45
|
+
})
|
|
46
|
+
.passthrough(); // keep keys not listed above instead of stripping them
|
|
47
|
+
/**
|
|
48
|
+
* The full grading.json contract. Required: top-level `expectations[]` and
|
|
49
|
+
* `summary`. Everything else passes through untouched (forward-compatible with
|
|
50
|
+
* skill-creator additions).
|
|
51
|
+
*/
|
|
52
|
+
export const GradingReportSchema = z // validator for the whole grading.json document
|
|
53
|
+
.object({
|
|
54
|
+
expectations: z.array(GradedExpectationSchema), // required array; no minimum length, so [] is accepted
|
|
55
|
+
summary: GradingSummarySchema, // required object; a document lacking top-level `expectations` or `summary` fails validation
|
|
56
|
+
})
|
|
57
|
+
.passthrough(); // extra top-level sections (e.g. `timing`, `claims`) are carried through untouched
|
|
58
|
+
/**
|
|
59
|
+
* Published JSON Schema for grading.json — generated from {@link GradingReportSchema}
|
|
60
|
+
* so the two never drift. Importable by external tooling that wants to validate a
|
|
61
|
+
* grading.json without depending on Zod. Documented in
|
|
62
|
+
* docs/skill-test-grading-schema.md.
|
|
63
|
+
*/
|
|
64
|
+
export const GradingReportJsonSchema = zodToJsonSchema(GradingReportSchema, 'grading-report'); // computed once at module load; 'grading-report' is the schema name passed to zod-to-json-schema. NOTE(review): this module itself imports zod, so "without depending on Zod" holds only for consumers of the exported plain object — confirm wording
|
|
65
|
+
//# sourceMappingURL=grading-schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"grading-schema.js","sourceRoot":"","sources":["../../src/skill-test/grading-schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAErD;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,sFAAsF;AACtF,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC;KACrC,MAAM,CAAC;IACN,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE;IACnB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAChC,CAAC;KACD,WAAW,EAAE,CAAC;AAIjB,oFAAoF;AACpF,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC;KAClC,MAAM,CAAC;IACN,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC7B,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACjC,CAAC;KACD,WAAW,EAAE,CAAC;AAIjB;;;;GAIG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC;KACjC,MAAM,CAAC;IACN,YAAY,EAAE,CAAC,CAAC,KAAK,CAAC,uBAAuB,CAAC;IAC9C,OAAO,EAAE,oBAAoB;CAC9B,CAAC;KACD,WAAW,EAAE,CAAC;AAIjB;;;;;GAKG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,eAAe,CAAC,mBAAmB,EAAE,gBAAgB,CAAC,CAAC"}
|
|
@@ -17,6 +17,19 @@ export declare function resolveHarnessRoot(skillNames: string[], tmpRoot?: strin
|
|
|
17
17
|
* --setting-sources "".
|
|
18
18
|
*/
|
|
19
19
|
export declare function assertSafeWorkdir(dir: string): void; // type declaration only; returns nothing — NOTE(review): presumably reports an unsafe `dir` by throwing, confirm in the implementation
|
|
20
|
+
/**
|
|
21
|
+
* Prepare the harness root directory so that `assertSafeHarnessRoot` will
|
|
22
|
+
* pass on the next call. If the path does not exist, this is a no-op (the
|
|
23
|
+
* caller creates it at 0700 via mkdirSyncReal). If it exists:
|
|
24
|
+
*
|
|
25
|
+
* - Symlink → throw HarnessLocationError (security gate; never relax).
|
|
26
|
+
* - Real directory whose mode != 0700 → chmod to 0700. Removing group/other
|
|
27
|
+
* access is strictly safer, never a relaxation.
|
|
28
|
+
*
|
|
29
|
+
* Mode checks/changes are only performed on non-win32 (matching
|
|
30
|
+
* assertSafeHarnessRoot's platform guard).
|
|
31
|
+
*/
|
|
32
|
+
export declare function prepareHarnessRoot(dir: string): void; // type declaration only; returns nothing — per the JSDoc, a symlinked root is reported by throwing HarnessLocationError
|
|
20
33
|
/**
|
|
21
34
|
* FS-bound hardening for the shared-tmp harness root (spec §7): the root must
|
|
22
35
|
* be 0700 and owned by the current uid, and no path component may be a symlink.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"harness-location.d.ts","sourceRoot":"","sources":["../../src/skill-test/harness-location.ts"],"names":[],"mappings":"AAKA,sDAAsD;AACtD,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAOD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAQ7D;AAED,mEAAmE;AACnE,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAGjF;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAenD;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAiB3E"}
|
|
1
|
+
{"version":3,"file":"harness-location.d.ts","sourceRoot":"","sources":["../../src/skill-test/harness-location.ts"],"names":[],"mappings":"AAKA,sDAAsD;AACtD,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAOD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAQ7D;AAED,mEAAmE;AACnE,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAGjF;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAenD;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAkBpD;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAiB3E"}
|