@vibe-agent-toolkit/agent-skills 0.1.39-rc.7 → 0.1.39-rc.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/dist/skill-test/build-hook.d.ts +58 -0
  2. package/dist/skill-test/build-hook.d.ts.map +1 -0
  3. package/dist/skill-test/build-hook.js +63 -0
  4. package/dist/skill-test/build-hook.js.map +1 -0
  5. package/dist/skill-test/exit-codes.d.ts +3 -2
  6. package/dist/skill-test/exit-codes.d.ts.map +1 -1
  7. package/dist/skill-test/exit-codes.js +5 -2
  8. package/dist/skill-test/exit-codes.js.map +1 -1
  9. package/dist/skill-test/experimenter-prompt.d.ts.map +1 -1
  10. package/dist/skill-test/experimenter-prompt.js +18 -2
  11. package/dist/skill-test/experimenter-prompt.js.map +1 -1
  12. package/dist/skill-test/grading-adapter.d.ts +6 -1
  13. package/dist/skill-test/grading-adapter.d.ts.map +1 -1
  14. package/dist/skill-test/grading-adapter.js +25 -18
  15. package/dist/skill-test/grading-adapter.js.map +1 -1
  16. package/dist/skill-test/grading-schema.d.ts +171 -0
  17. package/dist/skill-test/grading-schema.d.ts.map +1 -0
  18. package/dist/skill-test/grading-schema.js +65 -0
  19. package/dist/skill-test/grading-schema.js.map +1 -0
  20. package/dist/skill-test/harness-location.d.ts +13 -0
  21. package/dist/skill-test/harness-location.d.ts.map +1 -1
  22. package/dist/skill-test/harness-location.js +31 -1
  23. package/dist/skill-test/harness-location.js.map +1 -1
  24. package/dist/skill-test/index.d.ts +1 -0
  25. package/dist/skill-test/index.d.ts.map +1 -1
  26. package/dist/skill-test/index.js +1 -0
  27. package/dist/skill-test/index.js.map +1 -1
  28. package/dist/skill-test/lock.js +1 -1
  29. package/dist/skill-test/lock.js.map +1 -1
  30. package/dist/skill-test/plugin-env.d.ts +20 -0
  31. package/dist/skill-test/plugin-env.d.ts.map +1 -0
  32. package/dist/skill-test/plugin-env.js +24 -0
  33. package/dist/skill-test/plugin-env.js.map +1 -0
  34. package/dist/skill-test/plugin-layout.d.ts +41 -0
  35. package/dist/skill-test/plugin-layout.d.ts.map +1 -0
  36. package/dist/skill-test/plugin-layout.js +49 -0
  37. package/dist/skill-test/plugin-layout.js.map +1 -0
  38. package/dist/skill-test/run-harness.d.ts +12 -0
  39. package/dist/skill-test/run-harness.d.ts.map +1 -1
  40. package/dist/skill-test/run-harness.js +60 -14
  41. package/dist/skill-test/run-harness.js.map +1 -1
  42. package/dist/skill-test/staging.d.ts +20 -1
  43. package/dist/skill-test/staging.d.ts.map +1 -1
  44. package/dist/skill-test/staging.js +56 -13
  45. package/dist/skill-test/staging.js.map +1 -1
  46. package/package.json +5 -5
@@ -0,0 +1,58 @@
1
+ /**
2
+ * build-hook.ts — optional pre-stage build step for `vat skill test run`.
3
+ *
4
+ * When the test config includes a `build:` field, this module runs that shell
5
+ * command ONCE before staging so that generated artifacts (e.g. bundled scripts
6
+ * not committed to source) are present in the source tree for staging to copy.
7
+ *
8
+ * The command runs with cwd = the CONFIG ROOT (the directory containing
9
+ * vibe-agent-toolkit.config.yaml), because real build commands are root-level
10
+ * package scripts (e.g. `pnpm bundle:report`).
11
+ *
12
+ * Security note: the `build:` field is a developer-authored value from the
13
+ * project's own vibe-agent-toolkit.config.yaml — a trusted source under the
14
+ * adopter's source control. The command is passed directly to the OS shell
15
+ * (shell: true) intentionally, because build commands frequently include shell
16
+ * syntax (npm script chaining, env vars, etc.). This is equivalent to running
17
+ * `npm run build` or `pnpm bundle:report` from the terminal; it is NOT arbitrary
18
+ * user input. The adopter is already executing skill code via this command
19
+ * (`vat skill test run` requires --i-understand-this-runs-skill-code).
20
+ */
21
+ /**
22
+ * Options for the pre-stage build hook.
23
+ *
24
+ * `spawnFn` is injectable for unit testing — production code uses the default
25
+ * (node:child_process spawnSync). Tests inject a vi.fn() mock.
26
+ */
27
+ export interface BuildHookOptions {
28
+ /** Shell command to run (from `test.build` in vibe-agent-toolkit.config.yaml). */
29
+ buildCommand: string | undefined;
30
+ /** Absolute path to the config root (cwd for the build command). */
31
+ configRoot: string;
32
+ /**
33
+ * Injectable spawn function for unit testing.
34
+ * Defaults to node:child_process spawnSync when not provided.
35
+ */
36
+ spawnFn?: (cmd: string, opts: {
37
+ shell: boolean;
38
+ cwd: string;
39
+ stdio: 'inherit';
40
+ }) => {
41
+ status: number | null;
42
+ };
43
+ }
44
+ /** Thrown when the pre-stage build command exits with a non-zero code. Maps to preflight (exit 2). */
45
+ export declare class BuildHookError extends Error {
46
+ readonly buildExitCode: number;
47
+ readonly exitCode: 2;
48
+ constructor(message: string, buildExitCode: number);
49
+ }
50
+ /**
51
+ * Run the pre-stage build hook if configured.
52
+ *
53
+ * Runs `buildCommand` in a shell with `cwd = configRoot`. On non-zero exit,
54
+ * throws `BuildHookError` with a clear message naming the command and exit code.
55
+ * When `buildCommand` is undefined, this is a no-op (behavior unchanged).
56
+ */
57
+ export declare function runPreStageBuild(opts: BuildHookOptions): void;
58
+ //# sourceMappingURL=build-hook.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"build-hook.d.ts","sourceRoot":"","sources":["../../src/skill-test/build-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAIH;;;;;GAKG;AACH,MAAM,WAAW,gBAAgB;IAC/B,kFAAkF;IAClF,YAAY,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,oEAAoE;IACpE,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,IAAI,EAAE;QAAE,KAAK,EAAE,OAAO,CAAC;QAAC,GAAG,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,SAAS,CAAA;KAAE,KAAK;QAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAA;KAAE,CAAC;CAC/G;AAED,sGAAsG;AACtG,qBAAa,cAAe,SAAQ,KAAK;aAEM,aAAa,EAAE,MAAM;IADlE,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM,EAAkB,aAAa,EAAE,MAAM;CAInE;AAeD;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,gBAAgB,GAAG,IAAI,CAe7D"}
@@ -0,0 +1,63 @@
1
+ /**
2
+ * build-hook.ts — optional pre-stage build step for `vat skill test run`.
3
+ *
4
+ * When the test config includes a `build:` field, this module runs that shell
5
+ * command ONCE before staging so that generated artifacts (e.g. bundled scripts
6
+ * not committed to source) are present in the source tree for staging to copy.
7
+ *
8
+ * The command runs with cwd = the CONFIG ROOT (the directory containing
9
+ * vibe-agent-toolkit.config.yaml), because real build commands are root-level
10
+ * package scripts (e.g. `pnpm bundle:report`).
11
+ *
12
+ * Security note: the `build:` field is a developer-authored value from the
13
+ * project's own vibe-agent-toolkit.config.yaml — a trusted source under the
14
+ * adopter's source control. The command is passed directly to the OS shell
15
+ * (shell: true) intentionally, because build commands frequently include shell
16
+ * syntax (npm script chaining, env vars, etc.). This is equivalent to running
17
+ * `npm run build` or `pnpm bundle:report` from the terminal; it is NOT arbitrary
18
+ * user input. The adopter is already executing skill code via this command
19
+ * (`vat skill test run` requires --i-understand-this-runs-skill-code).
20
+ */
21
+ import { spawnSync } from 'node:child_process';
22
+ /** Thrown when the pre-stage build command exits with a non-zero code. Maps to preflight (exit 2). */
23
+ export class BuildHookError extends Error {
24
+ buildExitCode;
25
+ exitCode = 2;
26
+ constructor(message, buildExitCode) {
27
+ super(message);
28
+ this.buildExitCode = buildExitCode;
29
+ this.name = 'BuildHookError';
30
+ }
31
+ }
32
+ /**
33
+ * Default spawn implementation: runs the command in a shell with stdio inherited.
34
+ *
35
+ * `build:` is a developer-authored shell command from the adopter's own config
36
+ * (vibe-agent-toolkit.config.yaml), equivalent to running `pnpm bundle:report` at the
37
+ * terminal. It is NOT arbitrary user input. The adopter already acknowledges running
38
+ * skill code via --i-understand-this-runs-skill-code.
39
+ */
40
+ function defaultSpawn(cmd, opts) {
41
+ // eslint-disable-next-line sonarjs/os-command -- developer-authored build command from trusted project config; equivalent to running pnpm/npm build manually
42
+ return spawnSync(cmd, { ...opts, shell: true });
43
+ }
44
+ /**
45
+ * Run the pre-stage build hook if configured.
46
+ *
47
+ * Runs `buildCommand` in a shell with `cwd = configRoot`. On non-zero exit,
48
+ * throws `BuildHookError` with a clear message naming the command and exit code.
49
+ * When `buildCommand` is undefined, this is a no-op (behavior unchanged).
50
+ */
51
+ export function runPreStageBuild(opts) {
52
+ const { buildCommand, configRoot } = opts;
53
+ if (buildCommand === undefined)
54
+ return;
55
+ const spawn = opts.spawnFn ?? defaultSpawn;
56
+ const result = spawn(buildCommand, { shell: true, cwd: configRoot, stdio: 'inherit' });
57
+ const status = result.status ?? -1;
58
+ if (status !== 0) {
59
+ throw new BuildHookError(`Pre-stage build hook failed: command "${buildCommand}" exited with code ${status}. ` +
60
+ `Resolve the build error before running vat skill test run.`, status);
61
+ }
62
+ }
63
+ //# sourceMappingURL=build-hook.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"build-hook.js","sourceRoot":"","sources":["../../src/skill-test/build-hook.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAoB/C,sGAAsG;AACtG,MAAM,OAAO,cAAe,SAAQ,KAAK;IAEM;IADpC,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAAY,OAAe,EAAkB,aAAqB;QAChE,KAAK,CAAC,OAAO,CAAC,CAAC;QAD4B,kBAAa,GAAb,aAAa,CAAQ;QAEhE,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAC;IAC/B,CAAC;CACF;AAED;;;;;;;GAOG;AACH,SAAS,YAAY,CAAC,GAAW,EAAE,IAAuD;IACxF,6JAA6J;IAC7J,OAAO,SAAS,CAAC,GAAG,EAAE,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;AAClD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,IAAsB;IACrD,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,GAAG,IAAI,CAAC;IAC1C,IAAI,YAAY,KAAK,SAAS;QAAE,OAAO;IAEvC,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,IAAI,YAAY,CAAC;IAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,YAAY,EAAE,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IACvF,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC;IAEnC,IAAI,MAAM,KAAK,CAAC,EAAE,CAAC;QACjB,MAAM,IAAI,cAAc,CACtB,yCAAyC,YAAY,sBAAsB,MAAM,IAAI;YACnF,4DAA4D,EAC9D,MAAM,CACP,CAAC;IACJ,CAAC;AACH,CAAC"}
@@ -25,8 +25,9 @@ export declare class InternalHarnessError extends Error {
25
25
  * Map any thrown error to the process exit code. Errors that carry their own
26
26
  * `exitCode` (Bootstrap/Auth/HarnessLocation/Internal) are authoritative;
27
27
  * a PromptInvariantError is a user-correctable preflight problem (a supplied
28
- * prompt override is missing a required safety instruction) → 2; GradingSkewError
29
- * is a parse failure → 1; everything unknown → 1.
28
+ * prompt override is missing a required safety instruction) → 2; a BuildHookError
29
+ * is a pre-stage build failure → 2; GradingSkewError is a parse failure → 1;
30
+ * everything unknown → 1.
30
31
  */
31
32
  export declare function mapErrorToExitCode(err: unknown): number;
32
33
  //# sourceMappingURL=exit-codes.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"exit-codes.d.ts","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAMA,kDAAkD;AAClD,eAAO,MAAM,iBAAiB;;;;;CAKpB,CAAC;AAEX,MAAM,MAAM,sBAAsB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;AAEhG;;;;GAIG;AACH,qBAAa,oBAAqB,SAAQ,KAAK;aAEjB,YAAY,EAAE,MAAM;IADhD,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACH,YAAY,EAAE,MAAM;CAIjD;AAED,gGAAgG;AAChG,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAWvD"}
1
+ {"version":3,"file":"exit-codes.d.ts","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAOA,kDAAkD;AAClD,eAAO,MAAM,iBAAiB;;;;;CAKpB,CAAC;AAEX,MAAM,MAAM,sBAAsB,GAAG,CAAC,OAAO,iBAAiB,CAAC,CAAC,MAAM,OAAO,iBAAiB,CAAC,CAAC;AAEhG;;;;GAIG;AACH,qBAAa,oBAAqB,SAAQ,KAAK;aAEjB,YAAY,EAAE,MAAM;IADhD,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACH,YAAY,EAAE,MAAM;CAIjD;AAED,gGAAgG;AAChG,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAED;;;;;;;GAOG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,OAAO,GAAG,MAAM,CAYvD"}
@@ -1,4 +1,5 @@
1
1
  import { AuthPreflightError } from '@vibe-agent-toolkit/utils';
2
+ import { BuildHookError } from './build-hook.js';
2
3
  import { PromptInvariantError } from './experimenter-prompt.js';
3
4
  import { GradingSkewError } from './grading-adapter.js';
4
5
  import { HarnessLocationError } from './harness-location.js';
@@ -35,13 +36,15 @@ export class InternalHarnessError extends Error {
35
36
  * Map any thrown error to the process exit code. Errors that carry their own
36
37
  * `exitCode` (Bootstrap/Auth/HarnessLocation/Internal) are authoritative;
37
38
  * a PromptInvariantError is a user-correctable preflight problem (a supplied
38
- * prompt override is missing a required safety instruction) → 2; GradingSkewError
39
- * is a parse failure → 1; everything unknown → 1.
39
+ * prompt override is missing a required safety instruction) → 2; a BuildHookError
40
+ * is a pre-stage build failure → 2; GradingSkewError is a parse failure → 1;
41
+ * everything unknown → 1.
40
42
  */
41
43
  export function mapErrorToExitCode(err) {
42
44
  if (err instanceof BootstrapNeededError)
43
45
  return SkillTestExitCode.Bootstrap;
44
46
  if (err instanceof AuthPreflightError ||
47
+ err instanceof BuildHookError ||
45
48
  err instanceof HarnessLocationError ||
46
49
  err instanceof PromptInvariantError) {
47
50
  return SkillTestExitCode.Preflight;
@@ -1 +1 @@
1
- {"version":3,"file":"exit-codes.js","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,EAAE,oBAAoB,EAAE,MAAM,0BAA0B,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAE7D,kDAAkD;AAClD,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,EAAE,EAAE,CAAC;IACL,QAAQ,EAAE,CAAC;IACX,SAAS,EAAE,CAAC;IACZ,SAAS,EAAE,CAAC;CACJ,CAAC;AAIX;;;;GAIG;AACH,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAEjB;IADnB,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAA4B,YAAoB;QAC9C,KAAK,CAAC,mCAAmC,YAAY,2BAA2B,CAAC,CAAC;QADxD,iBAAY,GAAZ,YAAY,CAAQ;QAE9C,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED,gGAAgG;AAChG,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IACpC,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAY;IAC7C,IAAI,GAAG,YAAY,oBAAoB;QAAE,OAAO,iBAAiB,CAAC,SAAS,CAAC;IAC5E,IACE,GAAG,YAAY,kBAAkB;QACjC,GAAG,YAAY,oBAAoB;QACnC,GAAG,YAAY,oBAAoB,EACnC,CAAC;QACD,OAAO,iBAAiB,CAAC,SAAS,CAAC;IACrC,CAAC;IACD,IAAI,GAAG,YAAY,gBAAgB,IAAI,GAAG,YAAY,oBAAoB;QAAE,OAAO,iBAAiB,CAAC,QAAQ,CAAC;IAC9G,OAAO,iBAAiB,CAAC,QAAQ,CAAC;AACpC,CAAC"}
1
+ {"version":3,"file":"exit-codes.js","sourceRoot":"","sources":["../../src/skill-test/exit-codes.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AACjD,OAAO,EAAE,oBAAoB,EAAE,MAAM,0BAA0B,CAAC;AAChE,OAAO,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,uBAAuB,CAAC;AAE7D,kDAAkD;AAClD,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,EAAE,EAAE,CAAC;IACL,QAAQ,EAAE,CAAC;IACX,SAAS,EAAE,CAAC;IACZ,SAAS,EAAE,CAAC;CACJ,CAAC;AAIX;;;;GAIG;AACH,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAEjB;IADnB,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAA4B,YAAoB;QAC9C,KAAK,CAAC,mCAAmC,YAAY,2BAA2B,CAAC,CAAC;QADxD,iBAAY,GAAZ,YAAY,CAAQ;QAE9C,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED,gGAAgG;AAChG,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IACpC,QAAQ,GAAG,CAAU,CAAC;IAC/B,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,kBAAkB,CAAC,GAAY;IAC7C,IAAI,GAAG,YAAY,oBAAoB;QAAE,OAAO,iBAAiB,CAAC,SAAS,CAAC;IAC5E,IACE,GAAG,YAAY,kBAAkB;QACjC,GAAG,YAAY,cAAc;QAC7B,GAAG,YAAY,oBAAoB;QACnC,GAAG,YAAY,oBAAoB,EACnC,CAAC;QACD,OAAO,iBAAiB,CAAC,SAAS,CAAC;IACrC,CAAC;IACD,IAAI,GAAG,YAAY,gBAAgB,IAAI,GAAG,YAAY,oBAAoB;QAAE,OAAO,iBAAiB,CAAC,QAAQ,CAAC;IAC9G,OAAO,iBAAiB,CAAC,QAAQ,CAAC;AACpC,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"experimenter-prompt.d.ts","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,kBAAkB;IACjC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;;;;GAKG;AACH,eAAO,MAAM,2BAA2B,QAe5B,CAAC;AAKb,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,kBAAkB,GAAG,MAAM,CAOxE;AAWD,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAI3D"}
1
+ {"version":3,"file":"experimenter-prompt.d.ts","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM;CAI5B;AAED,MAAM,WAAW,kBAAkB;IACjC,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,OAAO,CAAC;CACnB;AAED;;;;;GAKG;AACH,eAAO,MAAM,2BAA2B,QAuB5B,CAAC;AAKb,wBAAgB,uBAAuB,CAAC,IAAI,EAAE,kBAAkB,GAAG,MAAM,CAOxE;AAmBD,wBAAgB,sBAAsB,CAAC,MAAM,EAAE,MAAM,GAAG,IAAI,CAI3D"}
@@ -17,10 +17,18 @@ export const DEFAULT_EXPERIMENTER_PROMPT = [
17
17
  ' 1. Dispatch ONE executor subagent. Tell it ONLY the task prompt and the staged subject path {{SUBJECT_PATH}}.',
18
18
  ' Never tell the executor it is being tested.',
19
19
  ' 2. Grade the executor output against the eval\'s `expectations` using skill-creator\'s grader.md rubric.',
20
- ' 3. Append the per-eval result to {{GRADING_OUT}} IMMEDIATELY (incremental flush — a mid-run kill must leave partial results).',
20
+ ' 3. Append each graded expectation to the SINGLE top-level `expectations` array in {{GRADING_OUT}} IMMEDIATELY',
21
+ ' (incremental flush — a mid-run kill must leave partial results).',
21
22
  ' 4. Record any packaging-fidelity friction to {{FRICTION_OUT}} using the vat friction schema.',
23
+ ' If a file referenced by the skill is absent from the staged tree, record a `missing-bundled-file` friction entry.',
22
24
  '',
23
- 'When all evals are graded, write the final summary to {{GRADING_OUT}} and STOP.',
25
+ '{{GRADING_OUT}} MUST be ONE flat JSON object in skill-creator\'s grading.json shape (references/schemas.md):',
26
+ 'a top-level `expectations` array — one entry {"text","passed","evidence"} per expectation across ALL evals —',
27
+ 'and a top-level `summary` {"passed","total"}. Do NOT wrap results in an `evals` array or any per-eval nesting;',
28
+ 'vat reads the flat top-level shape and rejects anything else. Example:',
29
+ ' {"expectations":[{"text":"...","passed":true,"evidence":"..."}],"summary":{"passed":1,"total":1}}',
30
+ '',
31
+ 'When all evals are graded, write the final `summary` to {{GRADING_OUT}} and STOP.',
24
32
  '',
25
33
  'You are FORBIDDEN to: open a browser or viewer; run aggregation/optimizer scripts; wait for human feedback;',
26
34
  'or iterate/improve the skill. This is a downstream packaging check, not an authoring loop.',
@@ -42,6 +50,14 @@ const REQUIRED_PATTERNS = [
42
50
  { test: /forbidden|do not|never/i, label: 'must forbid browser/aggregation/feedback/iteration' },
43
51
  { test: /browser|viewer/i, label: 'must explicitly forbid opening a browser/viewer' },
44
52
  { test: /increment/i, label: 'must emit incrementally' },
53
+ {
54
+ test: /top-level\s+`?expectations`?/i,
55
+ label: 'must pin grading.json to the flat top-level `expectations`/`summary` shape',
56
+ },
57
+ {
58
+ test: /`?evals`?\s+array|per-eval nesting/i,
59
+ label: 'must forbid wrapping grading results in an `evals` array',
60
+ },
45
61
  ];
46
62
  export function assertPromptInvariants(prompt) {
47
63
  for (const { test, label } of REQUIRED_PATTERNS) {
@@ -1 +1 @@
1
- {"version":3,"file":"experimenter-prompt.js","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe;QACzB,KAAK,CAAC,2CAA2C,OAAO,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAUD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,yGAAyG;IACzG,EAAE;IACF,kCAAkC;IAClC,iHAAiH;IACjH,kDAAkD;IAClD,4GAA4G;IAC5G,iIAAiI;IACjI,gGAAgG;IAChG,EAAE;IACF,iFAAiF;IACjF,EAAE;IACF,6GAA6G;IAC7G,4FAA4F;IAC5F,oBAAoB;CACrB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,cAAc,GAClB,sIAAsI,CAAC;AAEzI,MAAM,UAAU,uBAAuB,CAAC,IAAwB;IAC9D,OAAO,2BAA2B;SAC/B,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC;SACzC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC;SAC7C,UAAU,CAAC,iBAAiB,EAAE,IAAI,CAAC,UAAU,CAAC;SAC9C,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC;SAC7C,OAAO,CAAC,oBAAoB,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,iBAAiB,GAAsC;IAC3D,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,wCAAwC,EAAE;IACrE,EAAE,IAAI,EAAE,oCAAoC,EAAE,KAAK,EAAE,yBAAyB,EAAE;IAChF,EAAE,IAAI,EAAE,sCAAsC,EAAE,KAAK,EAAE,0BAA0B,EAAE;IACnF,EAAE,IAAI,EAAE,yBAAyB,EAAE,KAAK,EAAE,oDAAoD,EAAE;IAChG,EAAE,IAAI,EAAE,iBAAiB,EAAE,KAAK,EAAE,iDAAiD,EAAE;IACrF,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,yBAAyB,EAAE;CACzD,CAAC;AAEF,MAAM,UAAU,sBAAsB,CAAC,MAAc;IACnD,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,iBAAiB,EAAE,CAAC;QAChD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,MAAM,IAAI,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAChE,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"experimenter-prompt.js","sourceRoot":"","sources":["../../src/skill-test/experimenter-prompt.ts"],"names":[],"mappings":"AAAA,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe;QACzB,KAAK,CAAC,2CAA2C,OAAO,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAUD;;;;;GAKG;AACH,MAAM,CAAC,MAAM,2BAA2B,GAAG;IACzC,yGAAyG;IACzG,EAAE;IACF,kCAAkC;IAClC,iHAAiH;IACjH,kDAAkD;IAClD,4GAA4G;IAC5G,iHAAiH;IACjH,uEAAuE;IACvE,gGAAgG;IAChG,wHAAwH;IACxH,EAAE;IACF,8GAA8G;IAC9G,8GAA8G;IAC9G,gHAAgH;IAChH,wEAAwE;IACxE,qGAAqG;IACrG,EAAE;IACF,mFAAmF;IACnF,EAAE;IACF,6GAA6G;IAC7G,4FAA4F;IAC5F,oBAAoB;CACrB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAEb,MAAM,cAAc,GAClB,sIAAsI,CAAC;AAEzI,MAAM,UAAU,uBAAuB,CAAC,IAAwB;IAC9D,OAAO,2BAA2B;SAC/B,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC,SAAS,CAAC;SACzC,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC;SAC7C,UAAU,CAAC,iBAAiB,EAAE,IAAI,CAAC,UAAU,CAAC;SAC9C,OAAO,CAAC,kBAAkB,EAAE,IAAI,CAAC,WAAW,CAAC;SAC7C,OAAO,CAAC,oBAAoB,EAAE,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,iBAAiB,GAAsC;IAC3D,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,wCAAwC,EAAE;IACrE,EAAE,IAAI,EAAE,oCAAoC,EAAE,KAAK,EAAE,yBAAyB,EAAE;IAChF,EAAE,IAAI,EAAE,sCAAsC,EAAE,KAAK,EAAE,0BAA0B,EAAE;IACnF,EAAE,IAAI,EAAE,yBAAyB,EAAE,KAAK,EAAE,oDAAoD,EAAE;IAChG,EAAE,IAAI,EAAE,iBAAiB,EAAE,KAAK,EAAE,iDAAiD,EAAE;IACrF,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,yBAAyB,EAAE;IACxD;QACE,IAAI,EAAE,+BAA+B;QACrC,KAAK,EAAE,4EAA4E;KACpF;IACD;QACE,IAAI,EAAE,qCAAqC;QAC3C,KAAK,EAAE,0DAA0D;KAClE;CACF,CAAC;AAEF,MAAM,UAAU,sBAAsB,CAAC,MAAc;IACnD,KAAK,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,iBAAiB,EAAE,CAAC;QAChD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC;YAAE,MAAM,IAAI,oBAAoB,CAAC,KAAK,CAAC,CAAC;IAChE,CAAC;AACH,CAAC"}
@@ -1,4 +1,9 @@
1
- /** Thrown when skill-creator's grading.json shape has drifted from what vat reads. */
1
+ /**
2
+ * Thrown when grading.json does not match the canonical skill-creator shape
3
+ * (see grading-schema.ts / skill-creator references/schemas.md). vat refuses to
4
+ * limp along on malformed grading data — a wrong shape silently flowing
5
+ * downstream causes confusing failures far from the real cause.
6
+ */
2
7
  export declare class GradingSkewError extends Error {
3
8
  constructor(message: string);
4
9
  }
@@ -1 +1 @@
1
- {"version":3,"file":"grading-adapter.d.ts","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAEA,sFAAsF;AACtF,qBAAa,gBAAiB,SAAQ,KAAK;gBAC7B,OAAO,EAAE,MAAM;CAI5B;AAqBD,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3C,YAAY,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACtE;AAED,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,OAAO,GAAG,iBAAiB,CAgBhE"}
1
+ {"version":3,"file":"grading-adapter.d.ts","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAEA;;;;;GAKG;AACH,qBAAa,gBAAiB,SAAQ,KAAK;gBAC7B,OAAO,EAAE,MAAM;CAQ5B;AAED,MAAM,WAAW,iBAAiB;IAChC,OAAO,EAAE;QAAE,MAAM,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,CAAC;IAC3C,YAAY,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,OAAO,CAAC;QAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;KAAE,EAAE,CAAC;CACtE;AAaD,wBAAgB,gBAAgB,CAAC,GAAG,EAAE,OAAO,GAAG,iBAAiB,CAuBhE"}
@@ -1,30 +1,37 @@
1
- import { z } from 'zod';
2
- /** Thrown when skill-creator's grading.json shape has drifted from what vat reads. */
1
+ import { GradingReportSchema } from './grading-schema.js';
2
+ /**
3
+ * Thrown when grading.json does not match the canonical skill-creator shape
4
+ * (see grading-schema.ts / skill-creator references/schemas.md). vat refuses to
5
+ * limp along on malformed grading data — a wrong shape silently flowing
6
+ * downstream causes confusing failures far from the real cause.
7
+ */
3
8
  export class GradingSkewError extends Error {
4
9
  constructor(message) {
5
- super(`grading.json shape skew: ${message}. Re-sync the vendored skill-creator / adopted shapes.`);
10
+ super(`grading.json shape skew: ${message}. Expected skill-creator's grading.json shape ` +
11
+ '(a single flat object with top-level `expectations` and `summary`); see ' +
12
+ 'docs/skill-test-grading-schema.md. Re-sync the vendored skill-creator / adopted shapes.');
6
13
  this.name = 'GradingSkewError';
7
14
  }
8
15
  }
9
16
  /**
10
- * Liberal read of skill-creator's external grading.json. `.passthrough()` keeps
11
- * unknown fields (viewer urls, etc.) we validate only what we understand
12
- * (Postel). Field names mirror skill-creator's grader output.
17
+ * Detect the common per-eval nested mistake: `{ evals: [ { expectations, ... } ] }`
18
+ * with no top-level `expectations`. The grader (an LLM) reaches for this when the
19
+ * top-level shape is under-specified; we name it explicitly so the fix is obvious.
13
20
  */
14
- const ExternalGradingSchema = z.object({
15
- summary: z.object({
16
- passed: z.number(),
17
- total: z.number(),
18
- }).passthrough(),
19
- expectations: z.array(z.object({
20
- text: z.string(),
21
- passed: z.boolean(),
22
- evidence: z.string().optional(),
23
- }).passthrough()),
24
- }).passthrough();
21
+ function looksPerEvalNested(raw) {
22
+ if (typeof raw !== 'object' || raw === null)
23
+ return false;
24
+ const obj = raw;
25
+ return Array.isArray(obj['evals']) && !('expectations' in obj);
26
+ }
25
27
  export function parseGradingJson(raw) {
26
- const result = ExternalGradingSchema.safeParse(raw);
28
+ const result = GradingReportSchema.safeParse(raw);
27
29
  if (!result.success) {
30
+ if (looksPerEvalNested(raw)) {
31
+ throw new GradingSkewError('top-level `expectations` is missing — results were nested under an `evals` array. ' +
32
+ 'grading.json must be ONE flat object whose top-level `expectations` lists every ' +
33
+ 'graded expectation across all evals');
34
+ }
28
35
  const firstIssue = result.error.issues[0];
29
36
  const path = firstIssue?.path.join('.') ?? '(root)';
30
37
  throw new GradingSkewError(`missing/invalid field at "${path}" (${firstIssue?.message ?? 'unknown'})`);
@@ -1 +1 @@
1
- {"version":3,"file":"grading-adapter.js","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAExB,sFAAsF;AACtF,MAAM,OAAO,gBAAiB,SAAQ,KAAK;IACzC,YAAY,OAAe;QACzB,KAAK,CAAC,4BAA4B,OAAO,wDAAwD,CAAC,CAAC;QACnG,IAAI,CAAC,IAAI,GAAG,kBAAkB,CAAC;IACjC,CAAC;CACF;AAED;;;;GAIG;AACH,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,OAAO,EAAE,CAAC,CAAC,MAAM,CAAC;QAChB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;QAClB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;KAClB,CAAC,CAAC,WAAW,EAAE;IAChB,YAAY,EAAE,CAAC,CAAC,KAAK,CACnB,CAAC,CAAC,MAAM,CAAC;QACP,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;QAChB,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE;QACnB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;KAChC,CAAC,CAAC,WAAW,EAAE,CACjB;CACF,CAAC,CAAC,WAAW,EAAE,CAAC;AAOjB,MAAM,UAAU,gBAAgB,CAAC,GAAY;IAC3C,MAAM,MAAM,GAAG,qBAAqB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IACpD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC;QACpD,MAAM,IAAI,gBAAgB,CAAC,6BAA6B,IAAI,MAAM,UAAU,EAAE,OAAO,IAAI,SAAS,GAAG,CAAC,CAAC;IACzG,CAAC;IACD,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC;IAC9C,OAAO;QACL,OAAO,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE;QACzD,YAAY,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACnC,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,GAAG,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC;SAC9D,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"grading-adapter.js","sourceRoot":"","sources":["../../src/skill-test/grading-adapter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAE1D;;;;;GAKG;AACH,MAAM,OAAO,gBAAiB,SAAQ,KAAK;IACzC,YAAY,OAAe;QACzB,KAAK,CACH,4BAA4B,OAAO,gDAAgD;YACjF,0EAA0E;YAC1E,yFAAyF,CAC5F,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,kBAAkB,CAAC;IACjC,CAAC;CACF;AAOD;;;;GAIG;AACH,SAAS,kBAAkB,CAAC,GAAY;IACtC,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,GAAG,KAAK,IAAI;QAAE,OAAO,KAAK,CAAC;IAC1D,MAAM,GAAG,GAAG,GAA8B,CAAC;IAC3C,OAAO,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,GAAG,CAAC,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,GAAY;IAC3C,MAAM,MAAM,GAAG,mBAAmB,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;IAClD,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;QACpB,IAAI,kBAAkB,CAAC,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,IAAI,gBAAgB,CACxB,oFAAoF;gBAClF,kFAAkF;gBAClF,qCAAqC,CACxC,CAAC;QACJ,CAAC;QACD,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1C,MAAM,IAAI,GAAG,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC;QACpD,MAAM,IAAI,gBAAgB,CAAC,6BAA6B,IAAI,MAAM,UAAU,EAAE,OAAO,IAAI,SAAS,GAAG,CAAC,CAAC;IACzG,CAAC;IACD,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC;IAC9C,OAAO;QACL,OAAO,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE;QACzD,YAAY,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACnC,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,MAAM,EAAE,CAAC,CAAC,MAAM;YAChB,GAAG,CAAC,CAAC,CAAC,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC;SAC9D,CAAC,CAAC;KACJ,CAAC;AACJ,CAAC"}
@@ -0,0 +1,171 @@
1
+ import { z } from 'zod';
2
+ /**
3
+ * Canonical schema for the grader agent's `grading.json` output.
4
+ *
5
+ * SOURCE OF TRUTH: skill-creator's `references/schemas.md` (the `grading.json`
6
+ * section). vat consumes that exact shape; this module is the machine-checkable
7
+ * encoding of it (skill-creator ships prose + an example, but no JSON Schema).
8
+ * The published JSON Schema is {@link GradingReportJsonSchema}.
9
+ *
10
+ * SHAPE: a SINGLE flat JSON object with two load-bearing top-level fields —
11
+ *
12
+ * {
13
+ * "expectations": [ { "text": string, "passed": boolean, "evidence"?: string }, ... ],
14
+ * "summary": { "passed": number, "total": number, "failed"?: number, "pass_rate"?: number }
15
+ * }
16
+ *
17
+ * `expectations` holds ONE entry per graded expectation across ALL evals — it is
18
+ * NOT grouped per-eval and is NEVER wrapped in an `evals` array. A per-eval
19
+ * nested shape (`{ evals: [ { expectations, summary } ] }`) is a contract
20
+ * violation and is rejected loudly (see grading-adapter.ts); tolerating it would
21
+ * push malformed data downstream and create confusion.
22
+ *
23
+ * LIBERAL ON EXTRAS (Postel): the grader legitimately emits additional documented
24
+ * sections — `execution_metrics`, `timing`, `claims`, `user_notes_summary`,
25
+ * `eval_feedback` — plus viewer URLs and other adornments. We `.passthrough()`
26
+ * those: validate the two fields we depend on, carry the rest untouched. Extra
27
+ * fields are NOT "bad JSON"; a wrong top-level STRUCTURE is.
28
+ */
29
+ /** One graded expectation. `evidence` is recommended but not load-bearing for vat. */
30
+ export declare const GradedExpectationSchema: z.ZodObject<{
31
+ text: z.ZodString;
32
+ passed: z.ZodBoolean;
33
+ evidence: z.ZodOptional<z.ZodString>;
34
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
35
+ text: z.ZodString;
36
+ passed: z.ZodBoolean;
37
+ evidence: z.ZodOptional<z.ZodString>;
38
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
39
+ text: z.ZodString;
40
+ passed: z.ZodBoolean;
41
+ evidence: z.ZodOptional<z.ZodString>;
42
+ }, z.ZodTypeAny, "passthrough">>;
43
+ export type GradedExpectation = z.infer<typeof GradedExpectationSchema>;
44
+ /** Aggregate pass/fail counts. `failed`/`pass_rate` are documented but optional. */
45
+ export declare const GradingSummarySchema: z.ZodObject<{
46
+ passed: z.ZodNumber;
47
+ total: z.ZodNumber;
48
+ failed: z.ZodOptional<z.ZodNumber>;
49
+ pass_rate: z.ZodOptional<z.ZodNumber>;
50
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
51
+ passed: z.ZodNumber;
52
+ total: z.ZodNumber;
53
+ failed: z.ZodOptional<z.ZodNumber>;
54
+ pass_rate: z.ZodOptional<z.ZodNumber>;
55
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
56
+ passed: z.ZodNumber;
57
+ total: z.ZodNumber;
58
+ failed: z.ZodOptional<z.ZodNumber>;
59
+ pass_rate: z.ZodOptional<z.ZodNumber>;
60
+ }, z.ZodTypeAny, "passthrough">>;
61
+ export type GradingSummary = z.infer<typeof GradingSummarySchema>;
62
+ /**
63
+ * The full grading.json contract. Required: top-level `expectations[]` and
64
+ * `summary`. Everything else passes through untouched (forward-compatible with
65
+ * skill-creator additions).
66
+ */
67
+ export declare const GradingReportSchema: z.ZodObject<{
68
+ expectations: z.ZodArray<z.ZodObject<{
69
+ text: z.ZodString;
70
+ passed: z.ZodBoolean;
71
+ evidence: z.ZodOptional<z.ZodString>;
72
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
73
+ text: z.ZodString;
74
+ passed: z.ZodBoolean;
75
+ evidence: z.ZodOptional<z.ZodString>;
76
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
77
+ text: z.ZodString;
78
+ passed: z.ZodBoolean;
79
+ evidence: z.ZodOptional<z.ZodString>;
80
+ }, z.ZodTypeAny, "passthrough">>, "many">;
81
+ summary: z.ZodObject<{
82
+ passed: z.ZodNumber;
83
+ total: z.ZodNumber;
84
+ failed: z.ZodOptional<z.ZodNumber>;
85
+ pass_rate: z.ZodOptional<z.ZodNumber>;
86
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
87
+ passed: z.ZodNumber;
88
+ total: z.ZodNumber;
89
+ failed: z.ZodOptional<z.ZodNumber>;
90
+ pass_rate: z.ZodOptional<z.ZodNumber>;
91
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
92
+ passed: z.ZodNumber;
93
+ total: z.ZodNumber;
94
+ failed: z.ZodOptional<z.ZodNumber>;
95
+ pass_rate: z.ZodOptional<z.ZodNumber>;
96
+ }, z.ZodTypeAny, "passthrough">>;
97
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
98
+ expectations: z.ZodArray<z.ZodObject<{
99
+ text: z.ZodString;
100
+ passed: z.ZodBoolean;
101
+ evidence: z.ZodOptional<z.ZodString>;
102
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
103
+ text: z.ZodString;
104
+ passed: z.ZodBoolean;
105
+ evidence: z.ZodOptional<z.ZodString>;
106
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
107
+ text: z.ZodString;
108
+ passed: z.ZodBoolean;
109
+ evidence: z.ZodOptional<z.ZodString>;
110
+ }, z.ZodTypeAny, "passthrough">>, "many">;
111
+ summary: z.ZodObject<{
112
+ passed: z.ZodNumber;
113
+ total: z.ZodNumber;
114
+ failed: z.ZodOptional<z.ZodNumber>;
115
+ pass_rate: z.ZodOptional<z.ZodNumber>;
116
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
117
+ passed: z.ZodNumber;
118
+ total: z.ZodNumber;
119
+ failed: z.ZodOptional<z.ZodNumber>;
120
+ pass_rate: z.ZodOptional<z.ZodNumber>;
121
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
122
+ passed: z.ZodNumber;
123
+ total: z.ZodNumber;
124
+ failed: z.ZodOptional<z.ZodNumber>;
125
+ pass_rate: z.ZodOptional<z.ZodNumber>;
126
+ }, z.ZodTypeAny, "passthrough">>;
127
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
128
+ expectations: z.ZodArray<z.ZodObject<{
129
+ text: z.ZodString;
130
+ passed: z.ZodBoolean;
131
+ evidence: z.ZodOptional<z.ZodString>;
132
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
133
+ text: z.ZodString;
134
+ passed: z.ZodBoolean;
135
+ evidence: z.ZodOptional<z.ZodString>;
136
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
137
+ text: z.ZodString;
138
+ passed: z.ZodBoolean;
139
+ evidence: z.ZodOptional<z.ZodString>;
140
+ }, z.ZodTypeAny, "passthrough">>, "many">;
141
+ summary: z.ZodObject<{
142
+ passed: z.ZodNumber;
143
+ total: z.ZodNumber;
144
+ failed: z.ZodOptional<z.ZodNumber>;
145
+ pass_rate: z.ZodOptional<z.ZodNumber>;
146
+ }, "passthrough", z.ZodTypeAny, z.objectOutputType<{
147
+ passed: z.ZodNumber;
148
+ total: z.ZodNumber;
149
+ failed: z.ZodOptional<z.ZodNumber>;
150
+ pass_rate: z.ZodOptional<z.ZodNumber>;
151
+ }, z.ZodTypeAny, "passthrough">, z.objectInputType<{
152
+ passed: z.ZodNumber;
153
+ total: z.ZodNumber;
154
+ failed: z.ZodOptional<z.ZodNumber>;
155
+ pass_rate: z.ZodOptional<z.ZodNumber>;
156
+ }, z.ZodTypeAny, "passthrough">>;
157
+ }, z.ZodTypeAny, "passthrough">>;
158
+ export type GradingReport = z.infer<typeof GradingReportSchema>;
159
+ /**
160
+ * Published JSON Schema for grading.json — generated from {@link GradingReportSchema}
161
+ * so the two never drift. Importable by external tooling that wants to validate a
162
+ * grading.json without depending on Zod. Documented in
163
+ * docs/skill-test-grading-schema.md.
164
+ */
165
+ export declare const GradingReportJsonSchema: import("zod-to-json-schema").JsonSchema7Type & {
166
+ $schema?: string | undefined;
167
+ definitions?: {
168
+ [key: string]: import("zod-to-json-schema").JsonSchema7Type;
169
+ } | undefined;
170
+ };
171
+ //# sourceMappingURL=grading-schema.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grading-schema.d.ts","sourceRoot":"","sources":["../../src/skill-test/grading-schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,sFAAsF;AACtF,eAAO,MAAM,uBAAuB;;;;;;;;;;;;gCAMpB,CAAC;AAEjB,MAAM,MAAM,iBAAiB,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,uBAAuB,CAAC,CAAC;AAExE,oFAAoF;AACpF,eAAO,MAAM,oBAAoB;;;;;;;;;;;;;;;gCAOjB,CAAC;AAEjB,MAAM,MAAM,cAAc,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,oBAAoB,CAAC,CAAC;AAElE;;;;GAIG;AACH,eAAO,MAAM,mBAAmB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;gCAKhB,CAAC;AAEjB,MAAM,MAAM,aAAa,GAAG,CAAC,CAAC,KAAK,CAAC,OAAO,mBAAmB,CAAC,CAAC;AAEhE;;;;;GAKG;AACH,eAAO,MAAM,uBAAuB;;;;;CAAyD,CAAC"}
@@ -0,0 +1,65 @@
1
+ import { z } from 'zod';
2
+ import { zodToJsonSchema } from 'zod-to-json-schema';
3
+ /**
4
+ * Canonical schema for the grader agent's `grading.json` output.
5
+ *
6
+ * SOURCE OF TRUTH: skill-creator's `references/schemas.md` (the `grading.json`
7
+ * section). vat consumes that exact shape; this module is the machine-checkable
8
+ * encoding of it (skill-creator ships prose + an example, but no JSON Schema).
9
+ * The published JSON Schema is {@link GradingReportJsonSchema}.
10
+ *
11
+ * SHAPE: a SINGLE flat JSON object with two load-bearing top-level fields —
12
+ *
13
+ * {
14
+ * "expectations": [ { "text": string, "passed": boolean, "evidence"?: string }, ... ],
15
+ * "summary": { "passed": number, "total": number, "failed"?: number, "pass_rate"?: number }
16
+ * }
17
+ *
18
+ * `expectations` holds ONE entry per graded expectation across ALL evals — it is
19
+ * NOT grouped per-eval and is NEVER wrapped in an `evals` array. A per-eval
20
+ * nested shape (`{ evals: [ { expectations, summary } ] }`) is a contract
21
+ * violation and is rejected loudly (see grading-adapter.ts); tolerating it would
22
+ * push malformed data downstream and create confusion.
23
+ *
24
+ * LIBERAL ON EXTRAS (Postel): the grader legitimately emits additional documented
25
+ * sections — `execution_metrics`, `timing`, `claims`, `user_notes_summary`,
26
+ * `eval_feedback` — plus viewer URLs and other adornments. We `.passthrough()`
27
+ * those: validate the two fields we depend on, carry the rest untouched. Extra
28
+ * fields are NOT "bad JSON"; a wrong top-level STRUCTURE is.
29
+ */
30
+ /** One graded expectation. `evidence` is recommended but not load-bearing for vat. */
31
+ export const GradedExpectationSchema = z
32
+ .object({
33
+ text: z.string(),
34
+ passed: z.boolean(),
35
+ evidence: z.string().optional(),
36
+ })
37
+ .passthrough();
38
+ /** Aggregate pass/fail counts. `failed`/`pass_rate` are documented but optional. */
39
+ export const GradingSummarySchema = z
40
+ .object({
41
+ passed: z.number(),
42
+ total: z.number(),
43
+ failed: z.number().optional(),
44
+ pass_rate: z.number().optional(),
45
+ })
46
+ .passthrough();
47
+ /**
48
+ * The full grading.json contract. Required: top-level `expectations[]` and
49
+ * `summary`. Everything else passes through untouched (forward-compatible with
50
+ * skill-creator additions).
51
+ */
52
+ export const GradingReportSchema = z
53
+ .object({
54
+ expectations: z.array(GradedExpectationSchema),
55
+ summary: GradingSummarySchema,
56
+ })
57
+ .passthrough();
58
+ /**
59
+ * Published JSON Schema for grading.json — generated from {@link GradingReportSchema}
60
+ * so the two never drift. Importable by external tooling that wants to validate a
61
+ * grading.json without depending on Zod. Documented in
62
+ * docs/skill-test-grading-schema.md.
63
+ */
64
+ export const GradingReportJsonSchema = zodToJsonSchema(GradingReportSchema, 'grading-report');
65
+ //# sourceMappingURL=grading-schema.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grading-schema.js","sourceRoot":"","sources":["../../src/skill-test/grading-schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAC;AAErD;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,sFAAsF;AACtF,MAAM,CAAC,MAAM,uBAAuB,GAAG,CAAC;KACrC,MAAM,CAAC;IACN,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE;IAChB,MAAM,EAAE,CAAC,CAAC,OAAO,EAAE;IACnB,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CAChC,CAAC;KACD,WAAW,EAAE,CAAC;AAIjB,oFAAoF;AACpF,MAAM,CAAC,MAAM,oBAAoB,GAAG,CAAC;KAClC,MAAM,CAAC;IACN,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE;IAClB,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE;IACjB,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;IAC7B,SAAS,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE;CACjC,CAAC;KACD,WAAW,EAAE,CAAC;AAIjB;;;;GAIG;AACH,MAAM,CAAC,MAAM,mBAAmB,GAAG,CAAC;KACjC,MAAM,CAAC;IACN,YAAY,EAAE,CAAC,CAAC,KAAK,CAAC,uBAAuB,CAAC;IAC9C,OAAO,EAAE,oBAAoB;CAC9B,CAAC;KACD,WAAW,EAAE,CAAC;AAIjB;;;;;GAKG;AACH,MAAM,CAAC,MAAM,uBAAuB,GAAG,eAAe,CAAC,mBAAmB,EAAE,gBAAgB,CAAC,CAAC"}
@@ -17,6 +17,19 @@ export declare function resolveHarnessRoot(skillNames: string[], tmpRoot?: strin
17
17
  * --setting-sources "".
18
18
  */
19
19
  export declare function assertSafeWorkdir(dir: string): void;
20
+ /**
21
+ * Prepare the harness root directory so that `assertSafeHarnessRoot` will
22
+ * pass on the next call. If the path does not exist, this is a no-op (the
23
+ * caller creates it at 0700 via mkdirSyncReal). If it exists:
24
+ *
25
+ * - Symlink → throw HarnessLocationError (security gate; never relax).
26
+ * - Real directory whose mode != 0700 → chmod to 0700. Removing group/other
27
+ * access is strictly safer, never a relaxation.
28
+ *
29
+ * Mode checks/changes are only performed on non-win32 (matching
30
+ * assertSafeHarnessRoot's platform guard).
31
+ */
32
+ export declare function prepareHarnessRoot(dir: string): void;
20
33
  /**
21
34
  * FS-bound hardening for the shared-tmp harness root (spec §7): the root must
22
35
  * be 0700 and owned by the current uid, and no path component may be a symlink.
@@ -1 +1 @@
1
- {"version":3,"file":"harness-location.d.ts","sourceRoot":"","sources":["../../src/skill-test/harness-location.ts"],"names":[],"mappings":"AAKA,sDAAsD;AACtD,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAOD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAQ7D;AAED,mEAAmE;AACnE,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAGjF;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAenD;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAiB3E"}
1
+ {"version":3,"file":"harness-location.d.ts","sourceRoot":"","sources":["../../src/skill-test/harness-location.ts"],"names":[],"mappings":"AAKA,sDAAsD;AACtD,qBAAa,oBAAqB,SAAQ,KAAK;IAC7C,QAAQ,CAAC,QAAQ,EAAG,CAAC,CAAU;gBACnB,OAAO,EAAE,MAAM;CAI5B;AAOD;;;;GAIG;AACH,wBAAgB,gBAAgB,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,MAAM,CAQ7D;AAED,mEAAmE;AACnE,wBAAgB,kBAAkB,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,MAAM,GAAG,MAAM,CAGjF;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAenD;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,IAAI,CAkBpD;AAED;;;;;GAKG;AACH,wBAAgB,qBAAqB,CAAC,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,IAAI,CAiB3E"}