@united-workforce/eval 0.1.0 → 0.1.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -32,7 +32,7 @@ export function registerRunCommand(program) {
32
32
  program
33
33
  .command("run <task>")
34
34
  .description("Run eval on a task directory or tarball")
35
- .option("--agent <name>", "agent adapter to use", "hermes")
35
+ .option("--agent <name>", "agent adapter to use", "uwf-hermes")
36
36
  .option("--model <model>", "model override")
37
37
  .option("--count <n>", "number of eval runs", "1")
38
38
  .action(async (task, opts) => {
@@ -1 +1 @@
1
- {"version":3,"file":"run.js","sourceRoot":"","sources":["../../src/commands/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAIpC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,gBAAgB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAQtD,KAAK,UAAU,OAAO,CACpB,OAAe,EACf,KAAa,EACb,KAAa,EACb,aAAqB;IAErB,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,QAAQ,CAAC;IAEvC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,OAAO,CAAC;QACjC,OAAO;QACP,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,KAAK;QACL,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,QAAQ;KACnC,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,MAAM,eAAe,EAAE,CAAC;IAC1C,MAAM,MAAM,GAAkB,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC;IAC9D,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC;QAC9B,SAAS;QACT,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,OAAO;QACP,QAAQ;QACR,QAAQ;QACR,MAAM;KACP,CAAC,CAAC;IAEH,OAAO;QACL,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,IAAI,EAAE,QAAQ,CAAC,IAAI;QACnB,MAAM,EAAE,SAAS,CAAC,MAAM;KACzB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,OAAgB;IACjD,OAAO;SACJ,OAAO,CAAC,YAAY,CAAC;SACrB,WAAW,CAAC,yCAAyC,CAAC;SACtD,MAAM,CAAC,gBAAgB,EAAE,sBAAsB,EAAE,QAAQ,CAAC;SAC1D,MAAM,CAAC,iBAAiB,EAAE,gBAAgB,CAAC;SAC3C,MAAM,CAAC,aAAa,EAAE,qBAAqB,EAAE,GAAG,CAAC;SACjD,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAAmB,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;YAC7D,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;YACrB,OAAO;QACT,CAAC;QAED,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAC;QAEzC,IAAI,CAAC;YACH,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,MAAM,OAAO,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC;YACpE,CAAC;YACD,MAAM,MAAM,GAAG,KAAK,KAAK,CAAC,CAAC,CAA
C,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAClD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACtD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC3D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;YACrC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
1
+ {"version":3,"file":"run.js","sourceRoot":"","sources":["../../src/commands/run.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAIpC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,gBAAgB,EAAE,OAAO,EAAE,MAAM,oBAAoB,CAAC;AAEjF,OAAO,EAAE,eAAe,EAAE,MAAM,qBAAqB,CAAC;AAQtD,KAAK,UAAU,OAAO,CACpB,OAAe,EACf,KAAa,EACb,KAAa,EACb,aAAqB;IAErB,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC;IACxC,MAAM,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,QAAQ,CAAC;IAEvC,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,OAAO,CAAC;QACjC,OAAO;QACP,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,MAAM,EAAE,QAAQ,CAAC,MAAM;QACvB,KAAK;QACL,QAAQ,EAAE,QAAQ,CAAC,MAAM,CAAC,QAAQ;KACnC,CAAC,CAAC;IAEH,MAAM,SAAS,GAAG,MAAM,eAAe,EAAE,CAAC;IAC1C,MAAM,MAAM,GAAkB,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC;IAC9D,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC;QAC9B,SAAS;QACT,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,OAAO;QACP,QAAQ;QACR,QAAQ;QACR,MAAM;KACP,CAAC,CAAC;IAEH,OAAO;QACL,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,IAAI,EAAE,QAAQ,CAAC,IAAI;QACnB,MAAM,EAAE,SAAS,CAAC,MAAM;KACzB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,OAAgB;IACjD,OAAO;SACJ,OAAO,CAAC,YAAY,CAAC;SACrB,WAAW,CAAC,yCAAyC,CAAC;SACtD,MAAM,CAAC,gBAAgB,EAAE,sBAAsB,EAAE,YAAY,CAAC;SAC9D,MAAM,CAAC,iBAAiB,EAAE,gBAAgB,CAAC;SAC3C,MAAM,CAAC,aAAa,EAAE,qBAAqB,EAAE,GAAG,CAAC;SACjD,MAAM,CAAC,KAAK,EAAE,IAAY,EAAE,IAAmB,EAAE,EAAE;QAClD,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC;QAC/B,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;YAC7D,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;YACrB,OAAO;QACT,CAAC;QAED,MAAM,aAAa,GAAG,gBAAgB,EAAE,CAAC;QAEzC,IAAI,CAAC;YACH,MAAM,OAAO,GAAgB,EAAE,CAAC;YAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC/B,OAAO,CAAC,IAAI,CAAC,MAAM,OAAO,CAAC,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,aAAa,CAAC,CAAC,CAAC;YACpE,CAAC;YACD,MAAM,MAAM,GAAG,KAAK,KAAK,CAAC,CAAC,CAA
C,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YAClD,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACtD,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;YAC3D,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAC;YACrC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACvB,CAAC;IACH,CAAC,CAAC,CAAC;AACP,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"frontmatter.d.ts","sourceRoot":"","sources":["../../../src/judge/builtin/frontmatter.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AA6DrD;;;;GAIG;AACH,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAuBvF"}
1
+ {"version":3,"file":"frontmatter.d.ts","sourceRoot":"","sources":["../../../src/judge/builtin/frontmatter.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAuErD;;;;GAIG;AACH,wBAAsB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,kBAAkB,CAAC,CAuBvF"}
@@ -26,6 +26,15 @@ function extractFrontmatterYaml(output) {
26
26
  }
27
27
  /** Validate a single step's frontmatter, returning a list of errors (empty = valid). */
28
28
  function validateStepFrontmatter(output) {
29
+ // CAS stores the extracted output as a JSON object after the extract pipeline.
30
+ // Accept both: parsed object (from step.output) or raw markdown string.
31
+ if (typeof output === "object" && output !== null && !Array.isArray(output)) {
32
+ const status = output.$status;
33
+ if (typeof status !== "string" || status.trim() === "") {
34
+ return ["$status field is missing or not a non-empty string"];
35
+ }
36
+ return [];
37
+ }
29
38
  const yaml = extractFrontmatterYaml(output);
30
39
  if (yaml === null) {
31
40
  return ["output does not begin with a valid '---' frontmatter block"];
@@ -1 +1 @@
1
- {"version":3,"file":"frontmatter.js","sourceRoot":"","sources":["../../../src/judge/builtin/frontmatter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAE1C,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAGlD,MAAM,GAAG,GAAG,YAAY,CAAC,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC;AAEvD,MAAM,UAAU,GAAG,UAAU,CAAC;AAE9B,MAAM,KAAK,GAAG,KAAK,CAAC;AAQpB;;;;GAIG;AACH,SAAS,sBAAsB,CAAC,MAAe;IAC7C,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,KAAK,IAAI,CAAC,EAAE,CAAC;QACrC,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;IAC9C,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;AACnC,CAAC;AAED,wFAAwF;AACxF,SAAS,uBAAuB,CAAC,MAAe;IAC9C,MAAM,IAAI,GAAG,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAC5C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,CAAC,4DAA4D,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3D,OAAO,CAAC,qCAAqC,OAAO,EAAE,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3E,OAAO,CAAC,mCAAmC,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,MAAM,GAAI,MAAkC,CAAC,OAAO,CAAC;IAC3D,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACvD,OAAO,CAAC,oDAAoD,CAAC,CAAC;IAChE,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,QAAgB;IACxD,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IAExC,MAAM,YAAY,GAAkB,EAAE,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,MAA
M,GAAG,uBAAuB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACpD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,YAAY,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAChC,MAAM,UAAU,GAAG,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC;IACpD,MAAM,KAAK,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,GAAG,CAAC,UAAU,EAAE,sBAAsB,QAAQ,UAAU,UAAU,IAAI,UAAU,EAAE,CAAC,CAAC;IAEpF,OAAO;QACL,KAAK;QACL,IAAI,EAAE,EAAE,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE;QAC9C,MAAM,EAAE,6BAA6B;KACtC,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"frontmatter.js","sourceRoot":"","sources":["../../../src/judge/builtin/frontmatter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAE1C,OAAO,EAAE,6BAA6B,EAAE,MAAM,wBAAwB,CAAC;AACvE,OAAO,EAAE,eAAe,EAAE,MAAM,iBAAiB,CAAC;AAGlD,MAAM,GAAG,GAAG,YAAY,CAAC,EAAE,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,EAAE,CAAC,CAAC;AAEvD,MAAM,UAAU,GAAG,UAAU,CAAC;AAE9B,MAAM,KAAK,GAAG,KAAK,CAAC;AAQpB;;;;GAIG;AACH,SAAS,sBAAsB,CAAC,MAAe;IAC7C,IAAI,OAAO,MAAM,KAAK,QAAQ,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,IAAI,CAAC,MAAM,CAAC,UAAU,CAAC,GAAG,KAAK,IAAI,CAAC,EAAE,CAAC;QACrC,OAAO,IAAI,CAAC;IACd,CAAC;IACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,KAAK,EAAE,CAAC,CAAC;IAC9C,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;QACtB,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC;AACnC,CAAC;AAED,wFAAwF;AACxF,SAAS,uBAAuB,CAAC,MAAe;IAC9C,+EAA+E;IAC/E,wEAAwE;IACxE,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5E,MAAM,MAAM,GAAI,MAAkC,CAAC,OAAO,CAAC;QAC3D,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;YACvD,OAAO,CAAC,oDAAoD,CAAC,CAAC;QAChE,CAAC;QACD,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,MAAM,IAAI,GAAG,sBAAsB,CAAC,MAAM,CAAC,CAAC;IAC5C,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;QAClB,OAAO,CAAC,4DAA4D,CAAC,CAAC;IACxE,CAAC;IAED,IAAI,MAAe,CAAC;IACpB,IAAI,CAAC;QACH,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,MAAM,OAAO,GAAG,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3D,OAAO,CAAC,qCAAqC,OAAO,EAAE,CAAC,CAAC;IAC1D,CAAC;IAED,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,IAAI,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC3E,OAAO,CAAC,mCAAmC,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,MAAM,GAAI,MAAkC,CAAC,OAAO,CAAC;IAC3D,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACvD,OAAO,CAAC,oDAAoD,CAAC,CAAC;IAC
hE,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,QAAgB;IACxD,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IAExC,MAAM,YAAY,GAAkB,EAAE,CAAC;IACvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,MAAM,GAAG,uBAAuB,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACpD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,YAAY,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAChC,MAAM,UAAU,GAAG,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC;IACpD,MAAM,KAAK,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IAE3D,GAAG,CAAC,UAAU,EAAE,sBAAsB,QAAQ,UAAU,UAAU,IAAI,UAAU,EAAE,CAAC,CAAC;IAEpF,OAAO;QACL,KAAK;QACL,IAAI,EAAE,EAAE,UAAU,EAAE,UAAU,EAAE,YAAY,EAAE;QAC9C,MAAM,EAAE,6BAA6B;KACtC,CAAC;AACJ,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@united-workforce/eval",
3
- "version": "0.1.0",
3
+ "version": "0.1.2",
4
4
  "private": false,
5
5
  "files": [
6
6
  "src",
@@ -52,7 +52,7 @@ export function registerRunCommand(program: Command): void {
52
52
  program
53
53
  .command("run <task>")
54
54
  .description("Run eval on a task directory or tarball")
55
- .option("--agent <name>", "agent adapter to use", "hermes")
55
+ .option("--agent <name>", "agent adapter to use", "uwf-hermes")
56
56
  .option("--model <model>", "model override")
57
57
  .option("--count <n>", "number of eval runs", "1")
58
58
  .action(async (task: string, opts: RunCliOptions) => {
@@ -39,6 +39,16 @@ function extractFrontmatterYaml(output: unknown): string | null {
39
39
 
40
40
  /** Validate a single step's frontmatter, returning a list of errors (empty = valid). */
41
41
  function validateStepFrontmatter(output: unknown): string[] {
42
+ // CAS stores the extracted output as a JSON object after the extract pipeline.
43
+ // Accept both: parsed object (from step.output) or raw markdown string.
44
+ if (typeof output === "object" && output !== null && !Array.isArray(output)) {
45
+ const status = (output as Record<string, unknown>).$status;
46
+ if (typeof status !== "string" || status.trim() === "") {
47
+ return ["$status field is missing or not a non-empty string"];
48
+ }
49
+ return [];
50
+ }
51
+
42
52
  const yaml = extractFrontmatterYaml(output);
43
53
  if (yaml === null) {
44
54
  return ["output does not begin with a valid '---' frontmatter block"];