@vercel/agent-eval 0.4.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../../src/lib/classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,CAAC;AAC1B,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;;;qVAiBoT,CAAC;AAEtV;;GAEG;AACH,SAAS,QAAQ,CAAC,IAAY,EAAE,YAAoB;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IAC7C,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5C,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,aAAqB;IACzD,OAAO;QACL,UAAU,EAAE,IAAI,CAAC;YACf,WAAW,EACT,8FAA8F;YAChG,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,IAAI,EAAE,CAAC;qBACJ,MAAM,EAAE;qBACR,QAAQ,CAAC,+DAA+D,CAAC;aAC7E,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE;gBACnC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBAChD,IAAI,CAAC,MAAM;oBAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAChE,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;oBACpC,MAAM,OAAO,GAAkD,EAAE,CAAC;oBAClE,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;wBACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;wBAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;oBAC3E,CAAC;oBACD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;gBAC9B,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,EAAE,KAAK,EAAE,gBAAgB,OAAO,EAAE,EAAE,CAAC;gBAC9C,CAAC;YACH,CAAC;SACF,CAAC;QAEF,SAAS,EAAE,IAAI,CAAC;YACd,WAAW,EACT,8FAA8F;YAChG,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,IAAI,EAAE,CAAC;qBACJ,MAAM,EAAE;qBACR,QAAQ,CAAC,qDAAqD,CAAC;gBAClE,MAAM,EAAE,CAAC;qBACN,MAAM,EAAE;qBACR,QAAQ,CAAC,6CAA6C,CAAC;qBACvD,QAAQ,EAAE;gBACb,KAAK,EAAE,CAAC;qBACL,MAAM,EAAE;qBACR,QAAQ,CAAC,+BAA+B,CAAC;qBACzC,QAAQ,EAAE;aACd,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE;gBACvE,MAAM,MAAM,GAAG,SAAS,IAAI,CAAC,CAAC;gBAC9B,MAAM,KAA
K,GAAG,QAAQ,IAAI,GAAG,CAAC;gBAC9B,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBAChD,IAAI,CAAC,MAAM;oBAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAChE,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAClC,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAC;oBACnD,OAAO;wBACL,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;wBAC1B,UAAU,EAAE,KAAK,CAAC,MAAM;wBACxB,OAAO,EAAE,SAAS,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,OAAO,KAAK,CAAC,MAAM,EAAE;qBACxF,CAAC;gBACJ,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,EAAE,KAAK,EAAE,gBAAgB,OAAO,EAAE,EAAE,CAAC;gBAC9C,CAAC;YACH,CAAC;SACF,CAAC;QAEF,IAAI,EAAE,IAAI,CAAC;YACT,WAAW,EACT,uFAAuF;YACzF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;gBACnE,IAAI,EAAE,CAAC;qBACJ,MAAM,EAAE;qBACR,QAAQ,CAAC,8DAA8D,CAAC;gBAC3E,UAAU,EAAE,CAAC;qBACV,MAAM,EAAE;qBACR,QAAQ,CAAC,iCAAiC,CAAC;qBAC3C,QAAQ,EAAE;aACd,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE;gBAChE,MAAM,UAAU,GAAG,MAAM,IAAI,EAAE,CAAC;gBAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBAChD,IAAI,CAAC,MAAM;oBAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAChE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;gBACvC,MAAM,OAAO,GAAwD,EAAE,CAAC;gBAExE,KAAK,UAAU,UAAU,CAAC,QAAgB,EAAE,OAAe;oBACzD,IAAI,CAAC;wBACH,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;wBAChD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;wBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;4BACrE,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gCACzB,OAAO,CAAC,IAAI,CAAC;oCACX,IAAI,EAAE,OAAO;oCACb,IAAI,EAAE,CAAC,GAAG,CAAC;oCACX,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;iCAC7B,CAAC,CAAC;4BACL,CAAC;wBACH,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,wBAAwB;oBAC1B,CAAC;gBACH,CAAC;gBAED,KAAK,UAAU,SAAS,CAAC,OAAe,EAAE,MAAc;oBACtD,I
AAI,CAAC;wBACH,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;wBACrC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;4BAC5B,IAAI,OAAO,CAAC,MAAM,IAAI,UAAU;gCAAE,MAAM;4BACxC,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;4BAClC,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;4BAClD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;4BAC5B,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;gCACvB,MAAM,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;4BAC7B,CAAC;iCAAM,CAAC;gCACN,MAAM,UAAU,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;4BAC9B,CAAC;wBACH,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,uBAAuB;oBACzB,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;oBAC9B,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;wBACvB,MAAM,SAAS,CAAC,MAAM,EAAE,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;oBAC1D,CAAC;yBAAM,CAAC;wBACN,MAAM,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACpC,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,EAAE,KAAK,EAAE,mBAAmB,OAAO,EAAE,EAAE,CAAC;gBACjD,CAAC;gBAED,OAAO;oBACL,OAAO;oBACP,UAAU,EAAE,OAAO,CAAC,MAAM;oBAC1B,SAAS,EAAE,OAAO,CAAC,MAAM,IAAI,UAAU;iBACxC,CAAC;YACJ,CAAC;SACF,CAAC;KACH,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,aAAqB,EACrB,QAAgB,EAChB,cAAsB;IAEtB,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;IAExE,MAAM,OAAO,GAAG,aAAa,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE,EAAE,CAAC,CAAC;IAEjH,IAAI,cAAc,GAA0B,IAAI,CAAC;IAEjD,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,aAAa,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG;QACf,GAAG,gBAAgB;QACnB,QAAQ,EAAE,IAAI,CAAC;YACb,WAAW,EAAE,4EAA4E;YACzF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,WAAW,EAAE,CAAC;qBACX,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;qBACnC,QAAQ,CAAC,sBAAsB,CAAC;gBACnC,aAAa,EAAE,CAAC;qBACb,MAAM,EAAE;qBACR,QAAQ,CAAC,uCAAuC,CAAC;aACrD,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,WAAW,EAAE,aAAa,EAAE,EAAE,EAAE;gBAChD,cAAc,GAAG,EAAE,WAAW,EAAE,WAA0B,EAAE,aAAa,EAAE,CAAC;gBAC5E,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;YACtB,CAAC;SACF,CAAC;KACH,CAAC;IAEF,IAAI,CAAC;QACH,MAAM,Y
AAY,CAAC;YACjB,KAAK,EAAE,OAAO,CAAC,6BAA6B,CAAC;YAC7C,MAAM,EAAE,wBAAwB;YAChC,MAAM,EAAE,kCAAkC,QAAQ,kBAAkB,cAAc,sFAAsF;YACxK,KAAK,EAAE,QAAQ;YACf,QAAQ,EAAE,WAAW,CAAC,UAAU,CAAC;SAClC,CAAC,CAAC;QAEH,OAAO,cAAc,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,aAAqB,EACrB,QAAgB,EAChB,cAAsB;IAEtB,kCAAkC;IAClC,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,qBAAqB,CAAC,CAAC;IAC9D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC;QAC7D,IAAI,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,aAAa,EAAE,MAAM,CAAC,aAAa,EAAE,CAAC;QAClF,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,WAAW;IACb,CAAC;IAED,mBAAmB;IACnB,MAAM,cAAc,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;IAErF,mBAAmB;IACnB,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC;YACH,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACrE,CAAC;QAAC,MAAM,CAAC;YACP,4BAA4B;QAC9B,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,WAAW,CAAC,eAAiC;IAC3D,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IAC/C,OAAO,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,OAAO,CAAC,CAAC;AACjE,CAAC"}
1
+ {"version":3,"file":"classifier.js","sourceRoot":"","sources":["../../src/lib/classifier.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,IAAI,CAAC;AACxE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,CAAC;AAC1B,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AAGxB;;;;GAIG;AACH,MAAM,UAAU,mBAAmB;IACjC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;AAC7E,CAAC;AAED,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;;;qVAiBoT,CAAC;AAEtV;;GAEG;AACH,SAAS,QAAQ,CAAC,IAAY,EAAE,YAAoB;IAClD,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,EAAE,YAAY,CAAC,CAAC;IAC7C,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5C,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,aAAqB;IACzD,OAAO;QACL,UAAU,EAAE,IAAI,CAAC;YACf,WAAW,EACT,8FAA8F;YAChG,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,IAAI,EAAE,CAAC;qBACJ,MAAM,EAAE;qBACR,QAAQ,CAAC,+DAA+D,CAAC;aAC7E,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE;gBACnC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBAChD,IAAI,CAAC,MAAM;oBAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAChE,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;oBACpC,MAAM,OAAO,GAAkD,EAAE,CAAC;oBAClE,KAAK,MAAM,KAAK,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;wBACnC,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,CAAC;wBAC3C,OAAO,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;oBAC3E,CAAC;oBACD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;gBAC9B,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,EAAE,KAAK,EAAE,gBAAgB,OAAO,EAAE,EAAE,CAAC;gBAC9C,CAAC;YACH,CAAC;SACF,CAAC;QAEF,SAAS,EAAE,IAAI,CAAC;YACd,WAAW,EACT,8FAA8F;YAChG,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,IAAI,EAAE,CAAC;qBACJ,MAAM,EAAE;qBACR,QAAQ,CAAC,qDAAqD,CAAC;gBAClE,MAAM,EAAE,CAAC;qBACN,MAAM,EAAE;qBACR,QAAQ,CAAC,6CAA6C,CAAC;qBACvD,QAAQ,EAAE;gBACb,KAAK,EAAE,CAAC;qBACL,MAAM,EAAE;qBACR,QAAQ,CAAC,+BAA+B,CAAC;qBACzC,QAAQ,EAAE;aACd,CAAC;YACF,OAAO,EAAE,K
AAK,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE;gBACvE,MAAM,MAAM,GAAG,SAAS,IAAI,CAAC,CAAC;gBAC9B,MAAM,KAAK,GAAG,QAAQ,IAAI,GAAG,CAAC;gBAC9B,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBAChD,IAAI,CAAC,MAAM;oBAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAChE,IAAI,CAAC;oBACH,MAAM,OAAO,GAAG,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBAC9C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAClC,MAAM,MAAM,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,KAAK,CAAC,CAAC;oBACnD,OAAO;wBACL,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;wBAC1B,UAAU,EAAE,KAAK,CAAC,MAAM;wBACxB,OAAO,EAAE,SAAS,MAAM,IAAI,IAAI,CAAC,GAAG,CAAC,MAAM,GAAG,KAAK,EAAE,KAAK,CAAC,MAAM,CAAC,OAAO,KAAK,CAAC,MAAM,EAAE;qBACxF,CAAC;gBACJ,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,EAAE,KAAK,EAAE,gBAAgB,OAAO,EAAE,EAAE,CAAC;gBAC9C,CAAC;YACH,CAAC;SACF,CAAC;QAEF,IAAI,EAAE,IAAI,CAAC;YACT,WAAW,EACT,uFAAuF;YACzF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,OAAO,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qCAAqC,CAAC;gBACnE,IAAI,EAAE,CAAC;qBACJ,MAAM,EAAE;qBACR,QAAQ,CAAC,8DAA8D,CAAC;gBAC3E,UAAU,EAAE,CAAC;qBACV,MAAM,EAAE;qBACR,QAAQ,CAAC,iCAAiC,CAAC;qBAC3C,QAAQ,EAAE;aACd,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE;gBAChE,MAAM,UAAU,GAAG,MAAM,IAAI,EAAE,CAAC;gBAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC;gBAChD,IAAI,CAAC,MAAM;oBAAE,OAAO,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAChE,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;gBACvC,MAAM,OAAO,GAAwD,EAAE,CAAC;gBAExE,KAAK,UAAU,UAAU,CAAC,QAAgB,EAAE,OAAe;oBACzD,IAAI,CAAC;wBACH,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;wBAChD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;wBAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;4BACrE,IAAI,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gCACzB,OAAO,CAAC,IAAI,CAAC;oCACX,IAAI,EAAE,OAAO;oCACb,IAAI,EAAE,CAAC,GAAG,CAAC;oCACX,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC;iCAC7B,CAA
C,CAAC;4BACL,CAAC;wBACH,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,wBAAwB;oBAC1B,CAAC;gBACH,CAAC;gBAED,KAAK,UAAU,SAAS,CAAC,OAAe,EAAE,MAAc;oBACtD,IAAI,CAAC;wBACH,MAAM,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;wBACrC,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;4BAC5B,IAAI,OAAO,CAAC,MAAM,IAAI,UAAU;gCAAE,MAAM;4BACxC,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;4BAClC,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;4BAClD,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;4BAC5B,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;gCACvB,MAAM,SAAS,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;4BAC7B,CAAC;iCAAM,CAAC;gCACN,MAAM,UAAU,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;4BAC9B,CAAC;wBACH,CAAC;oBACH,CAAC;oBAAC,MAAM,CAAC;wBACP,uBAAuB;oBACzB,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,CAAC;oBAC9B,IAAI,IAAI,CAAC,WAAW,EAAE,EAAE,CAAC;wBACvB,MAAM,SAAS,CAAC,MAAM,EAAE,OAAO,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;oBAC1D,CAAC;yBAAM,CAAC;wBACN,MAAM,UAAU,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;oBACpC,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,OAAO,EAAE,KAAK,EAAE,mBAAmB,OAAO,EAAE,EAAE,CAAC;gBACjD,CAAC;gBAED,OAAO;oBACL,OAAO;oBACP,UAAU,EAAE,OAAO,CAAC,MAAM;oBAC1B,SAAS,EAAE,OAAO,CAAC,MAAM,IAAI,UAAU;iBACxC,CAAC;YACJ,CAAC;SACF,CAAC;KACH,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,aAAqB,EACrB,QAAgB,EAChB,cAAsB;IAEtB,MAAM,EAAE,YAAY,EAAE,WAAW,EAAE,aAAa,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;IAExE,MAAM,OAAO,GAAG,aAAa,CAAC,EAAE,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE,EAAE,CAAC,CAAC;IAEjH,IAAI,cAAc,GAA0B,IAAI,CAAC;IAEjD,MAAM,gBAAgB,GAAG,qBAAqB,CAAC,aAAa,CAAC,CAAC;IAC9D,MAAM,QAAQ,GAAG;QACf,GAAG,gBAAgB;QACnB,QAAQ,EAAE,IAAI,CAAC;YACb,WAAW,EAAE,4EAA4E;YACzF,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;gBACpB,WAAW,EAAE,CAAC;qBACX,IAAI,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,SAAS,CAAC,CAAC;qBACnC,QAAQ,CAAC,sBAAsB,CAAC;gBACnC,aAAa,EAAE,CAAC;qBACb,MAAM,EAAE;qBACR,QAAQ,CAAC,uCAAuC,CAAC;aACrD,CAAC;YACF,OAAO,EAAE,KAAK,EAAE,EAAE,WAAW,EAAE,aAAa,EAAE,EAAE,EAAE;gBAChD,cAAc,GAAG
,EAAE,WAAW,EAAE,WAA0B,EAAE,aAAa,EAAE,CAAC;gBAC5E,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;YACtB,CAAC;SACF,CAAC;KACH,CAAC;IAEF,IAAI,CAAC;QACH,MAAM,YAAY,CAAC;YACjB,KAAK,EAAE,OAAO,CAAC,6BAA6B,CAAC;YAC7C,MAAM,EAAE,wBAAwB;YAChC,MAAM,EAAE,kCAAkC,QAAQ,kBAAkB,cAAc,sFAAsF;YACxK,KAAK,EAAE,QAAQ;YACf,QAAQ,EAAE,WAAW,CAAC,UAAU,CAAC;SAClC,CAAC,CAAC;QAEH,OAAO,cAAc,CAAC;IACxB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,aAAqB,EACrB,QAAgB,EAChB,cAAsB;IAEtB,kCAAkC;IAClC,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,EAAE,qBAAqB,CAAC,CAAC;IAC9D,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC;QAC7D,IAAI,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,aAAa,EAAE,CAAC;YAC/C,OAAO,EAAE,WAAW,EAAE,MAAM,CAAC,WAAW,EAAE,aAAa,EAAE,MAAM,CAAC,aAAa,EAAE,CAAC;QAClF,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,WAAW;IACb,CAAC;IAED,mBAAmB;IACnB,MAAM,cAAc,GAAG,MAAM,cAAc,CAAC,aAAa,EAAE,QAAQ,EAAE,cAAc,CAAC,CAAC;IAErF,mBAAmB;IACnB,IAAI,cAAc,EAAE,CAAC;QACnB,IAAI,CAAC;YACH,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,cAAc,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;QACrE,CAAC;QAAC,MAAM,CAAC;YACP,4BAA4B;QAC9B,CAAC;IACH,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,iBAAiB,CAAC,aAAqB;IACrD,IAAI,CAAC;QACH,MAAM,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,aAAa,EAAE,qBAAqB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;QACrG,IAAI,cAAc,CAAC,YAAY;YAAE,OAAO,KAAK,CAAC;QAC9C,OAAO,cAAc,CAAC,WAAW,IAAI,IAAI,IAAI,cAAc,CAAC,WAAW,KAAK,OAAO,CAAC;IACtF,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Live terminal dashboard for parallel experiment runs.
3
+ * Uses log-update for in-place terminal rendering.
4
+ * Falls back to console.log for non-TTY environments.
5
+ */
6
+ import type { ProgressEvent, ExperimentResults, Classification } from './types.js';
7
+ /**
8
+ * Tracked state for a single experiment in the dashboard; one object is created per addExperiment call and stored under the experiment name.
9
+ */
10
+ interface ExperimentState {
11
+ name: string; // experiment name; also padded and used as the row label
12
+ agent: string; // copied from the `meta` argument of addExperiment
13
+ model: string; // copied from the `meta` argument of addExperiment
14
+ totalEvals: number; // starts as meta.totalEvals, then replaced by event.totalAttempts on 'experiment:start'
15
+ completedEvals: number; // incremented once per 'eval:complete' event
16
+ passed: number; // 'eval:complete' events whose result.status is 'passed'
17
+ failed: number; // 'eval:complete' events with any other result.status
18
+ runStartTime: number | null; // Date.now() taken at 'experiment:start'; null before that
19
+ phase: 'waiting' | 'running' | 'classifying' | 'done'; // starts at 'waiting'; rows in phase 'done' are left out of the live render
20
+ /** Currently in-flight eval names (added on 'eval:start', removed on 'eval:complete') */
21
+ activeEvals: Set<string>;
22
+ /** Recently completed eval results (kept for display, max 3; the oldest entry is dropped first) */
23
+ recentResults: Array<{
24
+ name: string; // event.evalName of the completed eval
25
+ status: 'passed' | 'failed'; // event.result.status, stored as received
26
+ }>;
27
+ }
28
+ /**
29
+ * Live terminal dashboard that renders experiment progress in-place (redrawn on a 100 ms timer between start() and stop()).
30
+ */
31
+ export declare class Dashboard {
32
+ private experiments; // Map from experiment name to its ExperimentState
33
+ private intervalId; // redraw timer handle; reset to undefined by stop()
34
+ private experimentOrder; // experiment names in registration order, used as row order
35
+ private tick; // incremented on every timer tick; selects the spinner frame
36
+ addExperiment(name: string, meta: { // registers the experiment in phase 'waiting' with zeroed counters
37
+ agent: string;
38
+ model: string;
39
+ totalEvals: number;
40
+ }): void;
41
+ handleEvent(experimentName: string, event: ProgressEvent): void; // reacts to 'experiment:start', 'eval:start' and 'eval:complete'; unknown experiment names and other event types are ignored
42
+ setPhase(experimentName: string, phase: 'classifying' | 'done'): void; // no-op when the experiment name is unknown
43
+ completeExperiment(experimentName: string, results: ExperimentResults, classifications: Map<string, Classification>): void; // marks the experiment 'done', prints its completed block with console.log, then re-renders
44
+ start(): void; // begins the periodic redraw
45
+ stop(): void; // cancels the redraw timer and clears the live area
46
+ private render; // draws one row per experiment that is not yet 'done'
47
+ }
48
+ /**
49
+ * Build the text of a permanent completed-experiment block (separator, pass summary, passed/failed eval names, failure classifications). The function only returns the string; printing it above the dashboard is done by the caller.
50
+ */
51
+ export declare function renderCompletedBlock(experimentName: string, _state: ExperimentState, results: ExperimentResults, classifications: Map<string, Classification>): string; // `_state` is accepted but not read by the implementation
52
+ /**
53
+ * Console-based progress handler for non-TTY / single experiment mode. Returns a callback that writes one console.log line per recognised progress event and ignores other event types.
54
+ */
55
+ export declare function createConsoleProgressHandler(context: { // the context object is forwarded unchanged to the per-run formatters
56
+ experimentName: string;
57
+ model: string;
58
+ agent: string;
59
+ }): (event: ProgressEvent) => void;
60
+ export {};
61
+ //# sourceMappingURL=dashboard.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dashboard.d.ts","sourceRoot":"","sources":["../../src/lib/dashboard.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAIH,OAAO,KAAK,EACV,aAAa,EACb,iBAAiB,EACjB,cAAc,EACf,MAAM,YAAY,CAAC;AASpB;;GAEG;AACH,UAAU,eAAe;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,cAAc,EAAE,MAAM,CAAC;IACvB,MAAM,EAAE,MAAM,CAAC;IACf,MAAM,EAAE,MAAM,CAAC;IACf,YAAY,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,KAAK,EAAE,SAAS,GAAG,SAAS,GAAG,aAAa,GAAG,MAAM,CAAC;IACtD,qCAAqC;IACrC,WAAW,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IACzB,gEAAgE;IAChE,aAAa,EAAE,KAAK,CAAC;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,MAAM,EAAE,QAAQ,GAAG,QAAQ,CAAA;KAAE,CAAC,CAAC;CACrE;AAED;;GAEG;AACH,qBAAa,SAAS;IACpB,OAAO,CAAC,WAAW,CAAsC;IACzD,OAAO,CAAC,UAAU,CAA6C;IAC/D,OAAO,CAAC,eAAe,CAAgB;IACvC,OAAO,CAAC,IAAI,CAAK;IAEjB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE;QAAE,KAAK,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAC;QAAC,UAAU,EAAE,MAAM,CAAA;KAAE;IAkBtF,WAAW,CAAC,cAAc,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa;IA2BxD,QAAQ,CAAC,cAAc,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,GAAG,MAAM;IAO9D,kBAAkB,CAChB,cAAc,EAAE,MAAM,EACtB,OAAO,EAAE,iBAAiB,EAC1B,eAAe,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC;IAY9C,KAAK;IAOL,IAAI;IAQJ,OAAO,CAAC,MAAM;CAiCf;AAuDD;;GAEG;AACH,wBAAgB,oBAAoB,CAClC,cAAc,EAAE,MAAM,EACtB,MAAM,EAAE,eAAe,EACvB,OAAO,EAAE,iBAAiB,EAC1B,eAAe,EAAE,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,GAC3C,MAAM,CA2CR;AAED;;GAEG;AACH,wBAAgB,4BAA4B,CAAC,OAAO,EAAE;IACpD,cAAc,EAAE,MAAM,CAAC;IACvB,KAAK,EAAE,MAAM,CAAC;IACd,KAAK,EAAE,MAAM,CAAC;CACf,GAAG,CAAC,KAAK,EAAE,aAAa,KAAK,IAAI,CA+BjC"}
@@ -0,0 +1,225 @@
1
+ /**
2
+ * Live terminal dashboard for parallel experiment runs.
3
+ * Uses log-update for in-place terminal rendering.
4
+ * Falls back to console.log for non-TTY environments.
5
+ */
6
+ import logUpdate from 'log-update';
7
+ import chalk from 'chalk';
8
+ import { formatRunResult, createProgressDisplay, formatResultsTable, } from './results.js';
9
/** Spinner animation frames (ten Braille pattern characters), indexed by tick modulo length. */
const SPINNER = Array.from('\u280b\u2819\u2839\u2838\u283c\u2834\u2826\u2827\u2807\u280f');
10
/**
 * Live terminal dashboard that renders experiment progress in-place.
 *
 * Register experiments with `addExperiment`, feed progress through
 * `handleEvent` / `setPhase`, and call `completeExperiment` to print the
 * permanent result block. Between `start()` and `stop()` the live area is
 * redrawn every 100 ms through `logUpdate`.
 */
export class Dashboard {
    /** Experiment state keyed by experiment name. */
    experiments = new Map();
    /** Redraw timer handle; `undefined` while the dashboard is not running. */
    intervalId;
    /** Experiment names in registration order, which is also the row order. */
    experimentOrder = [];
    /** Number of timer ticks so far; selects the spinner frame. */
    tick = 0;

    /**
     * Register an experiment in the `waiting` phase with zeroed counters.
     * Registering an existing name again resets its state but keeps a single row.
     *
     * @param {string} name - Experiment name; used as map key and row label.
     * @param {{agent: string, model: string, totalEvals: number}} meta
     */
    addExperiment(name, meta) {
        const state = {
            name,
            agent: meta.agent,
            model: meta.model,
            totalEvals: meta.totalEvals,
            completedEvals: 0,
            passed: 0,
            failed: 0,
            runStartTime: null,
            phase: 'waiting',
            activeEvals: new Set(),
            recentResults: [],
        };
        // Pushing the name a second time would render the same row twice and
        // inflate the "x/y experiments complete" total.
        if (!this.experiments.has(name)) {
            this.experimentOrder.push(name);
        }
        this.experiments.set(name, state);
    }

    /**
     * Apply a progress event to the named experiment's state.
     * Unknown experiment names and unhandled event types are ignored.
     *
     * @param {string} experimentName
     * @param {object} event - Progress event discriminated by `event.type`.
     */
    handleEvent(experimentName, event) {
        const state = this.experiments.get(experimentName);
        if (!state)
            return;
        switch (event.type) {
            case 'experiment:start':
                // The run reports its real attempt count, which replaces the
                // estimate supplied at registration.
                state.totalEvals = event.totalAttempts;
                state.phase = 'running';
                state.runStartTime = Date.now();
                break;
            case 'eval:start':
                state.activeEvals.add(event.evalName);
                break;
            case 'eval:complete':
                state.activeEvals.delete(event.evalName);
                state.completedEvals++;
                // Any status other than 'passed' is counted as a failure.
                if (event.result.status === 'passed') {
                    state.passed++;
                }
                else {
                    state.failed++;
                }
                state.recentResults.push({ name: event.evalName, status: event.result.status });
                // Keep only the three most recent results.
                if (state.recentResults.length > 3)
                    state.recentResults.shift();
                break;
        }
    }

    /**
     * Set the phase of a registered experiment; no-op for unknown names.
     *
     * @param {string} experimentName
     * @param {'classifying' | 'done'} phase
     */
    setPhase(experimentName, phase) {
        const state = this.experiments.get(experimentName);
        if (state) {
            state.phase = phase;
        }
    }

    /**
     * Mark an experiment as done, print its permanent summary block above the
     * live area, and redraw the remaining rows.
     *
     * @param {string} experimentName
     * @param {object} results - Experiment results passed to `renderCompletedBlock`.
     * @param {Map<string, object>} classifications - Failure classifications by eval name.
     */
    completeExperiment(experimentName, results, classifications) {
        const state = this.experiments.get(experimentName);
        if (!state)
            return;
        state.phase = 'done';
        // Clear the live area first so the permanent block is not overwritten.
        logUpdate.clear();
        console.log(renderCompletedBlock(experimentName, state, results, classifications));
        this.render();
    }

    /**
     * Start redrawing every 100 ms. Calling it while already running is a
     * no-op: a second timer would overwrite `intervalId` and the first one
     * could never be cleared by `stop()`.
     */
    start() {
        if (this.intervalId !== undefined)
            return;
        this.intervalId = setInterval(() => {
            this.tick++;
            this.render();
        }, 100);
    }

    /** Stop the redraw timer (if any) and clear the live area. */
    stop() {
        if (this.intervalId !== undefined) {
            clearInterval(this.intervalId);
            this.intervalId = undefined;
        }
        logUpdate.clear();
    }

    /**
     * Draw a header plus one row per experiment that is not yet `done`.
     * Clears the live area when every experiment is done.
     */
    render() {
        const active = this.experimentOrder.filter((name) => this.experiments.get(name).phase !== 'done');
        if (active.length === 0) {
            logUpdate.clear();
            return;
        }
        const totalExperiments = this.experimentOrder.length;
        const completedExperiments = totalExperiments - active.length;
        // `active` is non-empty here, so Math.max never sees an empty argument list.
        const maxNameLen = Math.max(...active.map((n) => n.length));
        const nameWidth = Math.max(maxNameLen + 2, 20);
        const spinner = SPINNER[this.tick % SPINNER.length];
        const lines = [];
        lines.push('');
        lines.push(chalk.bold(` ${spinner} ${completedExperiments}/${totalExperiments} experiments complete`));
        lines.push('');
        for (const name of active) {
            const state = this.experiments.get(name);
            lines.push(renderExperimentLine(state, nameWidth));
        }
        lines.push('');
        logUpdate(lines.join('\n'));
    }
}
114
/**
 * Build the single progress row shown for one experiment in the live area.
 *
 * The row is the padded experiment name followed by a progress bar and, by
 * phase: a "waiting" label; the counters plus a "classifying" label; or the
 * counters plus the elapsed time since the run started.
 *
 * @param {object} state - Experiment state (reads name, phase, runStartTime,
 *   completedEvals, totalEvals and the counters used by renderStats).
 * @param {number} nameWidth - Column width the name is padded to.
 * @returns {string} The formatted row.
 */
function renderExperimentLine(state, nameWidth) {
    const paddedName = state.name.padEnd(nameWidth);
    // Elapsed time stays at 0 until the run has recorded a start timestamp.
    let elapsedSeconds = 0;
    if (state.runStartTime) {
        elapsedSeconds = Math.round((Date.now() - state.runStartTime) / 1000);
    }
    switch (state.phase) {
        case 'waiting': {
            const emptyBar = renderBar(0, 1);
            return ` ${chalk.gray(paddedName)} ${emptyBar} ${chalk.gray('waiting\u2026')}`;
        }
        case 'classifying': {
            // All evals have finished by now, so the bar is drawn full.
            const fullBar = renderBar(state.totalEvals, state.totalEvals);
            const counters = renderStats(state);
            return ` ${chalk.cyan(paddedName)} ${fullBar} ${counters} ${chalk.cyan('\u00b7')} ${chalk.cyan('classifying\u2026')}`;
        }
        default: {
            const progressBar = renderBar(state.completedEvals, state.totalEvals);
            const counters = renderStats(state);
            const elapsedLabel = chalk.gray(formatElapsed(elapsedSeconds));
            return ` ${chalk.white(paddedName)} ${progressBar} ${counters} ${chalk.gray('\u00b7')} ${elapsedLabel}`;
        }
    }
}
134
/**
 * Format an experiment's counters as "<completed>/<total>", followed by the
 * pass count with a check mark and the fail count with a cross. Each of the
 * two counts is shown only when it is greater than zero.
 *
 * @param {object} state - Reads completedEvals, totalEvals, passed and failed.
 * @returns {string} Space-separated, colourised counters.
 */
function renderStats(state) {
    const segments = [chalk.white(`${state.completedEvals}/${state.totalEvals}`)];
    if (state.passed > 0) {
        segments.push(chalk.green(`${state.passed}\u2713`));
    }
    if (state.failed > 0) {
        segments.push(chalk.red(`${state.failed}\u2717`));
    }
    return segments.join(' ');
}
143
/**
 * Format a duration in seconds as "Xs" below one minute and "Xm Ys" from one
 * minute upward. On exact minutes the seconds part is omitted ("2m").
 *
 * Non-finite input (for example NaN from an unparsable timestamp) and
 * negative input are rendered as "0s"; fractional input is floored.
 *
 * @param {number} seconds - Elapsed time in seconds.
 * @returns {string} Compact duration label.
 */
function formatElapsed(seconds) {
    // Without this guard NaN falls through both comparisons and prints "NaNm",
    // and a negative duration prints as "-5s".
    const whole = Number.isFinite(seconds) ? Math.max(0, Math.floor(seconds)) : 0;
    if (whole < 60)
        return `${whole}s`;
    const m = Math.floor(whole / 60);
    const s = whole % 60;
    return `${m}m${s > 0 ? ` ${s}s` : ''}`;
}
153
/**
 * Render a 20-cell progress bar: green heavy cells for the completed share
 * followed by gray light cells for the remainder.
 *
 * The filled share is clamped to the bar width, so out-of-range input
 * (negative or NaN `completed`) yields an empty bar instead of throwing or
 * producing a bar of the wrong length. A `total` that is not greater than
 * zero also yields an empty bar.
 *
 * @param {number} completed - Number of finished items.
 * @param {number} total - Number of expected items.
 * @returns {string} The colourised bar, always 20 cells wide.
 */
function renderBar(completed, total) {
    const width = 20;
    const rawFilled = total > 0 ? Math.round((completed / total) * width) : 0;
    // String.prototype.repeat throws a RangeError for negative counts and
    // treats NaN as 0, which would otherwise crash or shrink the bar.
    const filled = Number.isNaN(rawFilled) ? 0 : Math.min(width, Math.max(0, rawFilled));
    const empty = width - filled;
    return chalk.green('\u2501'.repeat(filled)) + chalk.gray('\u2500'.repeat(empty));
}
162
/**
 * Build the permanent summary block for a finished experiment.
 *
 * The block is framed by gray separator rules (terminal width, capped at 80
 * columns) and contains: a headline with the pass count, pass rate and elapsed
 * time; a row of passed eval names; a row of failed eval names; and, for each
 * failed eval that has an entry in `classifications`, its failure type and
 * reason. An eval counts as passed when `passedRuns > 0` and as failed when
 * `passedRuns === 0`.
 *
 * @param {string} experimentName - Name shown in the headline.
 * @param {object} _state - Not read by this function.
 * @param {object} results - Reads `evals` (each with `name` and `passedRuns`),
 *   `startedAt` and `completedAt`.
 * @param {Map<string, object>} classifications - Keyed by eval name; reads
 *   `failureType`, `failureReason` and `acknowledged`.
 * @returns {string} The block as newline-joined text (nothing is printed here).
 */
export function renderCompletedBlock(experimentName, _state, results, classifications) {
    const lines = [];
    // Fall back to 80 columns when the terminal width is unavailable or 0.
    const width = Math.min(process.stdout.columns || 80, 80);
    const separator = '\u2500'.repeat(width);
    lines.push(chalk.gray(separator));

    // Partition once and derive the counts from the partitions.
    const passed = results.evals.filter((e) => e.passedRuns > 0);
    const failed = results.evals.filter((e) => e.passedRuns === 0);
    const totalEvals = results.evals.length;
    const passedEvals = passed.length;

    // Missing or unparsable timestamps make getTime() return NaN; show 0
    // seconds in that case (and for reversed timestamps) rather than "NaN".
    const elapsedMs = new Date(results.completedAt).getTime() - new Date(results.startedAt).getTime();
    const elapsed = Number.isFinite(elapsedMs) ? Math.max(0, Math.round(elapsedMs / 1000)) : 0;

    const passRate = totalEvals > 0 ? Math.round((passedEvals / totalEvals) * 100) : 0;
    const passColor = passRate === 100 ? chalk.green : passRate >= 50 ? chalk.yellow : chalk.red;
    lines.push(` ${chalk.bold(experimentName)} ${passColor(`${passedEvals}/${totalEvals} passed (${passRate}%)`)} ${chalk.gray(formatElapsed(elapsed))}`);

    if (passed.length > 0) {
        lines.push(` ${passed.map((e) => chalk.green(`\u2713 ${e.name}`)).join(' ')}`);
    }
    if (failed.length > 0) {
        lines.push(` ${failed.map((e) => chalk.red(`\u2717 ${e.name}`)).join(' ')}`);
    }
    if (failed.length > 0 && classifications.size > 0) {
        for (const evalSummary of failed) {
            const c = classifications.get(evalSummary.name);
            if (c) {
                // Failures not attributed to the model are annotated with
                // whether the result was kept or removed.
                const suffix = c.failureType !== 'model'
                    ? chalk.gray(c.acknowledged ? ' (kept)' : ' (removed)')
                    : '';
                lines.push(chalk.gray(` ${evalSummary.name}: ${c.failureType} \u2014 ${c.failureReason}${suffix}`));
            }
        }
    }
    lines.push(chalk.gray(separator));
    return lines.join('\n');
}
198
/**
 * Create a progress callback that reports events with plain console.log
 * lines, for non-TTY output or single-experiment runs.
 *
 * Recognised event types: 'experiment:start', 'eval:start', 'eval:complete',
 * 'experiment:earlyExit', 'experiment:saved' and 'experiment:summary'. Any
 * other event type is ignored.
 *
 * @param {object} context - Passed unchanged to createProgressDisplay and
 *   formatRunResult.
 * @returns {(event: object) => void} Handler to call with each progress event.
 */
export function createConsoleProgressHandler(context) {
    // One printer per event type, looked up by `event.type`.
    const printers = new Map([
        ['experiment:start', (event) => {
            console.log(`Starting ${event.totalAttempts} eval attempts concurrently (${event.totalEvals} evals \u00d7 ${event.totalRuns} runs)`);
        }],
        ['eval:start', (event) => {
            console.log(createProgressDisplay(event.evalName, event.runNumber, event.totalRuns, context));
        }],
        ['eval:complete', (event) => {
            console.log(formatRunResult(event.evalName, event.runNumber, event.totalRuns, event.result, context));
        }],
        ['experiment:earlyExit', (event) => {
            console.log(`Early exit: ${event.evalName} passed on run ${event.runNumber}, aborting remaining attempts`);
        }],
        ['experiment:saved', (event) => {
            console.log(`\nResults saved to: ${event.outputDir}`);
        }],
        ['experiment:summary', (event) => {
            console.log(formatResultsTable(event.results));
        }],
    ]);
    return (event) => {
        const print = printers.get(event.type);
        if (print) {
            print(event);
        }
    };
}
225
+ //# sourceMappingURL=dashboard.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"dashboard.js","sourceRoot":"","sources":["../../src/lib/dashboard.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,SAAS,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,MAAM,OAAO,CAAC;AAM1B,OAAO,EACL,eAAe,EACf,qBAAqB,EACrB,kBAAkB,GACnB,MAAM,cAAc,CAAC;AAEtB,MAAM,OAAO,GAAG,CAAC,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC;AAqBrH;;GAEG;AACH,MAAM,OAAO,SAAS;IACZ,WAAW,GAAG,IAAI,GAAG,EAA2B,CAAC;IACjD,UAAU,CAA6C;IACvD,eAAe,GAAa,EAAE,CAAC;IAC/B,IAAI,GAAG,CAAC,CAAC;IAEjB,aAAa,CAAC,IAAY,EAAE,IAA0D;QACpF,MAAM,KAAK,GAAoB;YAC7B,IAAI;YACJ,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,cAAc,EAAE,CAAC;YACjB,MAAM,EAAE,CAAC;YACT,MAAM,EAAE,CAAC;YACT,YAAY,EAAE,IAAI;YAClB,KAAK,EAAE,SAAS;YAChB,WAAW,EAAE,IAAI,GAAG,EAAE;YACtB,aAAa,EAAE,EAAE;SAClB,CAAC;QACF,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAClC,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAED,WAAW,CAAC,cAAsB,EAAE,KAAoB;QACtD,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QACnD,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,kBAAkB;gBACrB,KAAK,CAAC,UAAU,GAAG,KAAK,CAAC,aAAa,CAAC;gBACvC,KAAK,CAAC,KAAK,GAAG,SAAS,CAAC;gBACxB,KAAK,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAChC,MAAM;YACR,KAAK,YAAY;gBACf,KAAK,CAAC,WAAW,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACtC,MAAM;YACR,KAAK,eAAe;gBAClB,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;gBACzC,KAAK,CAAC,cAAc,EAAE,CAAC;gBACvB,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;oBACrC,KAAK,CAAC,MAAM,EAAE,CAAC;gBACjB,CAAC;qBAAM,CAAC;oBACN,KAAK,CAAC,MAAM,EAAE,CAAC;gBACjB,CAAC;gBACD,KAAK,CAAC,aAAa,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,CAAC,QAAQ,EAAE,MAAM,EAAE,KAAK,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;gBAChF,IAAI,KAAK,CAAC,aAAa,CAAC,MAAM,GAAG,CAAC;oBAAE,KAAK,CAAC,aAAa,CAAC,KAAK,EAAE,CAAC;gBAChE,MAAM;QACV,CAAC;IACH,CAAC;IAED,QAAQ,CAAC,cAAsB,EAAE,KAA6B;QAC5D,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QACnD,
IAAI,KAAK,EAAE,CAAC;YACV,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC;QACtB,CAAC;IACH,CAAC;IAED,kBAAkB,CAChB,cAAsB,EACtB,OAA0B,EAC1B,eAA4C;QAE5C,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC;QACnD,IAAI,CAAC,KAAK;YAAE,OAAO;QAEnB,KAAK,CAAC,KAAK,GAAG,MAAM,CAAC;QAErB,SAAS,CAAC,KAAK,EAAE,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,oBAAoB,CAAC,cAAc,EAAE,KAAK,EAAE,OAAO,EAAE,eAAe,CAAC,CAAC,CAAC;QACnF,IAAI,CAAC,MAAM,EAAE,CAAC;IAChB,CAAC;IAED,KAAK;QACH,IAAI,CAAC,UAAU,GAAG,WAAW,CAAC,GAAG,EAAE;YACjC,IAAI,CAAC,IAAI,EAAE,CAAC;YACZ,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,CAAC,EAAE,GAAG,CAAC,CAAC;IACV,CAAC;IAED,IAAI;QACF,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,aAAa,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC/B,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;QAC9B,CAAC;QACD,SAAS,CAAC,KAAK,EAAE,CAAC;IACpB,CAAC;IAEO,MAAM;QACZ,MAAM,MAAM,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CACxC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,KAAK,KAAK,MAAM,CACvD,CAAC;QAEF,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,SAAS,CAAC,KAAK,EAAE,CAAC;YAClB,OAAO;QACT,CAAC;QAED,MAAM,gBAAgB,GAAG,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC;QACrD,MAAM,oBAAoB,GAAG,gBAAgB,GAAG,MAAM,CAAC,MAAM,CAAC;QAE9D,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAE/C,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;QAEpD,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,KAAK,CAAC,IAAI,CACR,KAAK,CAAC,IAAI,CAAC,IAAI,OAAO,IAAI,oBAAoB,IAAI,gBAAgB,uBAAuB,CAAC,CAC3F,CAAC;QACF,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAEf,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;YAC1B,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC;YAC1C,KAAK,CAAC,IAAI,CAAC,oBAAoB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;QACrD,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACf,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IAC9B,CAAC;CACF;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,KAAsB,EAAE,SAAiB;IACrE,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,C
AAC,CAAC;IAC7C,MAAM,OAAO,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,KAAK,CAAC,YAAY,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAE9F,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAC5B,OAAO,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,KAAK,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;IAC1E,CAAC;IAED,IAAI,KAAK,CAAC,KAAK,KAAK,aAAa,EAAE,CAAC;QAClC,MAAM,GAAG,GAAG,SAAS,CAAC,KAAK,CAAC,UAAU,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;QACjC,OAAO,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,GAAG,KAAK,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,EAAE,CAAC;IAC/G,CAAC;IAED,MAAM,GAAG,GAAG,SAAS,CAAC,KAAK,CAAC,cAAc,EAAE,KAAK,CAAC,UAAU,CAAC,CAAC;IAC9D,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC;IACjC,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;IAEhD,OAAO,IAAI,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,GAAG,KAAK,KAAK,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC;AACrF,CAAC;AAED,SAAS,WAAW,CAAC,KAAsB;IACzC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,cAAc,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACvE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC,CAAC;IACvE,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;QAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC,MAAM,QAAQ,CAAC,CAAC,CAAC;IACrE,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACzB,CAAC;AAED;;GAEG;AACH,SAAS,aAAa,CAAC,OAAe;IACpC,IAAI,OAAO,GAAG,EAAE;QAAE,OAAO,GAAG,OAAO,GAAG,CAAC;IACvC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;IACnC,MAAM,CAAC,GAAG,OAAO,GAAG,EAAE,CAAC;IACvB,OAAO,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;AACzC,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,SAAiB,EAAE,KAAa;IACjD,MAAM,KAAK,GAAG,EAAE,CAAC;IACjB,MAAM,MAAM,GAAG,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,SAAS,GAAG
,KAAK,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACxF,MAAM,KAAK,GAAG,KAAK,GAAG,MAAM,CAAC;IAC7B,OAAO,KAAK,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC;AACnF,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,oBAAoB,CAClC,cAAsB,EACtB,MAAuB,EACvB,OAA0B,EAC1B,eAA4C;IAE5C,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IACzD,MAAM,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEzC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAElC,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC;IACxC,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;IACzE,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CACxB,CAAC,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC,OAAO,EAAE,GAAG,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,OAAO,EAAE,CAAC,GAAG,IAAI,CACzF,CAAC;IACF,MAAM,QAAQ,GAAG,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnF,MAAM,SAAS,GAAG,QAAQ,KAAK,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC;IAE7F,KAAK,CAAC,IAAI,CACR,IAAI,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,KAAK,SAAS,CAAC,GAAG,WAAW,IAAI,UAAU,YAAY,QAAQ,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,EAAE,CAC5I,CAAC;IAEF,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,CAAC,CAAC,CAAC;IAE/D,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAClF,CAAC;IACD,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,KAAK,CAAC,IAAI,CAAC,IAAI,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,UAAU,CAAC,CAA
C,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChF,CAAC;IAED,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,IAAI,eAAe,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;QAClD,KAAK,MAAM,WAAW,IAAI,MAAM,EAAE,CAAC;YACjC,MAAM,CAAC,GAAG,eAAe,CAAC,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;YAChD,IAAI,CAAC,EAAE,CAAC;gBACN,MAAM,MAAM,GAAG,CAAC,CAAC,WAAW,KAAK,OAAO;oBACtC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,CAAC;oBACvD,CAAC,CAAC,EAAE,CAAC;gBACP,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,WAAW,CAAC,IAAI,KAAK,CAAC,CAAC,WAAW,WAAW,CAAC,CAAC,aAAa,GAAG,MAAM,EAAE,CAAC,CAAC,CAAC;YACxG,CAAC;QACH,CAAC;IACH,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,4BAA4B,CAAC,OAI5C;IACC,OAAO,CAAC,KAAoB,EAAE,EAAE;QAC9B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,kBAAkB;gBACrB,OAAO,CAAC,GAAG,CACT,YAAY,KAAK,CAAC,aAAa,gCAAgC,KAAK,CAAC,UAAU,iBAAiB,KAAK,CAAC,SAAS,QAAQ,CACxH,CAAC;gBACF,MAAM;YACR,KAAK,YAAY;gBACf,OAAO,CAAC,GAAG,CACT,qBAAqB,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CACjF,CAAC;gBACF,MAAM;YACR,KAAK,eAAe;gBAClB,OAAO,CAAC,GAAG,CACT,eAAe,CAAC,KAAK,CAAC,QAAQ,EAAE,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE,OAAO,CAAC,CACzF,CAAC;gBACF,MAAM;YACR,KAAK,sBAAsB;gBACzB,OAAO,CAAC,GAAG,CACT,eAAe,KAAK,CAAC,QAAQ,kBAAkB,KAAK,CAAC,SAAS,+BAA+B,CAC9F,CAAC;gBACF,MAAM;YACR,KAAK,kBAAkB;gBACrB,OAAO,CAAC,GAAG,CAAC,uBAAuB,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC;gBACtD,MAAM;YACR,KAAK,oBAAoB;gBACvB,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC/C,MAAM;QACV,CAAC;IACH,CAAC,CAAC;AACJ,CAAC"}
@@ -9,6 +9,7 @@
9
9
  interface HousekeepingStats {
10
10
  removedDuplicates: number;
11
11
  removedIncomplete: number;
12
+ removedNonModelFailures: number;
12
13
  removedEmptyDirs: number;
13
14
  }
14
15
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"housekeeping.d.ts","sourceRoot":"","sources":["../../src/lib/housekeeping.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAKH,UAAU,iBAAiB;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED;;;;;;GAMG;AACH,wBAAgB,SAAS,CACvB,UAAU,EAAE,MAAM,EAClB,cAAc,EAAE,MAAM,EACtB,OAAO,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,OAAO,CAAA;CAAE,GAC1B,iBAAiB,CAwFnB"}
1
+ {"version":3,"file":"housekeeping.d.ts","sourceRoot":"","sources":["../../src/lib/housekeeping.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAMH,UAAU,iBAAiB;IACzB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,iBAAiB,EAAE,MAAM,CAAC;IAC1B,uBAAuB,EAAE,MAAM,CAAC;IAChC,gBAAgB,EAAE,MAAM,CAAC;CAC1B;AAED;;;;;;GAMG;AACH,wBAAgB,SAAS,CACvB,UAAU,EAAE,MAAM,EAClB,cAAc,EAAE,MAAM,EACtB,OAAO,CAAC,EAAE;IAAE,GAAG,CAAC,EAAE,OAAO,CAAA;CAAE,GAC1B,iBAAiB,CAgGnB"}
@@ -8,6 +8,7 @@
8
8
  */
9
9
  import { readdirSync, rmSync, existsSync, readFileSync, statSync } from 'fs';
10
10
  import { join } from 'path';
11
+ import { isClassifierEnabled, isNonModelFailure } from './classifier.js';
11
12
  /**
12
13
  * Run housekeeping on a single experiment's results directory.
13
14
  *
@@ -19,6 +20,7 @@ export function housekeep(resultsDir, experimentName, options) {
19
20
  const stats = {
20
21
  removedDuplicates: 0,
21
22
  removedIncomplete: 0,
23
+ removedNonModelFailures: 0,
22
24
  removedEmptyDirs: 0,
23
25
  };
24
26
  const experimentDir = join(resultsDir, experimentName);
@@ -71,12 +73,20 @@ export function housekeep(resultsDir, experimentName, options) {
71
73
  stats.removedDuplicates++;
72
74
  continue;
73
75
  }
74
- // Check if this result is complete (smoke results are always cleaned up)
75
- if (isComplete(evalResultDir) && !isSmoke(evalResultDir)) {
76
+ // Check if this result is complete
77
+ // Note: non-model failures are only cleaned up if the classifier is enabled
78
+ const isNonModel = isClassifierEnabled() && isNonModelFailure(evalResultDir);
79
+ if (isComplete(evalResultDir) && !isSmoke(evalResultDir) && !isNonModel) {
76
80
  seenEvals.add(dedupeKey);
77
81
  }
82
+ else if (isNonModel) {
83
+ if (!options?.dry) {
84
+ rmSync(evalResultDir, { recursive: true });
85
+ }
86
+ stats.removedNonModelFailures++;
87
+ }
78
88
  else {
79
- // Incomplete — remove
89
+ // Incomplete or smoke — remove
80
90
  if (!options?.dry) {
81
91
  rmSync(evalResultDir, { recursive: true });
82
92
  }
@@ -1 +1 @@
1
- {"version":3,"file":"housekeeping.js","sourceRoot":"","sources":["../../src/lib/housekeeping.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAQ5B;;;;;;GAMG;AACH,MAAM,UAAU,SAAS,CACvB,UAAkB,EAClB,cAAsB,EACtB,OAA2B;IAE3B,MAAM,KAAK,GAAsB;QAC/B,iBAAiB,EAAE,CAAC;QACpB,iBAAiB,EAAE,CAAC;QACpB,gBAAgB,EAAE,CAAC;KACpB,CAAC;IAEF,MAAM,aAAa,GAAG,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;IACvD,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,KAAK,CAAC;IAE7C,yCAAyC;IACzC,IAAI,UAAoB,CAAC;IACzB,IAAI,CAAC;QACH,UAAU,GAAG,WAAW,CAAC,aAAa,CAAC;aACpC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;aACjC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YACZ,IAAI,CAAC;gBACH,OAAO,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YACxD,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC;aACD,IAAI,EAAE;aACN,OAAO,EAAE,CAAC;IACf,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IAED,0EAA0E;IAC1E,uEAAuE;IACvE,+CAA+C;IAC/C,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IAEpC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;QAE7C,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAE3C,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE;gBAAE,SAAS;YAErD,yEAAyE;YACzE,MAAM,WAAW,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;YACnD,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,OAAO,IAAI,WAAW,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;YAEtE,IAAI,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC7B,iDAAiD;gBACjD,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,KAAK,CAAC,iBAAiB,EAAE,CAAC;gBAC1B,SAAS;YACX,CAAC;YAED,yEAAyE;YACzE,IAAI,UAAU,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,EA
AE,CAAC;gBACzD,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,sBAAsB;gBACtB,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,KAAK,CAAC,iBAAiB,EAAE,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;YACvE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBACrC,CAAC;gBACD,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC3B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,4CAA4C;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,OAAO,CAAC,aAAqB;IACpC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,aAAa,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;QACvF,OAAO,OAAO,CAAC,KAAK,KAAK,IAAI,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,aAAqB;IAC5C,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,aAAa,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;QACvF,OAAO,OAAO,CAAC,WAAW,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,aAAqB;IACvC,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,EAAE,cAAc,CAAC,CAAC;IACxD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,KAAK,CAAC;IAE3C,oCAAoC;IACpC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,WAAW,CAAC,aAAa,CAAC,CAAC;QAC3C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC;gBAAE,SAAS;YACxC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAC1C,IACE,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE,sBAAsB,CAAC,CAAC;gBAChD,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC,EAC3C,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IAED,iDAAiD;IACjD,wFAAwF;IACxF,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC;QAC/D,OAAO,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IAC/B,CAAC;IAAC,MA
AM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"housekeeping.js","sourceRoot":"","sources":["../../src/lib/housekeeping.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,EAAE,UAAU,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAC7E,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,mBAAmB,EAAE,iBAAiB,EAAE,MAAM,iBAAiB,CAAC;AASzE;;;;;;GAMG;AACH,MAAM,UAAU,SAAS,CACvB,UAAkB,EAClB,cAAsB,EACtB,OAA2B;IAE3B,MAAM,KAAK,GAAsB;QAC/B,iBAAiB,EAAE,CAAC;QACpB,iBAAiB,EAAE,CAAC;QACpB,uBAAuB,EAAE,CAAC;QAC1B,gBAAgB,EAAE,CAAC;KACpB,CAAC;IAEF,MAAM,aAAa,GAAG,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,CAAC;IACvD,IAAI,CAAC,UAAU,CAAC,aAAa,CAAC;QAAE,OAAO,KAAK,CAAC;IAE7C,yCAAyC;IACzC,IAAI,UAAoB,CAAC;IACzB,IAAI,CAAC;QACH,UAAU,GAAG,WAAW,CAAC,aAAa,CAAC;aACpC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;aACjC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YACZ,IAAI,CAAC;gBACH,OAAO,QAAQ,CAAC,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;YACxD,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,KAAK,CAAC;YACf,CAAC;QACH,CAAC,CAAC;aACD,IAAI,EAAE;aACN,OAAO,EAAE,CAAC;IACf,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IAED,0EAA0E;IAC1E,uEAAuE;IACvE,+CAA+C;IAC/C,MAAM,SAAS,GAAG,IAAI,GAAG,EAAU,CAAC;IAEpC,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,EAAE,SAAS,CAAC,CAAC;QAE7C,IAAI,QAAkB,CAAC;QACvB,IAAI,CAAC;YACH,QAAQ,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;QAClE,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QAED,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;YAE3C,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC,WAAW,EAAE;gBAAE,SAAS;YAErD,yEAAyE;YACzE,MAAM,WAAW,GAAG,eAAe,CAAC,aAAa,CAAC,CAAC;YACnD,MAAM,SAAS,GAAG,WAAW,CAAC,CAAC,CAAC,GAAG,OAAO,IAAI,WAAW,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;YAEtE,IAAI,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC7B,iDAAiD;gBACjD,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,KAAK,CAAC,iBAAiB,EAAE,CAAC;gBAC1B,SAAS;YACX,
CAAC;YAED,mCAAmC;YACnC,4EAA4E;YAC5E,MAAM,UAAU,GAAG,mBAAmB,EAAE,IAAI,iBAAiB,CAAC,aAAa,CAAC,CAAC;YAC7E,IAAI,UAAU,CAAC,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;gBACxE,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;YAC3B,CAAC;iBAAM,IAAI,UAAU,EAAE,CAAC;gBACtB,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,KAAK,CAAC,uBAAuB,EAAE,CAAC;YAClC,CAAC;iBAAM,CAAC;gBACN,+BAA+B;gBAC/B,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC7C,CAAC;gBACD,KAAK,CAAC,iBAAiB,EAAE,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC;YACvE,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC3B,IAAI,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC;oBAClB,MAAM,CAAC,KAAK,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;gBACrC,CAAC;gBACD,KAAK,CAAC,gBAAgB,EAAE,CAAC;YAC3B,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,4CAA4C;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,OAAO,CAAC,aAAqB;IACpC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,aAAa,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;QACvF,OAAO,OAAO,CAAC,KAAK,KAAK,IAAI,CAAC;IAChC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,aAAqB;IAC5C,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,CAAC,aAAa,EAAE,cAAc,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;QACvF,OAAO,OAAO,CAAC,WAAW,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,SAAS,UAAU,CAAC,aAAqB;IACvC,MAAM,WAAW,GAAG,IAAI,CAAC,aAAa,EAAE,cAAc,CAAC,CAAC;IACxD,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC;QAAE,OAAO,KAAK,CAAC;IAE3C,oCAAoC;IACpC,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,WAAW,CAAC,aAAa,CAAC,CAAC;QAC3C,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,CAAC;gBAAE,SAAS;YACxC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,EAAE,KAAK,CAAC,CAAC;YAC1C,IACE,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE
,sBAAsB,CAAC,CAAC;gBAChD,UAAU,CAAC,IAAI,CAAC,MAAM,EAAE,iBAAiB,CAAC,CAAC,EAC3C,CAAC;gBACD,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;IAED,iDAAiD;IACjD,wFAAwF;IACxF,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,WAAW,EAAE,OAAO,CAAC,CAAC,CAAC;QAC/D,OAAO,OAAO,CAAC,SAAS,GAAG,CAAC,CAAC;IAC/B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AA+SD;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,MAAM,CA0BxD;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,CAavF"}
1
+ {"version":3,"file":"init.d.ts","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAMH;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,mBAAmB;IACnB,IAAI,EAAE,MAAM,CAAC;IACb,+DAA+D;IAC/D,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAwTD;;GAEG;AACH,wBAAgB,WAAW,CAAC,OAAO,EAAE,WAAW,GAAG,MAAM,CA0BxD;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CAAC,UAAU,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM,GAAG,MAAM,CAavF"}
package/dist/lib/init.js CHANGED
@@ -25,20 +25,29 @@ function getPackageJson(projectName) {
25
25
  * Get the .env.example template.
26
26
  */
27
27
  function getEnvExample() {
28
- return `# Required - Vercel AI Gateway API key (works for all agents)
29
- # Get yours at: https://vercel.com/dashboard -> AI Gateway
28
+ return `# API Keys (choose one based on your agent)
29
+ # For vercel-ai-gateway agents or to enable failure classification:
30
30
  AI_GATEWAY_API_KEY=your-ai-gateway-api-key
31
31
 
32
- # Required - Vercel token for sandbox access (choose ONE of the options below)
32
+ # For direct Claude Code API:
33
+ # ANTHROPIC_API_KEY=sk-ant-...
34
+
35
+ # For direct OpenAI Codex API:
36
+ # OPENAI_API_KEY=sk-proj-...
37
+
38
+ # Sandbox access - Required (choose ONE of the options below)
33
39
  # The @vercel/sandbox package automatically detects either token.
34
40
 
35
- # Option 1: Personal Access Token (for local development)
41
+ # Option 1: Vercel Token (for local development)
36
42
  # Create at: https://vercel.com/account/tokens
37
43
  VERCEL_TOKEN=your-vercel-token
38
44
 
39
45
  # Option 2: OIDC Token (for CI/CD pipelines like GitHub Actions)
40
46
  # Automatically provided by Vercel's CI integration
41
47
  # VERCEL_OIDC_TOKEN=your-oidc-token
48
+
49
+ # Alternative: Use Docker instead of Vercel sandbox (no token needed)
50
+ # Set sandbox: 'docker' in your experiment config
42
51
  `;
43
52
  }
44
53
  /**
@@ -74,9 +83,9 @@ Test AI coding agents to measure what actually works.
74
83
  cp .env.example .env.local
75
84
  \`\`\`
76
85
 
77
- Edit \`.env.local\` and add your API keys:
78
- - \`AI_GATEWAY_API_KEY\` - Vercel AI Gateway API key ([get yours](https://vercel.com/dashboard))
79
- - \`VERCEL_TOKEN\` - Vercel personal access token ([create one](https://vercel.com/account/tokens))
86
+ Edit \`.env.local\` and add your API keys (see comments in \`.env.example\` for options):
87
+ - **Choose ONE agent key**: \`AI_GATEWAY_API_KEY\` (for Vercel agents), \`ANTHROPIC_API_KEY\`, or \`OPENAI_API_KEY\`
88
+ - **Choose ONE sandbox option**: \`VERCEL_TOKEN\`, \`VERCEL_OIDC_TOKEN\`, or use Docker (set \`sandbox: 'docker'\` in config)
80
89
 
81
90
  ## Running Evals
82
91
 
@@ -1 +1 @@
1
- {"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,GAAG,MAAM,oBAAoB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAoB3D;;GAEG;AACH,SAAS,cAAc,CAAC,WAAmB;IACzC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,IAAI;QACb,IAAI,EAAE,QAAQ;QACd,eAAe,EAAE;YACf,oBAAoB,EAAE,IAAI,GAAG,CAAC,OAAO,EAAE;YACvC,aAAa,EAAE,SAAS;YACxB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;;;;;;;CAcR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,YAAY;IACnB,OAAO;;;;;;;CAOR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,SAAS;IAChB,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO;;;;;;;;;;;CAWR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO;;;;;;;;;;;CAWR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,OAAO;;;;;;CAMR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO;;;;;;;;;;;;;CAaR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB;IAC5B,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE;YACP,KAAK,EAAE,KAAK;SACb;QACD,YAAY,EAAE;YACZ,KAAK,EAAE,SAAS;SACjB;QACD,eAAe,EAAE;YACf,cAAc,EAAE,SAAS;YACzB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,UAAU;YAClB,gBAAgB,EAAE,UAAU;YAC5B,MAAM,EAAE,IAAI;YACZ,YAAY,EAAE,IAAI;YAClB,MAAM,EAAE,IAAI;YACZ,GAAG,EAAE,CAAC,QAAQ,CAAC;SAChB;QACD,OAAO,EAAE,CAAC,aAAa,CAAC;KACzB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,QAAQ;YAChB,gBAAgB,EAAE,SAAS;YAC3B,GAAG,EAAE,WAAW;YAChB,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,MAAM;YACd,YAAY,EAAE,IAAI;SACnB;QACD,OAAO,EAAE,CAAC,KAA
K,CAAC;KACjB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;;;CAUR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,WAAmB;IAC3C,OAAO;QACL,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,cAAc,CAAC,WAAW,CAAC,EAAE;QAC9D,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACrD,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;QAClD,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE;QAC/C,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE;QAC3C,EAAE,IAAI,EAAE,mBAAmB,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACzD,EAAE,IAAI,EAAE,sBAAsB,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC/D,EAAE,IAAI,EAAE,8BAA8B,EAAE,OAAO,EAAE,gBAAgB,EAAE,EAAE;QACrE,EAAE,IAAI,EAAE,4BAA4B,EAAE,OAAO,EAAE,cAAc,EAAE,EAAE;QACjE,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,qBAAqB,EAAE,EAAE;QAC7E,EAAE,IAAI,EAAE,kCAAkC,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC3E,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;KACrE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAAoB;IAC9C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAEjD,oCAAoC;IACpC,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,2BAA2B;IAC3B,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE3C,2BAA2B;IAC3B,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAElC,4BAA4B;QAC5B,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,UAAkB,EAAE,WAAmB;IAC7E,OAAO;sBACa,UAAU;;;UAGtB,WAAW;;;;;;;CAOpB,CAAC;AACF,CAAC"}
1
+ {"version":3,"file":"init.js","sourceRoot":"","sources":["../../src/lib/init.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,SAAS,EAAE,aAAa,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC1D,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,MAAM,CAAC;AACrC,OAAO,GAAG,MAAM,oBAAoB,CAAC,OAAO,IAAI,EAAE,MAAM,EAAE,CAAC;AAoB3D;;GAEG;AACH,SAAS,cAAc,CAAC,WAAmB;IACzC,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,WAAW;QACjB,OAAO,EAAE,OAAO;QAChB,OAAO,EAAE,IAAI;QACb,IAAI,EAAE,QAAQ;QACd,eAAe,EAAE;YACf,oBAAoB,EAAE,IAAI,GAAG,CAAC,OAAO,EAAE;YACvC,aAAa,EAAE,SAAS;YACxB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;;;;;;;;;;;;;;;;CAuBR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,YAAY;IACnB,OAAO;;;;;;;CAOR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,SAAS;IAChB,OAAO;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAsDR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO;;;;;;;;;;;CAWR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO;;;;;;;;;;;CAWR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB;IACvB,OAAO;;;;;;CAMR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,cAAc;IACrB,OAAO;;;;;;;;;;;;;CAaR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,qBAAqB;IAC5B,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,IAAI,EAAE,cAAc;QACpB,IAAI,EAAE,QAAQ;QACd,OAAO,EAAE;YACP,KAAK,EAAE,KAAK;SACb;QACD,YAAY,EAAE;YACZ,KAAK,EAAE,SAAS;SACjB;QACD,eAAe,EAAE;YACf,cAAc,EAAE,SAAS;YACzB,UAAU,EAAE,QAAQ;YACpB,MAAM,EAAE,QAAQ;SACjB;KACF,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,eAAe;IACtB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,UAAU;YAClB,gBAAgB,EAAE,UAAU;YAC5B,MAAM,EAAE,IAAI;YACZ,YAAY,EAAE,IAAI;YAClB,MAAM,EAAE,IAAI;YACZ,GAAG,EAAE,CAAC,QAAQ,CAAC;SAChB;QACD,OAAO,EAAE,CAAC,aAAa,CAAC;KACzB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,kBAAkB;IACzB,OAAO,IAAI,CAAC,SAAS,CACnB;QACE,eAAe,EAAE;YACf,MAAM,EAAE,QAAQ;YAChB,MAAM,EAAE,QAAQ;YAChB,gBAAgB,EAAE,SAAS;YAC3B,GAAG,EAAE,WAAW;YAChB,MAAM,EAAE,IAAI;YACZ,MAAM,EAAE,MAAM;YACd,YAAY,EAAE,IAAI;SACnB;QACD,OAAO,EAA
E,CAAC,KAAK,CAAC;KACjB,EACD,IAAI,EACJ,CAAC,CACF,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,aAAa;IACpB,OAAO;;;;;;;;;;CAUR,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,WAAmB;IAC3C,OAAO;QACL,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,cAAc,CAAC,WAAW,CAAC,EAAE;QAC9D,EAAE,IAAI,EAAE,eAAe,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACrD,EAAE,IAAI,EAAE,cAAc,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;QAClD,EAAE,IAAI,EAAE,YAAY,EAAE,OAAO,EAAE,YAAY,EAAE,EAAE;QAC/C,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,SAAS,EAAE,EAAE;QAC3C,EAAE,IAAI,EAAE,mBAAmB,EAAE,OAAO,EAAE,eAAe,EAAE,EAAE;QACzD,EAAE,IAAI,EAAE,sBAAsB,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC/D,EAAE,IAAI,EAAE,8BAA8B,EAAE,OAAO,EAAE,gBAAgB,EAAE,EAAE;QACrE,EAAE,IAAI,EAAE,4BAA4B,EAAE,OAAO,EAAE,cAAc,EAAE,EAAE;QACjE,EAAE,IAAI,EAAE,iCAAiC,EAAE,OAAO,EAAE,qBAAqB,EAAE,EAAE;QAC7E,EAAE,IAAI,EAAE,kCAAkC,EAAE,OAAO,EAAE,kBAAkB,EAAE,EAAE;QAC3E,EAAE,IAAI,EAAE,gCAAgC,EAAE,OAAO,EAAE,aAAa,EAAE,EAAE;KACrE,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,WAAW,CAAC,OAAoB;IAC9C,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IACrD,MAAM,UAAU,GAAG,IAAI,CAAC,SAAS,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IAEjD,oCAAoC;IACpC,IAAI,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC3B,MAAM,IAAI,KAAK,CAAC,6BAA6B,UAAU,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,2BAA2B;IAC3B,SAAS,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAE3C,2BAA2B;IAC3B,MAAM,KAAK,GAAG,gBAAgB,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IAC7C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;QAElC,4BAA4B;QAC5B,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAExC,aAAa;QACb,aAAa,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,uBAAuB,CAAC,UAAkB,EAAE,WAAmB;IAC7E,OAAO;sBACa,UAAU;;;UAGtB,WAAW;;;;;;;CAOpB,CAAC;AACF,CAAC"}
@@ -25,11 +25,6 @@ export interface SaveResultsOptions {
25
25
  experimentName: string;
26
26
  /** Per-eval fingerprints (eval name -> fingerprint hash) */
27
27
  fingerprints?: Record<string, string>;
28
- /** Per-eval classification results (eval name -> classification) */
29
- classifications?: Record<string, {
30
- failureType: string;
31
- failureReason: string;
32
- }>;
33
28
  /** Per-eval validity flags (eval name -> valid). Defaults to true. */
34
29
  validity?: Record<string, boolean>;
35
30
  /** Whether this is a smoke test run. Smoke results are excluded from reuse. */
@@ -58,11 +53,19 @@ export declare function formatResultsTable(results: ExperimentResults): string;
58
53
  /**
59
54
  * Format a single eval result for terminal display (used during progress).
60
55
  */
61
- export declare function formatRunResult(evalName: string, runNumber: number, totalRuns: number, result: EvalRunResult): string;
56
+ export declare function formatRunResult(evalName: string, runNumber: number, totalRuns: number, result: EvalRunResult, context?: {
57
+ experimentName?: string;
58
+ model?: string;
59
+ agent?: string;
60
+ }): string;
62
61
  /**
63
62
  * Create a progress indicator for running evals.
64
63
  */
65
- export declare function createProgressDisplay(evalName: string, runNumber: number, totalRuns: number): string;
64
+ export declare function createProgressDisplay(evalName: string, runNumber: number, totalRuns: number, context?: {
65
+ experimentName?: string;
66
+ model?: string;
67
+ agent?: string;
68
+ }): string;
66
69
  /**
67
70
  * A reusable result found by the scanner.
68
71
  */
@@ -1 +1 @@
1
- {"version":3,"file":"results.d.ts","sourceRoot":"","sources":["../../src/lib/results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,KAAK,EACV,aAAa,EACb,WAAW,EACX,WAAW,EACX,iBAAiB,EACjB,wBAAwB,EACzB,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAGxD;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,WAAW,EAAE,cAAc,GAAG,WAAW,CA4BjF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,WAAW,CAanF;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,wBAAwB,EAChC,KAAK,EAAE,WAAW,EAAE,EACpB,SAAS,EAAE,IAAI,EACf,WAAW,EAAE,IAAI,GAChB,iBAAiB,CAOnB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,cAAc,EAAE,MAAM,CAAC;IACvB,4DAA4D;IAC5D,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,oEAAoE;IACpE,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE;QAAE,WAAW,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,CAAC,CAAC;IACjF,sEAAsE;IACtE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,+EAA+E;IAC/E,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,WAAW,CACzB,OAAO,EAAE,iBAAiB,EAC1B,OAAO,EAAE,kBAAkB,GAC1B,MAAM,CAqHR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,iBAAiB,GAAG,MAAM,CAsCrE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,aAAa,GACpB,MAAM,CAYR;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,GAChB,MAAM,CAER;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CACjC,UAAU,EAAE,MAAM,EAClB,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACnC,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAiE7B"}
1
+ {"version":3,"file":"results.d.ts","sourceRoot":"","sources":["../../src/lib/results.ts"],"names":[],"mappings":"AAAA;;GAEG;AAKH,OAAO,KAAK,EACV,aAAa,EACb,WAAW,EACX,WAAW,EACX,iBAAiB,EACjB,wBAAwB,EACzB,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AAIxD;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,WAAW,EAAE,cAAc,GAAG,WAAW,CA4BjF;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,WAAW,EAAE,GAAG,WAAW,CAanF;AAED;;GAEG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,wBAAwB,EAChC,KAAK,EAAE,WAAW,EAAE,EACpB,SAAS,EAAE,IAAI,EACf,WAAW,EAAE,IAAI,GAChB,iBAAiB,CAOnB;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IACjC,iCAAiC;IACjC,UAAU,EAAE,MAAM,CAAC;IACnB,8CAA8C;IAC9C,cAAc,EAAE,MAAM,CAAC;IACvB,4DAA4D;IAC5D,YAAY,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACtC,sEAAsE;IACtE,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,+EAA+E;IAC/E,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,WAAW,CACzB,OAAO,EAAE,iBAAiB,EAC1B,OAAO,EAAE,kBAAkB,GAC1B,MAAM,CAiHR;AAED;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,iBAAiB,GAAG,MAAM,CAsCrE;AAED;;GAEG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,MAAM,EAAE,aAAa,EACrB,OAAO,CAAC,EAAE;IAAE,cAAc,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACpE,MAAM,CAgBR;AAED;;GAEG;AACH,wBAAgB,qBAAqB,CACnC,QAAQ,EAAE,MAAM,EAChB,SAAS,EAAE,MAAM,EACjB,SAAS,EAAE,MAAM,EACjB,OAAO,CAAC,EAAE;IAAE,cAAc,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,MAAM,CAAA;CAAE,GACpE,MAAM,CAKR;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,MAAM,CAAC;IACjB,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CACjC,UAAU,EAAE,MAAM,EAClB,cAAc,EAAE,MAAM,EACtB,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACnC,GAAG,CAAC,MAAM,EAAE,cAAc,CAAC,CAwE7B"}
@@ -5,6 +5,7 @@ import { mkdirSync, writeFileSync, readdirSync, readFileSync, existsSync, statSy
5
5
  import { join } from 'path';
6
6
  import chalk from 'chalk';
7
7
  import { parseTranscript } from './o11y/index.js';
8
+ import { isNonModelFailure } from './classifier.js';
8
9
  /**
9
10
  * Convert AgentRunResult to EvalRunData (result + transcript).
10
11
  */
@@ -87,7 +88,6 @@ export function saveResults(results, options) {
87
88
  mkdirSync(evalDir, { recursive: true });
88
89
  // Save summary (simplified format per design)
89
90
  const fingerprint = options.fingerprints?.[evalSummary.name];
90
- const classification = options.classifications?.[evalSummary.name];
91
91
  const valid = options.validity?.[evalSummary.name];
92
92
  const summaryForFile = {
93
93
  totalRuns: evalSummary.totalRuns,
@@ -98,9 +98,6 @@ export function saveResults(results, options) {
98
98
  if (fingerprint) {
99
99
  summaryForFile.fingerprint = fingerprint;
100
100
  }
101
- if (classification) {
102
- summaryForFile.classification = classification;
103
- }
104
101
  if (valid === false) {
105
102
  summaryForFile.valid = false;
106
103
  }
@@ -198,21 +195,28 @@ export function formatResultsTable(results) {
198
195
  /**
199
196
  * Format a single eval result for terminal display (used during progress).
200
197
  */
201
- export function formatRunResult(evalName, runNumber, totalRuns, result) {
198
+ export function formatRunResult(evalName, runNumber, totalRuns, result, context) {
202
199
  const icon = result.status === 'passed' ? '✓' : '✗';
203
200
  const color = result.status === 'passed' ? chalk.green : chalk.red;
204
- let line = color(`${icon} ${evalName} [${runNumber}/${totalRuns}]`);
201
+ const prefix = context?.experimentName ? `${context.experimentName}/${evalName}` : evalName;
202
+ let line = color(`${icon} ${prefix} [${runNumber}/${totalRuns}]`);
203
+ if (context?.model || context?.agent) {
204
+ line += chalk.gray(` (${[context.agent, context.model].filter(Boolean).join(' · ')})`);
205
+ }
205
206
  line += chalk.gray(` (${result.duration.toFixed(1)}s)`);
206
207
  if (result.error) {
207
- line += chalk.red(` - ${result.error.slice(0, 50)}${result.error.length > 50 ? '...' : ''}`);
208
+ line += chalk.red(` - ${result.error.slice(0, 200)}${result.error.length > 200 ? '...' : ''}`);
208
209
  }
209
210
  return line;
210
211
  }
211
212
  /**
212
213
  * Create a progress indicator for running evals.
213
214
  */
214
- export function createProgressDisplay(evalName, runNumber, totalRuns) {
215
- return chalk.blue(`Running ${evalName} [${runNumber}/${totalRuns}]...`);
215
+ export function createProgressDisplay(evalName, runNumber, totalRuns, context) {
216
+ const prefix = context?.experimentName ? `${context.experimentName}/${evalName}` : evalName;
217
+ const meta = [context?.agent, context?.model].filter(Boolean).join(' · ');
218
+ const suffix = meta ? ` [${meta}]` : '';
219
+ return chalk.blue(`Running ${prefix} [${runNumber}/${totalRuns}]${suffix}...`);
216
220
  }
217
221
  /**
218
222
  * Scan existing results for an experiment to find reusable eval results.
@@ -271,9 +275,16 @@ export function scanReusableResults(resultsDir, experimentName, fingerprints) {
271
275
  // Skip smoke test results
272
276
  if (summary.smoke === true)
273
277
  continue;
278
+ // Skip non-model failures (infra/timeout) — they should be re-run
279
+ if (isNonModelFailure(join(tsDir, evalDir)))
280
+ continue;
274
281
  // Check that it has completed runs (use --force to re-run failures)
275
282
  if (summary.totalRuns <= 0)
276
283
  continue;
284
+ // Unclassified failures (0% with no classification.json) are not reusable —
285
+ // they were never properly processed (e.g. interrupted run) and need re-running.
286
+ if (summary.passedRuns === 0 && !existsSync(join(tsDir, evalDir, 'classification.json')))
287
+ continue;
277
288
  reusable.set(evalDir, {
278
289
  evalName: evalDir,
279
290
  fingerprint: summary.fingerprint,