@m4trix/evals 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli-simple.cjs +1075 -0
- package/dist/cli-simple.cjs.map +1 -0
- package/dist/cli-simple.d.cts +1 -0
- package/dist/cli-simple.d.ts +1 -0
- package/dist/cli-simple.js +1072 -0
- package/dist/cli-simple.js.map +1 -0
- package/dist/cli.cjs +1981 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +1974 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +1184 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +347 -0
- package/dist/index.d.ts +347 -0
- package/dist/index.js +1165 -0
- package/dist/index.js.map +1 -0
- package/package.json +53 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/cli/data.mock.json","../src/cli/state.ts","../src/evals/test-case.ts","../src/evals/evaluator.ts","../src/evals/dataset.ts","../src/evals/metric.ts","../src/evals/score.ts","../src/evals/metrics/standard.ts","../src/evals/scores/standard.ts","../src/runner/api.ts","../src/runner/config.ts","../src/runner/discovery.ts","../src/runner/execution.ts","../src/runner/score-utils.ts","../src/runner/persistence.ts","../src/runner/search.ts"],"names":["registry","Effect","Queue","resolve","loaded","matchesAny"],"mappings":";AAAA;AAAA,EACE,UAAY;AAAA,IACV;AAAA,MACE,IAAM;AAAA,MACN,MAAQ;AAAA,MACR,UAAY;AAAA,MACZ,MAAQ;AAAA,QACN;AAAA,UACE,IAAM;AAAA,UACN,OAAS;AAAA,UACT,QAAU;AAAA,UACV,aAAe;AAAA,YACb,UAAY;AAAA,YACZ,UAAY;AAAA,YACZ,cAAgB;AAAA,YAChB,cAAgB;AAAA,YAChB,WAAa;AAAA,YACb,WAAa;AAAA,YACb,SAAW;AAAA,YACX,kBAAoB,CAAC,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,GAAG;AAAA,UACjF;AAAA,UACA,YAAc;AAAA,YACZ,EAAE,MAAQ,eAAe,OAAS,GAAG;AAAA,YACrC,EAAE,MAAQ,gBAAgB,OAAS,GAAG;AAAA,YACtC,EAAE,MAAQ,WAAW,OAAS,GAAG;AAAA,YACjC,EAAE,MAAQ,SAAS,OAAS,GAAG;AAAA,UACjC;AAAA,UACA,QAAU;AAAA,YACR,EAAE,MAAQ,eAAe,QAAU,OAAO,QAAU,eAAe;AAAA,YACnE,EAAE,MAAQ,cAAc,QAAU,MAAM,QAAU,eAAe;AAAA,YACjE,EAAE,MAAQ,YAAY,QAAU,KAAK;AAAA,YACrC,EAAE,MAAQ,aAAa,QAAU,KAAK;AAAA,UACxC;AAAA,UACA,UAAY;AAAA,YACV,EAAE,OAAS,2DAAsD;AAAA,YACjE,EAAE,OAAS,2CAAsC;AAAA,UACnD;AAAA,UACA,MAAQ;AAAA,YACN,OAAS;AAAA,YACT,UAAY;AAAA,YACZ,QAAU;AAAA,YACV,QAAU;AAAA,YACV,MAAQ;AAAA,YACR,aAAe;AAAA,YACf,UAAY;AAAA,YACZ,UAAY;AAAA,UACd;AAAA,QACF;AAAA,QACA;AAAA,UACE,IAAM;AAAA,UACN,OAAS;AAAA,UACT,QAAU;AAAA,UACV,aAAe;AAAA,YACb,UAAY;AAAA,YACZ,UAAY;AAAA,YACZ,cAAgB;AAAA,YAChB,cAAgB;AAAA,YAChB,WAAa;AAAA,YACb,WAAa;AAAA,YACb,SAAW;AAAA,YACX,kBAAoB,CAAC,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,GAAG;AAAA,UACjF;AAAA,UACA,YAAc;AAAA,YACZ,EAAE,MAAQ,eAAe,OAAS,GAAG;AAAA,YACrC,EAAE,MAAQ,gBAAgB,OAAS,GAAG;AAAA,YACtC,EAAE,MAAQ,WAAW,OAAS,GAAG;AAAA,YACjC,EAAE,MAAQ,SAAS,OAAS,GAAG;AAAA,UACjC;AAAA,UACA,QAAU;AAAA,YACR,EAAE,MAAQ,eAAe,QAAU,MAAM,QAAU,eAAe;AAAA,YAClE,EAAE,MAAQ,cAAc,QAAU,MAAM,QAAU,eAAe;AAAA,YACjE,EAAE,MAAQ,YAAY,QAAU,KAAK;AAAA,YACrC,EAAE,MAAQ,aAAa,QAAU,KAAK;AAAA,UACxC;AAAA,UACA,UAAY,CAAC;AAAA,UACb,MAAQ;AAAA,YACN,OAAS;AAAA,YACT,UAAY;AAAA,YACZ,QAAU;AAAA,YACV,QAAU;AAAA,YACV,MAAQ;AAAA,YACR,aAAe;AAAA,YACf,UAAY;AAAA,YACZ,UAAY;AAAA,UACd;AAAA,QACF;AAAA,QACA;AAAA,UACE,IAAM;AAAA,UACN,OAAS;AAAA,UACT,QAAU;AAAA,UACV,aAAe;AAAA,YACb,UAAY;AAAA,YACZ,UAAY;AAAA,YACZ,cAAgB;AAAA,YAChB,cAAgB;AAAA,YAChB,WAAa;AAAA,YACb,WAAa;AAAA,YACb,SAAW;AAAA,YACX,kBAAoB,CAAC,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,GAAG;AAAA,UACjF;AAAA,UACA,YAAc;AAAA,YACZ,EAAE,MAAQ,eAAe,OAAS,GAAG;AAAA,YACrC,EAAE,MAAQ,gBAAgB,OAAS,GAAG;AAAA,YACtC,EAAE,MAAQ,WAAW,OAAS,GAAG;AAAA,YACjC,EAAE,MAAQ,SAAS,OAAS,GAAG;AAAA,UACjC;AAAA,UACA,QAAU;AAAA,YACR,EAAE,MAAQ,eAAe,QAAU,MAAM,QAAU,eAAe;AAAA,YAClE,EAAE,MAAQ,cAAc,QAAU,MAAM,QAAU,eAAe;AAAA,YACjE,EAAE,MAAQ,YAAY,QAAU,KAAK;AAAA,YACrC,EAAE,MAAQ,aAAa,QAAU,KAAK;AAAA,UACxC;AAAA,UACA,UAAY,CAAC;AAAA,UACb,MAAQ;AAAA,YACN,OAAS;AAAA,YACT,UAAY;AAAA,YACZ,QAAU;AAAA,YACV,QAAU;AAAA,YACV,MAAQ;AAAA,YACR,aAAe;AAAA,YACf,UAAY;AAAA,YACZ,UAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA;AAAA,MACE,IAAM;AAAA,MACN,MAAQ;AAAA,MACR,UAAY;AAAA,MACZ,MAAQ;AAAA,QACN;AAAA,UACE,IAAM;AAAA,UACN,OAAS;AAAA,UACT,QAAU;AAAA,UACV,aAAe;AAAA,YACb,UAAY;AAAA,YACZ,UAAY;AAAA,YACZ,cAAgB;AAAA,YAChB,cAAgB;AAAA,YAChB,WAAa;AAAA,YACb,WAAa;AAAA,YACb,SAAW;AAAA,YACX,kBAAoB,CAAC,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,KAAK,GAAG;AAAA,UACvE;AAAA,UACA,YAAc;AAAA,YACZ,EAAE,MAAQ,kBAAkB,OAAS,IAAI;AAAA,YACzC,EAAE,MAAQ,gBAAgB,OAAS,IAAI;AAAA,UACzC;AAAA,UACA,QAAU;AAAA,YACR,EAAE,MAAQ,cAAc,QAAU,MAAM,QAAU,eAAe;AAAA,UACnE;AAAA,UACA,UAAY,CAAC;AAAA,UACb,MAAQ;AAAA,YACN,OAAS;AAAA,YACT,UAAY;AAAA,YACZ,QAAU;AAAA,YACV,QAAU;AAAA,YACV,MAAQ;AAAA,YACR,aAAe;AAAA,YACf,UAAY;AAAA,YACZ,UAAY;AAAA,UACd;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAAA,IACA;AAAA,MACE,IAAM;AAAA,MACN,MAAQ;AAAA,MACR,UAAY;AAAA,MACZ,MAAQ,CAAC;AAAA,IACX;AAAA,EACF;AAAA,EACA,YAAc;AAAA,IACZ,EAAE,IAAM,yBAAyB,MAAQ,yBAAyB,eAAiB,cAAc;AAAA,IACjG,EAAE,IAAM,8BAA8B,MAAQ,8BAA8B,eAAiB,wBAAwB;AAAA,IACrH,EAAE,IAAM,gBAAgB,MAAQ,sBAAsB,eAAiB,iCAAiC;AAAA,IACxG,EAAE,IAAM,qBAAqB,MAAQ,qBAAqB,eAAiB,eAAe;AAAA,EAC5F;AACF;;;ACxKO,SAAS,eAA0B;AACxC,SAAO;AACT;AAEA,SAAS,OAAO,OAAuB;AACrC,SAAO,MAAM,YAAY,EAAE,QAAQ,eAAe,GAAG,EAAE,QAAQ,YAAY,EAAE;AAC/E;AAEA,SAAS,UAAU,UAAgC;AACjD,QAAM,QAAQ,SAAS,mBAAmB,IAAI,IAAI,SAAS;AAC3D,QAAM,WAAW,KAAK,MAAO,SAAS,kBAAkB,QAAS,GAAG;AACpE,QAAM,WAAW,SAAS,kBAAkB;AAC5C,QAAM,aAAa,SAAS,aACxB,SAAS,cAAc,SAAS,aAAa,SAAS,YACtD,KAAK,IAAI,KAAK,SAAS,aAAa,SAAS;AAEjD,SAAO;AAAA,IACL,IAAI,SAAS;AAAA,IACb,OAAO,SAAS,MAAM,MAAM,GAAG,EAAE;AAAA,IACjC,QACE,SAAS,WAAW,cAChB,SACA,SAAS,WAAW,WAClB,WACA;AAAA,IACR,aAAa;AAAA,MACX;AAAA,MACA;AAAA,MACA,cAAc,KAAK,IAAI,GAAG,KAAK,MAAM,aAAa,KAAK,IAAI,GAAG,KAAK,CAAC,CAAC;AAAA,MACrE,cAAc,KAAK,IAAI,GAAG,KAAK,MAAM,aAAa,KAAK,IAAI,GAAG,KAAK,CAAC,CAAC;AAAA,MACrE,WAAW;AAAA,MACX,WAAW;AAAA,MACX,SAAS;AAAA,MACT,kBAAkB,CAAC,UAAU;AAAA,IAC/B;AAAA,IACA,YAAY;AAAA,MACV,EAAE,MAAM,UAAU,OAAO,KAAK,MAAO,SAAS,kBAAkB,QAAS,GAAG,EAAE;AAAA,MAC9E,EAAE,MAAM,UAAU,OAAO,KAAK,MAAO,SAAS,kBAAkB,QAAS,GAAG,EAAE;AAAA,IAChF;AAAA,IACA,QAAQ;AAAA,MACN;AAAA,QACE,MAAM;AAAA,QACN,QAAQ,SAAS,WAAW;AAAA,QAC5B,QAAQ,SAAS;AAAA,MACnB;AAAA,IACF;AAAA,IACA,UACE,SAAS,gBAAgB,SAAS,aAAa,SAAS,IACpD,CAAC,EAAE,OAAO,SAAS,aAAa,CAAC,IACjC,CAAC;AAAA,IACP,MAAM;AAAA,MACJ,OAAO;AAAA,MACP,UAAU;AAAA,MACV,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR,MAAM;AAAA,MACN,aAAa;AAAA,MACb,UAAU,GAAG,UAAU;AAAA,MACvB,UAAU,SAAS;AAAA,IACrB;AAAA,EACF;AACF;AAEA,SAAS,cACP,MACA,WACa;AACb,QAAM,OAAO,UACV,OAAO,CAAC,aAAa,SAAS,cAAc,KAAK,EAAE,EACnD,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ,EACtC,IAAI,SAAS;AAEhB,SAAO;AAAA,IACL,IAAI,KAAK;AAAA,IACT,MAAM,KAAK,QAAQ,QAAQ;AAAA,IAC3B,UAAU,mBAAmB,KAAK,QAAQ;AAAA,IAC1C;AAAA,EACF;AACF;AAEA,SAAS,kBAAkB,MAA2D;AACpF,SAAO;AAAA,IACL,IAAI,KAAK;AAAA,IACT,MAAM,KAAK,UAAU,QAAQ,KAAK,OAAO,KAAK,EAAE;AAAA,IAChD,eAAe,WAAW,KAAK,QAAQ;AAAA,EACzC;AACF;AAEA,eAAsB,eAAe,QAAuC;AAC1E,QAAM,CAAC,UAAU,UAAU,IAAI,MAAM,QAAQ,IAAI;AAAA,IAC/C,OAAO,gBAAgB;AAAA,IACvB,OAAO,kBAAkB;AAAA,EAC3B,CAAC;AACD,QAAM,YAAY,OAAO,mBAAmB;AAE5C,MAAI,SAAS,WAAW,KAAK,WAAW,WAAW,GAAG;AACpD,WAAO,aAAa;AAAA,EACtB;AAEA,SAAO;AAAA,IACL,UAAU,SAAS,IAAI,CAAC,YAAY,cAAc,SAAS,SAAS,CAAC;AAAA,IACrE,YAAY,WAAW,IAAI,iBAAiB;AAAA,EAC9C;AACF;AA+BO,SAAS,iBAAiB,MAA6B;AAC5D,QAAM,OAAoB,EAAE,aAAa,CAAC,EAAE;AAC5C,WAAS,QAAQ,GAAG,QAAQ,KAAK,QAAQ,SAAS,GAAG;AACnD,UAAM,QAAQ,KAAK,KAAK;AACxB,QAAI,UAAU,eAAe,KAAK,QAAQ,CAAC,GAAG;AAC5C,WAAK,YAAY,KAAK,QAAQ,CAAC;AAC/B,eAAS;AACT;AAAA,IACF;AACA,QAAI,UAAU,WAAW,KAAK,QAAQ,CAAC,GAAG;AACxC,WAAK,QAAQ,KAAK,QAAQ,CAAC;AAC3B,eAAS;AACT;AAAA,IACF;AACA,QAAI,UAAU,cAAc,KAAK,QAAQ,CAAC,GAAG;AAC3C,WAAK,SAAS,KAAK,QAAQ,CAAC;AAC5B,eAAS;AACT;AAAA,IACF;AACA,SAAK,YAAY,KAAK,KAAK;AAAA,EAC7B;AACA,SAAO;AACT;;;ACrJA,SAAS,QAAW,OAA6B;AAC/C,SAAO,OAAO,UAAU,aAAc,MAAkB,IAAI;AAC9D;AAEO,IAAM,WAAN,MAAM,UAA2B;AAAA,EAG9B,YAAY,QAAgC;AAClD,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,OAAO,SACL,QAC6B;AAC7B,WAAO,IAAI,UAA4B;AAAA,MACrC,MAAM,OAAO;AAAA,MACb,MAAM,OAAO;AAAA,MACb,aAAa,OAAO;AAAA,MACpB,OAAO,OAAO;AAAA,IAChB,CAAC;AAAA,EACH;AAAA,EAEA,UAAkB;AAChB,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,UAAoB;AAClB,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,iBAA+B;AAC7B,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,WAAmB;AACjB,WAAO,QAAQ,KAAK,QAAQ,KAAK;AAAA,EACnC;AACF;;;ACjBO,IAAM,YAAN,MAAM,WAKX;AAAA,EAGQ,YACN,QACA;AACA,SAAK,UAAU;AAAA,EACjB;AAAA,EAEQ,WAA2D;AACjE,WAAO;AAAA,MACL,MAAM,KAAK,QAAQ;AAAA,MACnB,aAAa,KAAK,QAAQ;AAAA,MAC1B,cAAc,KAAK,QAAQ;AAAA,MAC3B,aAAa,KAAK,QAAQ;AAAA,MAC1B,aAAa,KAAK,QAAQ;AAAA,MAC1B,YAAY,KAAK,QAAQ;AAAA,MACzB,eAAe,KAAK,QAAQ;AAAA,MAC5B,eAAe,KAAK,QAAQ;AAAA,IAC9B;AAAA,EACF;AAAA,EAEA,OAAO,IACL,YAC4C;AAC5C,WAAO,IAAI,WAA2C;AAAA,MACpD,aAAa,CAAC,UAAqC;AAAA,IACrD,CAAC;AAAA,EACH;AAAA,EAEA,IACE,YACiD;AACjD,UAAM,QAAQ,KAAK,SAAS;AAC5B,WAAO,IAAI,WAAgD;AAAA,MACzD,GAAI;AAAA,MAMJ,aAAa,CAAC,GAAG,MAAM,aAAa,UAAqC;AAAA,IAC3E,CAAC;AAAA,EACH;AAAA,EAEA,OAKE,QAC0E;AAC1E,UAAM,EAAE,YAAY,IAAI,KAAK,SAAS;AACtC,WAAO,IAAI,WAKT;AAAA,MACA,MAAM,OAAO;AAAA,MACb,aAAa,OAAO;AAAA,MACpB,cAAc,OAAO;AAAA,MACrB,aAAa,OAAO;AAAA,MACpB;AAAA,MACA,eAAe,OAAO;AAAA,MACtB,eAAe,OAAO;AAAA,IACxB,CAAC;AAAA,EACH;AAAA,EAEA,SACE,IAC0C;AAC1C,WAAO,IAAI,WAAyC;AAAA,MAClD,GAAG,KAAK,SAAS;AAAA,MACjB,YAAY;AAAA,IACd,CAAC;AAAA,EACH;AAAA,EAEA,UAA8B;AAC5B,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,iBAA2C;AACzC,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,kBAA4C;AAC1C,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,iBAA2C;AACzC,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,iBAAyD;AACvD,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,gBAA8D;AAC5D,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,mBAAuC;AACrC,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,mBAA8D;AAC5D,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,MAAM,iBAAgC;AACpC,UAAM,QAAQ,MAAM,QAAQ;AAAA,MAC1B,KAAK,QAAQ,YAAY,IAAI,CAAC,OAAO,GAAG,QAAQ,CAAC;AAAA,IACnD;AACA,WAAO,OAAO,OAAO,CAAC,GAAG,GAAG,KAAK;AAAA,EACnC;AACF;;;AC5IA,SAAS,WACP,OACA,UACS;AACT,SAAO,SAAS;AAAA,IAAK,CAAC,YACpB,OAAO,YAAY,WAAW,UAAU,UAAU,QAAQ,KAAK,KAAK;AAAA,EACtE;AACF;AAEA,SAAS,eACP,UACA,UACS;AACT,SAAO,SAAS,KAAK,CAAC,YAAY;AAChC,QAAI,OAAO,YAAY,UAAU;AAC/B,aAAO,gBAAgB,SAAS,QAAQ;AAAA,IAC1C;AACA,WAAO,QAAQ,KAAK,QAAQ;AAAA,EAC9B,CAAC;AACH;AAEA,SAAS,gBAAgB,SAAiB,OAAwB;AAChE,QAAM,UAAU,QACb,QAAQ,qBAAqB,MAAM,EACnC,QAAQ,OAAO,MAAM,EACrB,QAAQ,WAAW,UAAU,EAC7B,QAAQ,SAAS,IAAI,EACrB,QAAQ,OAAO,OAAO;AACzB,SAAO,IAAI,OAAO,IAAI,OAAO,GAAG,EAAE,KAAK,KAAK;AAC9C;AAEO,IAAM,UAAN,MAAM,SAAQ;AAAA,EAGX,YAAY,QAAuB;AACzC,SAAK,UAAU;AAAA,EACjB;AAAA,EAEA,OAAO,OAAO,QAAsC;AAClD,WAAO,IAAI,SAAQ;AAAA,MACjB,MAAM,OAAO;AAAA,MACb,cAAc,OAAO,gBAAgB,CAAC;AAAA,MACtC,cAAc,OAAO,gBAAgB,CAAC;AAAA,MACtC,eAAe,OAAO,iBAAiB,CAAC;AAAA,MACxC,eAAe,OAAO,iBAAiB,CAAC;AAAA,IAC1C,CAAC;AAAA,EACH;AAAA,EAEA,UAAkB;AAChB,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,kBAA6C;AAC3C,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,kBAA6C;AAC3C,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,mBAA+C;AAC7C,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,mBAA+C;AAC7C,WAAO,KAAK,QAAQ;AAAA,EACtB;AAAA,EAEA,gBACE,UACA,UACS;AACT,UAAM,OAAO,SAAS,QAAQ;AAE9B,QAAI,KAAK,QAAQ,aAAa,SAAS,GAAG;AACxC,UAAI,KAAK,KAAK,CAAC,QAAQ,WAAW,KAAK,KAAK,QAAQ,YAAY,CAAC,GAAG;AAClE,eAAO;AAAA,MACT;AAAA,IACF;AAEA,QAAI,KAAK,QAAQ,cAAc,SAAS,GAAG;AACzC,UAAI,eAAe,UAAU,KAAK,QAAQ,aAAa,GAAG;AACxD,eAAO;AAAA,MACT;AAAA,IACF;AAEA,UAAM,WACJ,KAAK,QAAQ,aAAa,WAAW,KACrC,KAAK,KAAK,CAAC,QAAQ,WAAW,KAAK,KAAK,QAAQ,YAAY,CAAC;AAE/D,UAAM,YACJ,KAAK,QAAQ,cAAc,WAAW,KACtC,eAAe,UAAU,KAAK,QAAQ,aAAa;AAErD,WAAO,YAAY;AAAA,EACrB;AACF;;;ACnHA,IAAM,WAAW,oBAAI,IAAgC;AAc9C,IAAM,SAAS;AAAA,EACpB,GAAU,QAIW;AACnB,UAAM,MAAwB;AAAA,MAC5B,IAAI,OAAO;AAAA,MACX,MAAM,OAAO;AAAA,MACb,QAAQ,OAAO;AAAA,MACf,MAAM,CAAC,UAAiB,EAAE,IAAI,OAAO,IAAI,KAAK;AAAA,IAChD;AACA,aAAS,IAAI,OAAO,IAAI,GAAyB;AACjD,WAAO;AAAA,EACT;AACF;AAEO,SAAS,cAAc,IAA4C;AACxE,SAAO,SAAS,IAAI,EAAE;AACxB;;;ACjCA,IAAMA,YAAW,oBAAI,IAA+B;AAqB7C,IAAM,QAAQ;AAAA,EACnB,GAAU,QAKU;AAClB,UAAM,MAAuB;AAAA,MAC3B,IAAI,OAAO;AAAA,MACX,MAAM,OAAO;AAAA,MACb,iBAAiB,OAAO;AAAA,MACxB,QAAQ,OAAO;AAAA,MACf,MAAM,CAAC,MAAa,YAA0D;AAC5E,cAAM,SACJ,SAAS,iBAAiB,SACtB,QAAQ,aAAa,IAAI,IACzB;AACN,eAAO;AAAA,UACL,IAAI,OAAO;AAAA,UACX;AAAA,UACA,GAAI,WAAW,UAAa,EAAE,OAAO;AAAA,QACvC;AAAA,MACF;AAAA,IACF;AACA,IAAAA,UAAS,IAAI,OAAO,IAAI,GAAwB;AAChD,WAAO;AAAA,EACT;AACF;AAEO,SAAS,aAAa,IAA2C;AACtE,SAAOA,UAAS,IAAI,EAAE;AACxB;;;AC3CO,IAAM,mBAAmB,OAAO,GAAmB;AAAA,EACxD,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ,CAAC,SAAS;AAChB,UAAM,QAAQ,KAAK,SAAS;AAC5B,UAAM,SAAS,KAAK,UAAU;AAC9B,UAAM,cAAc,KAAK,eAAe;AACxC,UAAM,eAAe,KAAK,gBAAgB;AAC1C,UAAM,SAAS,cAAc;AAC7B,WAAO,MAAM,KAAK,QAAQ,MAAM,WAAW,MAAM;AAAA,EACnD;AACF,CAAC;AAMM,IAAM,gBAAgB,OAAO,GAAgB;AAAA,EAClD,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,QAAQ,CAAC,SAAS,GAAG,KAAK,EAAE;AAC9B,CAAC;;;ACxBM,IAAM,eAAe,MAAM,GAAqB;AAAA,EACrD,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,iBAAiB;AAAA,EACjB,QAAQ,CAAC,SAAS,KAAK,MAAM,QAAQ,CAAC;AACxC,CAAC;AAMM,IAAM,cAAc,MAAM,GAAoB;AAAA,EACnD,IAAI;AAAA,EACJ,MAAM;AAAA,EACN,iBAAiB;AAAA,EACjB,QAAQ,CAAC,SAAU,KAAK,SAAS,WAAW;AAC9C,CAAC;;;ACtBD,SAAS,kBAAkB;AAE3B,SAAS,UAAAC,SAAQ,OAAO,QAAQ,SAAAC,cAAa;;;ACWtC,IAAM,sBAAoC;AAAA,EAC/C,WAAW;AAAA,IACT,SAAS,QAAQ,IAAI;AAAA,IACrB,iBAAiB,CAAC,eAAe,gBAAgB,eAAe,cAAc;AAAA,IAC9E,mBAAmB;AAAA,MACjB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,kBAAkB;AAAA,MAChB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,IACA,oBAAoB,CAAC,gBAAgB,QAAQ,SAAS,QAAQ,aAAa;AAAA,EAC7E;AAAA,EACA,mBAAmB;AACrB;AAEO,SAAS,iBAAiB,WAAiD;AAChF,MAAI,CAAC,WAAW;AACd,WAAO;AAAA,EACT;AACA,QAAM,YAAY,UAAU,YACxB;AAAA,IACE,GAAG,oBAAoB;AAAA,IACvB,GAAG,UAAU;AAAA,EACf,IACA,oBAAoB;AAExB,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAG;AAAA,IACH;AAAA,EACF;AACF;;;ACjDA,SAAS,eAAe;AACxB,SAAS,WAAAC,UAAS,gBAAgB;AAClC,SAAS,qBAAqB;AAiB9B,IAAI;AAEJ,SAAS,KAAK,QAAgB,UAAkB,MAAuB;AACrE,QAAM,SAAS,QAAQ,KAAK,KAAK,EAAE,SAAS,IAAI,OAAO;AACvD,SAAO,GAAG,MAAM,IAAI,MAAM,GACvB,YAAY,EACZ,QAAQ,eAAe,GAAG,EAC1B,QAAQ,YAAY,EAAE;AAC3B;AAEA,SAAS,UAAU,OAAgB,YAA6B;AAC9D,SACE,OAAO,UAAU,YACjB,UAAU,QACV,cAAc,SACd,OAAQ,MAAkC,UAAU,MAAM;AAE9D;AAEA,SAAS,cAAc,OAAkC;AACvD,SAAO,UAAU,OAAO,SAAS,KAAK,UAAU,OAAO,iBAAiB;AAC1E;AAEA,SAAS,gBACP,OACwD;AACxD,SACE,UAAU,OAAO,SAAS,KAC1B,UAAU,OAAO,gBAAgB,KACjC,UAAU,OAAO,eAAe;AAEpC;AAEA,SAAS,eAAe,OAA4C;AAClE,SACE,UAAU,OAAO,SAAS,KAC1B,UAAU,OAAO,SAAS,KAC1B,UAAU,OAAO,UAAU;AAE/B;AAEA,eAAe,cACb,SACA,oBACmB;AACnB,QAAM,MAAgB,CAAC;AAEvB,iBAAe,KAAK,YAAmC;AACrD,QAAI;AACJ,QAAI;AACF,gBAAU,MAAM,QAAQ,YAAY,EAAE,eAAe,KAAK,CAAC;AAAA,IAC7D,QAAQ;AACN;AAAA,IACF;AAEA,UAAM,QAAQ;AAAA,MACZ,QAAQ,IAAI,OAAO,UAAU;AAC3B,cAAM,WAAWA,SAAQ,YAAY,MAAM,IAAI;AAC/C,YAAI,MAAM,YAAY,GAAG;AACvB,cAAI,mBAAmB,SAAS,MAAM,IAAI,GAAG;AAC3C;AAAA,UACF;AACA,gBAAM,KAAK,QAAQ;AACnB;AAAA,QACF;AAEA,YAAI,MAAM,OAAO,GAAG;AAClB,cAAI,KAAK,QAAQ;AAAA,QACnB;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAEA,QAAM,KAAK,OAAO;AAClB,SAAO;AACT;AAEA,SAAS,aACP,UACA,UACS;AACT,SAAO,SAAS,KAAK,CAAC,WAAW,SAAS,SAAS,MAAM,CAAC;AAC5D;AAEA,eAAe,kBAAkB,UAAsC;AACrE,MAAI,SAAS,SAAS,KAAK,KAAK,SAAS,SAAS,MAAM,GAAG;AACzD,QAAI,CAAC,YAAY;AACf,YAAM,aAAc,MAAM,OAAO,MAAM;AAIvC,YAAM,aAAa,WAAW,cAAc,WAAW;AACvD,UAAI,CAAC,YAAY;AACf,cAAM,IAAI,MAAM,6CAA6C;AAAA,MAC/D;AACA,mBAAa,WAAW,YAAY,KAAK;AAAA,QACvC,gBAAgB;AAAA,QAChB,aAAa;AAAA,MACf,CAAC;AAAA,IACH;AACA,UAAMC,UAAS,WAAW,SACtB,MAAM,WAAW,OAAO,QAAQ,IAChC,MAAM,QAAQ,QAAQ,WAAW,QAAQ,CAAC;AAC9C,WAAO,OAAO,OAAOA,OAAiC;AAAA,EACxD;AAEA,QAAM,YAAY,cAAc,QAAQ,EAAE;AAC1C,QAAM,SAAU,MAAM,OAAO;AAC7B,SAAO,OAAO,OAAO,MAAM;AAC7B;AAEA,eAAsB,yBACpB,QAC0C;AAC1C,QAAM,QAAQ,MAAM,cAAc,OAAO,SAAS,OAAO,kBAAkB;AAC3E,QAAM,UAAU,MAAM;AAAA,IAAO,CAAC,aAC5B,aAAa,UAAU,OAAO,eAAe;AAAA,EAC/C;AAEA,QAAM,QAAQ,MAAM,QAAQ;AAAA,IAC1B,QAAQ,IAAI,OAAO,iBAAiB;AAClC,YAAM,UAAU,MAAM,kBAAkB,YAAY;AACpD,YAAM,WAAW,QAAQ,OAAO,aAAa;AAC7C,YAAM,UAAU,SAAS,OAAO,SAAS,YAAY;AACrD,aAAO,SAAS,IAAI,CAAC,aAAa;AAAA,QAChC,IAAI,KAAK,WAAW,SAAS,QAAQ,QAAQ,CAAC;AAAA,QAC9C,UAAU;AAAA,QACV;AAAA,MACF,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAEA,SAAO,MAAM,KAAK;AACpB;AAEA,eAAsB,2BACpB,QAC4C;AAC5C,QAAM,QAAQ,MAAM,cAAc,OAAO,SAAS,OAAO,kBAAkB;AAC3E,QAAM,UAAU,MAAM;AAAA,IAAO,CAAC,aAC5B,aAAa,UAAU,OAAO,iBAAiB;AAAA,EACjD;AAEA,QAAM,QAAQ,MAAM,QAAQ;AAAA,IAC1B,QAAQ,IAAI,OAAO,iBAAiB;AAClC,YAAM,UAAU,MAAM,kBAAkB,YAAY;AACpD,YAAM,aAAa,QAAQ,OAAO,eAAe;AACjD,YAAM,UAAU,SAAS,OAAO,SAAS,YAAY;AACrD,aAAO,WAAW,IAAI,CAAC,eAAe;AAAA,QACpC,IAAI,KAAK,aAAa,SAAS,UAAU,QAAQ,CAAC;AAAA,QAClD,UAAU;AAAA,QACV;AAAA,MACF,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAEA,SAAO,MAAM,KAAK;AACpB;AAEA,eAAsB,0BACpB,QAC2C;AAC3C,QAAM,QAAQ,MAAM,cAAc,OAAO,SAAS,OAAO,kBAAkB;AAC3E,QAAM,UAAU,MAAM;AAAA,IAAO,CAAC,aAC5B,aAAa,UAAU,OAAO,gBAAgB;AAAA,EAChD;AAEA,QAAM,QAAQ,MAAM,QAAQ;AAAA,IAC1B,QAAQ,IAAI,OAAO,iBAAiB;AAClC,YAAM,UAAU,MAAM,kBAAkB,YAAY;AACpD,YAAM,YAAY,QAAQ,OAAO,cAAc;AAC/C,YAAM,UAAU,SAAS,OAAO,SAAS,YAAY;AACrD,aAAO,UAAU,IAAI,CAAC,cAAc;AAAA,QAClC,IAAI,KAAK,aAAa,SAAS,SAAS,QAAQ,CAAC;AAAA,QACjD,UAAU;AAAA,QACV;AAAA,MACF,EAAE;AAAA,IACJ,CAAC;AAAA,EACH;AAEA,SAAO,MAAM,KAAK;AACpB;;;ACzMA,SAAS,YAAY;AAErB,SAAS,QAAQ,aAAa;;;ACCvB,SAAS,yBACd,QACoB;AACpB,aAAW,QAAQ,QAAQ;AACzB,UAAM,MAAM,aAAa,KAAK,EAAE;AAChC,QAAI,OAAO,IAAI,oBAAoB,SAAS,OAAO,KAAK,SAAS,YAAY,KAAK,SAAS,QAAQ,WAAW,KAAK,MAAM;AACvH,YAAM,QAAS,KAAK,KAA4B;AAChD,UAAI,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,GAAG;AACvD,eAAO;AAAA,MACT;AAAA,IACF;AACA,UAAM,UAAU,eAAe,KAAK,IAAI;AACxC,QAAI,YAAY,QAAW;AACzB,aAAO;AAAA,IACT;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,eAAe,OAAoC;AACjE,MAAI,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK,GAAG;AACvD,WAAO;AAAA,EACT;AACA,MAAI,OAAO,UAAU,YAAY,UAAU,MAAM;AAC/C,WAAO;AAAA,EACT;AACA,QAAM,MAAM;AACZ,MACE,WAAW,OACX,OAAO,IAAI,UAAU,YACrB,OAAO,SAAS,IAAI,KAAK,GACzB;AACA,WAAO,IAAI;AAAA,EACb;AACA,QAAM,eAAe,OAAO,OAAO,KAAK,EAAE;AAAA,IACxC,CAAC,UACC,OAAO,UAAU,YAAY,OAAO,SAAS,KAAK;AAAA,EACtD;AACA,MAAI,aAAa,WAAW,GAAG;AAC7B,WAAO;AAAA,EACT;AACA,SACE,aAAa,OAAO,CAAC,KAAK,UAAU,MAAM,OAAO,CAAC,IAAI,aAAa;AAEvE;;;ADnCA,SAAS,uBACP,WACA,QACA,QACS;AACT,QAAM,mBAAmB,OAAO,OAAO,CAAC,MAAM,YAAY,KAAK,EAAE,WAAW,MAAS;AACrF,MAAI,iBAAiB,SAAS,GAAG;AAC/B,WAAO,iBAAiB,MAAM,CAAC,MAAM,EAAE,WAAW,IAAI;AAAA,EACxD;AACA,QAAM,gBAAgB,UAAU,iBAAiB;AACjD,MAAI,eAAe;AACjB,WAAO,cAAc,MAAM;AAAA,EAC7B;AACA,QAAM,gBAAgB,UAAU,iBAAiB;AACjD,MAAI,kBAAkB,QAAW;AAC/B,UAAM,UAAU,yBAAyB,MAAM;AAC/C,WAAO,YAAY,UAAa,WAAW;AAAA,EAC7C;AACA,SAAO;AACT;AAEA,SAAS,gBACP,QAIA;AACA,MAAI,OAAO,WAAW,YAAY,WAAW,MAAM;AACjD,WAAO,EAAE,QAAQ,CAAC,EAAE;AAAA,EACtB;AACA,QAAM,MAAM;AACZ,QAAM,SAAS,MAAM,QAAQ,IAAI,MAAM,IAClC,IAAI,SACL,CAAC;AACL,QAAM,UAAU,MAAM,QAAQ,IAAI,OAAO,IACpC,IAAI,UACL;AACJ,SAAO,EAAE,QAAQ,QAAQ;AAC3B;AAcA,SAAS,gBAAwB;AAC/B,UAAO,oBAAI,KAAK,GAAE,YAAY,EAAE,QAAQ,SAAS,GAAG;AACtD;AAEO,SAAS,mBACd,mBACA,WACA,OACQ;AACR,SAAO;AAAA,IACL;AAAA,IACA,GAAG,SAAS,IAAI,KAAK,IAAI,cAAc,CAAC;AAAA,EAC1C;AACF;AAEO,IAAM,iBAAiB,CAC5B,MACA,cACA,kBACA,mBAKA,OAAO,IAAI,aAAa;AACtB,QAAM,YAAY,KAAK,IAAI;AAC3B,iBAAe,KAAK,OAAO,CAAC,cAAc;AAAA,IACxC,GAAG;AAAA,IACH,QAAQ;AAAA,IACR;AAAA,EACF,EAAE;AACF,SAAO,aAAa;AAAA,IAClB,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ;AAAA,EACF,CAAC;AAED,MAAI,qBAAqB;AACzB,MAAI,kBAAkB;AACtB,MAAI,kBAAkB;AAEtB,aAAW,gBAAgB,KAAK,WAAW;AACzC,UAAM,UAAU,KAAK,IAAI;AACzB,UAAM,kBAKD,CAAC;AACN,QAAI;AAEJ,eAAW,EAAE,IAAI,aAAa,UAAU,KAAK,KAAK,YAAY;AAC5D,YAAM,aAAa,UAAU,cAAc;AAC3C,UAAI,CAAC,YAAY;AACf;AAAA,MACF;AAEA,UAAI;AACF,cAAM,MAAM,OAAO,OAAO;AAAA,UAAQ,MAChC,QAAQ,QAAQ,UAAU,eAAe,CAAC;AAAA,QAC5C;AACA,cAAM,SAAS,OAAO,OAAO;AAAA,UAAQ,MACnC,QAAQ,QAAQ,WAAW,aAAa,SAAS,SAAS,GAAG,GAAG,CAAC;AAAA,QACnE;AACA,cAAM,EAAE,QAAQ,QAAQ,IAAI,gBAAgB,MAAM;AAClD,cAAM,SAAS,uBAAuB,WAAW,QAAQ,MAAM;AAC/D,wBAAgB,KAAK,EAAE,aAAa,QAAQ,QAAQ,QAAQ,CAAC;AAAA,MAC/D,SAAS,OAAO;AACd,wBACE,iBAAiB,QACb,MAAM,UACN;AACN,wBAAgB,KAAK;AAAA,UACnB;AAAA,UACA,QAAQ,CAAC;AAAA,UACT,QAAQ;AAAA,QACV,CAAC;AAAA,MACH;AAAA,IACF;AAEA,UAAM,iBAAiB,gBAAgB,MAAM,CAAC,MAAM,EAAE,MAAM;AAC5D,0BAAsB;AACtB,QAAI,gBAAgB;AAClB,yBAAmB;AAAA,IACrB,OAAO;AACL,yBAAmB;AAAA,IACrB;AAEA,UAAM,gBAA6B;AAAA,MACjC,MAAM;AAAA,MACN,OAAO,KAAK;AAAA,MACZ,YAAY,aAAa;AAAA,MACzB,cAAc,aAAa,SAAS,QAAQ;AAAA,MAC5C;AAAA,MACA,gBAAgB,KAAK,UAAU;AAAA,MAC/B,QAAQ;AAAA,MACR,YAAY,KAAK,IAAI,IAAI;AAAA,MACzB;AAAA,MACA,cAAc;AAAA,IAChB;AAEA,mBAAe,KAAK,OAAO,CAAC,cAAc;AAAA,MACxC,GAAG;AAAA,MACH;AAAA,MACA;AAAA,MACA;AAAA,IACF,EAAE;AAEF,WAAO,aAAa,aAAa;AACjC,WAAO,MAAM,MAAM,kBAAkB;AAAA,MACnC,OAAO,KAAK;AAAA,MACZ,cAAc,KAAK,SAAS;AAAA,MAC5B,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAEA,QAAM,aAAa,KAAK,IAAI;AAC5B,QAAM,iBAA8B;AAAA,IAClC,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ;AAAA,IACA;AAAA,IACA;AAAA,IACA,gBAAgB,KAAK,UAAU;AAAA,IAC/B,cAAc,KAAK,SAAS;AAAA,EAC9B;AAEA,iBAAe,KAAK,OAAO,CAAC,cAAc;AAAA,IACxC,GAAG;AAAA,IACH,QAAQ;AAAA,IACR;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,EACF,EAAE;AAEF,SAAO,aAAa,cAAc;AAClC,SAAO,MAAM,MAAM,kBAAkB;AAAA,IACnC,OAAO,KAAK;AAAA,IACZ,cAAc,KAAK,SAAS;AAAA,IAC5B,SAAS;AAAA,EACX,CAAC;AACD,SAAO,aAAa;AAAA,IAClB,MAAM;AAAA,IACN,OAAO,KAAK;AAAA,IACZ,cAAc,KAAK,SAAS;AAAA,EAC9B,CAAC;AACH,CAAC;;;AEnNH,SAAS,YAAY,aAAa;AAClC,SAAS,eAAe;AAExB,SAAS,UAAAH,SAAQ,SAAAC,cAAa;AAQ9B,eAAe,eACb,cACA,SACe;AACf,QAAM,MAAM,QAAQ,YAAY,GAAG,EAAE,WAAW,KAAK,CAAC;AACtD,QAAM,WAAW,cAAc,GAAG,KAAK,UAAU,OAAO,CAAC;AAAA,GAAM,MAAM;AACvE;AAEO,IAAM,0BAA0B,CACrC,UAEAD,QAAO;AAAA,EACLA,QAAO,IAAI,aAAa;AACtB,UAAM,UAAU,OAAOC,OAAM,KAAK,KAAK;AACvC,WAAOD,QAAO;AAAA,MAAQ,MACpB,eAAe,QAAQ,cAAc;AAAA,QACnC,OAAO,QAAQ;AAAA,QACf,IAAI,KAAK,IAAI;AAAA,QACb,GAAG,QAAQ;AAAA,MACb,CAAC;AAAA,IACH;AAAA,EACF,CAAC;AACH;;;AC/BF,SAASI,YACP,OACA,UACS;AACT,MAAI,CAAC,YAAY,SAAS,WAAW,GAAG;AACtC,WAAO;AAAA,EACT;AACA,SAAO,SAAS;AAAA,IAAK,CAAC,YACpB,OAAO,YAAY,WAAW,YAAY,QAAQ,QAAQ,KAAK,KAAK;AAAA,EACtE;AACF;AAEA,SAAS,YACP,OACA,UACS;AACT,MAAI,CAAC,YAAY,SAAS,WAAW,GAAG;AACtC,WAAO;AAAA,EACT;AACA,SAAO,SAAS,KAAK,CAAC,YAAY;AAChC,QAAI,OAAO,YAAY,UAAU;AAC/B,aAAO,MAAM,SAAS,OAAO;AAAA,IAC/B;AACA,WAAO,QAAQ,KAAK,KAAK;AAAA,EAC3B,CAAC;AACH;AAEO,SAAS,yBACd,KACA,OACkC;AAClC,MAAI,CAAC,OAAO;AACV,WAAO;AAAA,EACT;AAEA,SAAO,IAAI,OAAO,CAAC,SAAS;AAC1B,UAAM,OAAO,KAAK,SAAS,QAAQ;AAEnC,QACE,MAAM,gBACN,KAAK,KAAK,CAAC,QAAQA,YAAW,KAAK,MAAM,YAAY,CAAC,GACtD;AACA,aAAO;AAAA,IACT;AACA,QACE,MAAM,iBACN,YAAY,KAAK,UAAU,MAAM,aAAa,GAC9C;AACA,aAAO;AAAA,IACT;AAEA,UAAM,oBACJ,CAAC,MAAM,gBACP,MAAM,aAAa,WAAW,KAC9B,KAAK,KAAK,CAAC,QAAQA,YAAW,KAAK,MAAM,YAAY,CAAC;AAExD,UAAM,qBACJ,CAAC,MAAM,iBACP,MAAM,cAAc,WAAW,KAC/B,YAAY,KAAK,UAAU,MAAM,aAAa;AAEhD,WAAO,qBAAqB;AAAA,EAC9B,CAAC;AACH;;;ANrCA,SAAS,kBACP,SAC+C;AAC/C,MAAI,CAAC,QAAQ,WAAW,GAAG,GAAG;AAC5B,WAAO;AAAA,EACT;AACA,QAAM,YAAY,QAAQ,YAAY,GAAG;AACzC,MAAI,aAAa,GAAG;AAClB,WAAO;AAAA,EACT;AACA,SAAO;AAAA,IACL,QAAQ,QAAQ,MAAM,GAAG,SAAS;AAAA,IAClC,OAAO,QAAQ,MAAM,YAAY,CAAC;AAAA,EACpC;AACF;AAEA,SAAS,kBAAkB,SAA6C;AACtE,QAAM,oBAAoB,QAAQ,KAAK;AACvC,QAAM,eAAe,kBAAkB,iBAAiB;AACxD,MAAI,cAAc;AAChB,UAAM,QAAQ,IAAI,OAAO,aAAa,QAAQ,aAAa,KAAK;AAChE,WAAO,CAAC,UAAkB,MAAM,KAAK,KAAK;AAAA,EAC5C;AAEA,MAAI,kBAAkB,SAAS,GAAG,GAAG;AACnC,UAAM,UAAU,kBACb,QAAQ,qBAAqB,MAAM,EACnC,QAAQ,OAAO,IAAI;AACtB,UAAM,QAAQ,IAAI,OAAO,IAAI,OAAO,KAAK,GAAG;AAC5C,WAAO,CAAC,UAAkB,MAAM,KAAK,KAAK;AAAA,EAC5C;AAEA,SAAO,CAAC,UAAkB,MAAM,YAAY,MAAM,kBAAkB,YAAY;AAClF;AAuBO,SAAS,aAAa,WAA8C;AACzE,SAAO,IAAI,aAAa,iBAAiB,SAAS,CAAC;AACrD;AAEA,IAAM,eAAN,MAAwC;AAAA,EAiCtC,YAAY,QAAsB;AA9BlC,SAAiB,WAAWJ,QAAO,QAAQ,OAAO,UAAuB,CAAC;AAE1E,SAAiB,WAAWA,QAAO,QAAQC,OAAM,UAAmB,CAAC;AAErE,SAAiB,mBAAmBD,QAAO;AAAA,MACzCC,OAAM,UAIH;AAAA,IACL;AAEA,SAAiB,YAAY,oBAAI,IAAyB;AAC1D,SAAiB,YAAY,oBAAI,IAG9B;AAEH,SAAiB,eAAe,oBAAI,IAA8B;AAElE,SAAiB,iBAAiB,oBAAI,IAAgC;AAEtE,SAAiB,iBAAiBD,QAAO;AAAA,MACvC,KAAK,sBAAsB;AAAA,IAC7B;AAEA,SAAiB,mBAAmBA,QAAO;AAAA,MACzC,wBAAwB,KAAK,gBAAgB;AAAA,IAC/C;AAGE,SAAK,SAAS;AAAA,EAChB;AAAA,EAEA,MAAM,kBAA4D;AAChE,UAAM,WAAW,MAAM,yBAAyB,KAAK,OAAO,SAAS;AACrE,SAAK,aAAa,MAAM;AACxB,eAAW,WAAW,UAAU;AAC9B,WAAK,aAAa,IAAI,QAAQ,IAAI,OAAO;AAAA,IAC3C;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,oBAAgE;AACpE,UAAM,aAAa,MAAM,2BAA2B,KAAK,OAAO,SAAS;AACzE,SAAK,eAAe,MAAM;AAC1B,eAAW,aAAa,YAAY;AAClC,WAAK,eAAe,IAAI,UAAU,IAAI,SAAS;AAAA,IACjD;AACA,WAAO;AAAA,EACT;AAAA,EAEA,MAAM,qBAAqB,MAAqD;AAC9E,QAAI,KAAK,aAAa,SAAS,GAAG;AAChC,YAAM,KAAK,gBAAgB;AAAA,IAC7B;AACA,UAAM,aAAa,KAAK,KAAK,EAAE,YAAY;AAC3C,WAAO,MAAM,KAAK,KAAK,aAAa,OAAO,CAAC,EAAE;AAAA,MAC5C,CAAC,SAAS,KAAK,QAAQ,QAAQ,EAAE,YAAY,MAAM;AAAA,IACrD;AAAA,EACF;AAAA,EAEA,MAAM,+BACJ,SAC4C;AAC5C,QAAI,KAAK,eAAe,SAAS,GAAG;AAClC,YAAM,KAAK,kBAAkB;AAAA,IAC/B;AACA,UAAM,UAAU,kBAAkB,OAAO;AACzC,WAAO,MAAM,KAAK,KAAK,eAAe,OAAO,CAAC,EAAE;AAAA,MAAO,CAAC,SACtD,QAAQ,KAAK,UAAU,QAAQ,KAAK,EAAE;AAAA,IACxC;AAAA,EACF;AAAA,EAEA,MAAM,gBACJ,OAC2C;AAC3C,UAAM,YAAY,MAAM,0BAA0B,KAAK,OAAO,SAAS;AACvE,WAAO,yBAAyB,WAAW,KAAK;AAAA,EAClD;AAAA,EAEA,MAAM,wBACJ,WAC2C;AAC3C,QAAI,KAAK,aAAa,SAAS,GAAG;AAChC,YAAM,KAAK,gBAAgB;AAAA,IAC7B;AACA,UAAM,UAAU,KAAK,aAAa,IAAI,SAAS;AAC/C,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,MAAM,oBAAoB,SAAS,EAAE;AAAA,IACjD;AACA,UAAM,eAAe,MAAM,0BAA0B,KAAK,OAAO,SAAS;AAC1E,WAAO,aAAa;AAAA,MAAO,CAAC,aAC1B,QAAQ,QAAQ,gBAAgB,SAAS,UAAU,SAAS,QAAQ;AAAA,IACtE;AAAA,EACF;AAAA,EAEA,MAAM,eAAe,SAAkD;AACrE,QAAI,KAAK,aAAa,SAAS,GAAG;AAChC,YAAM,KAAK,gBAAgB;AAAA,IAC7B;AACA,QAAI,KAAK,eAAe,SAAS,GAAG;AAClC,YAAM,KAAK,kBAAkB;AAAA,IAC/B;AAEA,UAAM,UAAU,KAAK,aAAa,IAAI,QAAQ,SAAS;AACvD,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,MAAM,oBAAoB,QAAQ,SAAS,EAAE;AAAA,IACzD;AAEA,UAAM,qBAAqB,QAAQ,aAChC,IAAI,CAAC,OAAO,KAAK,eAAe,IAAI,EAAE,CAAC,EACvC,OAAO,CAAC,UAAuC,QAAQ,KAAK,CAAC,EAC7D,IAAI,CAAC,WAAW,EAAE,IAAI,MAAM,IAAI,WAAW,MAAM,UAAU,EAAE;AAEhE,QAAI,mBAAmB,WAAW,GAAG;AACnC,YAAM,IAAI,MAAM,gCAAgC;AAAA,IAClD;AAEA,UAAM,oBAAoB,MAAM,KAAK,wBAAwB,QAAQ,SAAS;AAE9E,UAAM,QAAQ,OAAO,WAAW,CAAC;AACjC,UAAM,eAAe;AAAA,MACnB,KAAK,OAAO;AAAA,MACZ,QAAQ;AAAA,MACR;AAAA,IACF;AACA,UAAM,WAAwB;AAAA,MAC5B;AAAA,MACA,WAAW,QAAQ;AAAA,MACnB,aAAa,QAAQ,QAAQ,QAAQ;AAAA,MACrC,cAAc,mBAAmB,IAAI,CAAC,SAAS,KAAK,EAAE;AAAA,MACtD,UAAU,KAAK,IAAI;AAAA,MACnB,gBAAgB,kBAAkB;AAAA,MAClC,oBAAoB;AAAA,MACpB,iBAAiB;AAAA,MACjB,iBAAiB;AAAA,MACjB,QAAQ;AAAA,MACR;AAAA,IACF;AAEA,SAAK,UAAU,IAAI,OAAO,QAAQ;AAClC,UAAM,cAA2B;AAAA,MAC/B,MAAM;AAAA,MACN;AAAA,MACA,WAAW,QAAQ;AAAA,MACnB,aAAa,QAAQ,QAAQ,QAAQ;AAAA,MACrC,cAAc,mBAAmB,IAAI,CAAC,SAAS,KAAK,EAAE;AAAA,MACtD,gBAAgB,kBAAkB;AAAA,MAClC;AAAA,IACF;AACA,UAAMA,QAAO,WAAW,KAAK,aAAa,WAAW,CAAC;AACtD,UAAMA,QAAO;AAAA,MACXC,OAAM,MAAM,KAAK,kBAAkB;AAAA,QACjC;AAAA,QACA;AAAA,QACA,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AAEA,UAAMD,QAAO;AAAA,MACXC,OAAM,MAAM,KAAK,UAAU;AAAA,QACzB;AAAA,QACA,WAAW,QAAQ;AAAA,QACnB,SAAS,QAAQ;AAAA,QACjB,YAAY;AAAA,QACZ,WAAW;AAAA,QACX;AAAA,MACF,CAAC;AAAA,IACH;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,mBACE,UACA,SACY;AACZ,UAAM,QAAQ,EAAE,OAAO,SAAS,OAAO,SAAS;AAChD,SAAK,UAAU,IAAI,KAAK;AACxB,WAAO,MAAM;AACX,WAAK,UAAU,OAAO,KAAK;AAAA,IAC7B;AAAA,EACF;AAAA,EAEA,eAAe,OAAwC;AACrD,WAAO,KAAK,UAAU,IAAI,KAAK;AAAA,EACjC;AAAA,EAEA,qBAAiD;AAC/C,WAAO,MAAM,KAAK,KAAK,UAAU,OAAO,CAAC,EAAE;AAAA,MACzC,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE;AAAA,IAC3B;AAAA,EACF;AAAA,EAEA,MAAM,WAA0B;AAC9B,UAAMD,QAAO,WAAW,MAAM,UAAU,KAAK,cAAc,CAAC;AAC5D,UAAMA,QAAO,WAAW,MAAM,UAAU,KAAK,gBAAgB,CAAC;AAC9D,UAAMA,QAAO,WAAWC,OAAM,SAAS,KAAK,QAAQ,CAAC;AACrD,UAAMD,QAAO,WAAWC,OAAM,SAAS,KAAK,gBAAgB,CAAC;AAC7D,UAAMD,QAAO,WAAW,OAAO,SAAS,KAAK,QAAQ,CAAC;AAAA,EACxD;AAAA,EAEQ,wBAAwB;AAC9B,UAAM,OAAO;AACb,WAAOA,QAAO;AAAA,MACZA,QAAO,IAAI,aAAa;AACtB,cAAM,OAAO,OAAOC,OAAM,KAAK,KAAK,QAAQ;AAC5C,eAAOD,QAAO;AAAA,UACZ;AAAA,YACE;AAAA,YACA,KAAK,aAAa,KAAK,IAAI;AAAA,YAC3B,KAAK;AAAA,YACL,KAAK,eAAe,KAAK,IAAI;AAAA,UAC/B;AAAA,QACF;AAAA,MACF,CAAC;AAAA,IACH;AAAA,EACF;AAAA,EAEQ,eACN,OACA,SACM;AACN,UAAM,WAAW,KAAK,UAAU,IAAI,KAAK;AACzC,QAAI,CAAC,UAAU;AACb;AAAA,IACF;AACA,SAAK,UAAU,IAAI,OAAO,QAAQ,QAAQ,CAAC;AAAA,EAC7C;AAAA,EAEQ,aAAa,OAAuD;AAC1E,WAAOA,QAAO,KAAK,MAAM;AACvB,iBAAW,SAAS,KAAK,WAAW;AAClC,YAAI,MAAM,SAAS,MAAM,UAAU,MAAM,OAAO;AAC9C;AAAA,QACF;AACA,cAAM,SAAS,KAAK;AAAA,MACtB;AAAA,IACF,CAAC,EAAE;AAAA,MACDA,QAAO,QAAQ,MAAM,OAAO,QAAQ,KAAK,UAAU,KAAK,CAAC;AAAA,MACzDA,QAAO;AAAA,IACT;AAAA,EACF;AACF","sourcesContent":["{\n \"datasets\": [\n {\n \"id\": \"onboarding-flows\",\n \"name\": \"onboarding-flows\",\n \"overview\": \"Evaluate first-user journeys and schema compliance for generated onboarding payloads.\",\n \"runs\": [\n {\n \"id\": \"run_2026-02-17_2044\",\n \"label\": \"2026-02-17 20:44\",\n \"status\": \"FAILED\",\n \"performance\": {\n \"passRate\": 96,\n \"avgScore\": 0.91,\n \"latencyP95Ms\": 710,\n \"latencyAvgMs\": 502,\n \"tokensAvg\": 171,\n \"tokensP95\": 230,\n \"costUsd\": 0.0024,\n \"latencyHistoryMs\": [380, 420, 510, 480, 550, 620, 590, 710, 520, 480, 530, 600]\n },\n \"dimensions\": [\n { \"name\": \"correctness\", \"score\": 82 },\n { \"name\": \"faithfulness\", \"score\": 79 },\n { \"name\": \"brevity\", \"score\": 68 },\n { \"name\": \"style\", \"score\": 90 }\n ],\n \"checks\": [\n { \"name\": \"json_schema\", \"passed\": false, \"detail\": \"3 violations\" },\n { \"name\": \"tool_calls\", \"passed\": true, \"detail\": \"0 unexpected\" },\n { \"name\": \"pii_leak\", \"passed\": true },\n { \"name\": \"jailbreak\", \"passed\": true }\n ],\n \"failures\": [\n { \"title\": \"product_parser › conforms to schema (price: string)\" },\n { \"title\": \"checkout › tool-call count mismatch\" }\n ],\n \"meta\": {\n \"model\": \"gpt-4o-mini\",\n \"provider\": \"OpenAI\",\n \"commit\": \"2f3c1a9\",\n \"branch\": \"main\",\n \"seed\": 42,\n \"concurrency\": 4,\n \"duration\": \"00:01:12\",\n \"artifact\": \"./eval-results/run_2026-02-17.jsonl\"\n }\n },\n {\n \"id\": \"run_2026-02-16_1112\",\n \"label\": \"2026-02-16 11:12\",\n \"status\": \"PASS\",\n \"performance\": {\n \"passRate\": 99,\n \"avgScore\": 0.95,\n \"latencyP95Ms\": 650,\n \"latencyAvgMs\": 488,\n \"tokensAvg\": 168,\n \"tokensP95\": 220,\n \"costUsd\": 0.002,\n \"latencyHistoryMs\": [420, 450, 480, 460, 520, 490, 510, 650, 440, 470, 500, 480]\n },\n \"dimensions\": [\n { \"name\": \"correctness\", \"score\": 89 },\n { \"name\": \"faithfulness\", \"score\": 88 },\n { \"name\": \"brevity\", \"score\": 72 },\n { \"name\": \"style\", \"score\": 93 }\n ],\n \"checks\": [\n { \"name\": \"json_schema\", \"passed\": true, \"detail\": \"0 violations\" },\n { \"name\": \"tool_calls\", \"passed\": true, \"detail\": \"0 unexpected\" },\n { \"name\": \"pii_leak\", \"passed\": true },\n { \"name\": \"jailbreak\", \"passed\": true }\n ],\n \"failures\": [],\n \"meta\": {\n \"model\": \"gpt-4o-mini\",\n \"provider\": \"OpenAI\",\n \"commit\": \"0d24f8f\",\n \"branch\": \"main\",\n \"seed\": 42,\n \"concurrency\": 4,\n \"duration\": \"00:01:06\",\n \"artifact\": \"./eval-results/run_2026-02-16.jsonl\"\n }\n },\n {\n \"id\": \"run_2026-02-15_0921\",\n \"label\": \"2026-02-15 09:21\",\n \"status\": \"PASS\",\n \"performance\": {\n \"passRate\": 98,\n \"avgScore\": 0.93,\n \"latencyP95Ms\": 680,\n \"latencyAvgMs\": 495,\n \"tokensAvg\": 175,\n \"tokensP95\": 235,\n \"costUsd\": 0.0022,\n \"latencyHistoryMs\": [450, 480, 520, 490, 550, 580, 620, 680, 510, 470, 530, 560]\n },\n \"dimensions\": [\n { \"name\": \"correctness\", \"score\": 86 },\n { \"name\": \"faithfulness\", \"score\": 84 },\n { \"name\": \"brevity\", \"score\": 70 },\n { \"name\": \"style\", \"score\": 91 }\n ],\n \"checks\": [\n { \"name\": \"json_schema\", \"passed\": true, \"detail\": \"0 violations\" },\n { \"name\": \"tool_calls\", \"passed\": true, \"detail\": \"0 unexpected\" },\n { \"name\": \"pii_leak\", \"passed\": true },\n { \"name\": \"jailbreak\", \"passed\": true }\n ],\n \"failures\": [],\n \"meta\": {\n \"model\": \"gpt-4o-mini\",\n \"provider\": \"OpenAI\",\n \"commit\": \"a1b2c3d\",\n \"branch\": \"main\",\n \"seed\": 42,\n \"concurrency\": 4,\n \"duration\": \"00:01:08\",\n \"artifact\": \"./eval-results/run_2026-02-15.jsonl\"\n }\n }\n ]\n },\n {\n \"id\": \"tool-calls\",\n \"name\": \"tool-calls\",\n \"overview\": \"Validate function-call conformance and unexpected tool invocation behavior.\",\n \"runs\": [\n {\n \"id\": \"run_2026-02-14_1530\",\n \"label\": \"2026-02-14 15:30\",\n \"status\": \"PASS\",\n \"performance\": {\n \"passRate\": 100,\n \"avgScore\": 1.0,\n \"latencyP95Ms\": 320,\n \"latencyAvgMs\": 280,\n \"tokensAvg\": 45,\n \"tokensP95\": 62,\n \"costUsd\": 0.0008,\n \"latencyHistoryMs\": [250, 270, 290, 280, 310, 320, 265, 290, 300, 275]\n },\n \"dimensions\": [\n { \"name\": \"contract_match\", \"score\": 100 },\n { \"name\": \"arg_validity\", \"score\": 100 }\n ],\n \"checks\": [\n { \"name\": \"tool_calls\", \"passed\": true, \"detail\": \"0 unexpected\" }\n ],\n \"failures\": [],\n \"meta\": {\n \"model\": \"gpt-4o-mini\",\n \"provider\": \"OpenAI\",\n \"commit\": \"e4f5g6h\",\n \"branch\": \"feat/tools\",\n \"seed\": 42,\n \"concurrency\": 8,\n \"duration\": \"00:00:45\",\n \"artifact\": \"./eval-results/tool-calls_2026-02-14.jsonl\"\n }\n }\n ]\n },\n {\n \"id\": \"json-schema\",\n \"name\": \"json-schema\",\n \"overview\": \"Stress-test schema fidelity across generated extraction payloads.\",\n \"runs\": []\n }\n ],\n \"evaluators\": [\n { \"id\": \"json-schema-validator\", \"name\": \"JSON Schema Validator\", \"configPreview\": \"strict=true\" },\n { \"id\": \"tool-call-contract-checker\", \"name\": \"Tool-call Contract Checker\", \"configPreview\": \"unexpectedCalls=error\" },\n { \"id\": \"rubric-judge\", \"name\": \"Rubric Judge (LLM)\", \"configPreview\": \"model=gpt-4o-mini; scale=0-100\" },\n { \"id\": \"pii-leak-detector\", \"name\": \"PII Leak Detector\", \"configPreview\": \"redact=false\" }\n ]\n}\n","import type { CliState, EvalsData, EvalDataset, EvalRun, StartupArgs } from './types';\nimport type {\n CollectedDataset,\n CollectedEvaluator,\n RunSnapshot,\n RunnerApi,\n RunnerEvent,\n} from '../runner';\n\nimport mockData from './data.mock.json' with { type: 'json' };\n\nexport function loadMockData(): EvalsData {\n return mockData as EvalsData;\n}\n\nfunction toSlug(input: string): string {\n return input.toLowerCase().replace(/[^a-z0-9]+/g, '-').replace(/^-+|-+$/g, '');\n}\n\nfunction toEvalRun(snapshot: RunSnapshot): EvalRun {\n const total = snapshot.totalTestCases === 0 ? 1 : snapshot.totalTestCases;\n const passRate = Math.round((snapshot.passedTestCases / total) * 100);\n const avgScore = snapshot.passedTestCases / total;\n const durationMs = snapshot.finishedAt\n ? snapshot.finishedAt - (snapshot.startedAt ?? snapshot.queuedAt)\n : Date.now() - (snapshot.startedAt ?? snapshot.queuedAt);\n\n return {\n id: snapshot.runId,\n label: snapshot.runId.slice(0, 12),\n status:\n snapshot.status === 'completed'\n ? 'PASS'\n : snapshot.status === 'failed'\n ? 'FAILED'\n : 'RUNNING',\n performance: {\n passRate,\n avgScore,\n latencyP95Ms: Math.max(1, Math.floor(durationMs / Math.max(1, total))),\n latencyAvgMs: Math.max(1, Math.floor(durationMs / Math.max(1, total))),\n tokensAvg: 0,\n tokensP95: 0,\n costUsd: 0,\n latencyHistoryMs: [durationMs],\n },\n dimensions: [\n { name: 'passed', score: Math.round((snapshot.passedTestCases / total) * 100) },\n { name: 'failed', score: Math.round((snapshot.failedTestCases / total) * 100) },\n ],\n checks: [\n {\n name: 'run_status',\n passed: snapshot.status === 'completed',\n detail: snapshot.status,\n },\n ],\n failures:\n snapshot.errorMessage && snapshot.errorMessage.length > 0\n ? [{ title: snapshot.errorMessage }]\n : [],\n meta: {\n model: 'n/a',\n provider: 'runner',\n commit: 'local',\n branch: 'local',\n seed: 0,\n concurrency: 1,\n duration: `${durationMs}ms`,\n artifact: snapshot.artifactPath,\n },\n };\n}\n\nfunction toEvalDataset(\n item: CollectedDataset,\n snapshots: ReadonlyArray<RunSnapshot>,\n): EvalDataset {\n const runs = snapshots\n .filter((snapshot) => snapshot.datasetId === item.id)\n .sort((a, b) => b.queuedAt - a.queuedAt)\n .map(toEvalRun);\n\n return {\n id: item.id,\n name: item.dataset.getName(),\n overview: `Discovered from ${item.filePath}`,\n runs,\n };\n}\n\nfunction toEvaluatorOption(item: CollectedEvaluator): EvalsData['evaluators'][number] {\n return {\n id: item.id,\n name: item.evaluator.getName() ?? toSlug(item.id),\n configPreview: `Source: ${item.filePath}`,\n };\n}\n\nexport async function loadRunnerData(runner: RunnerApi): Promise<EvalsData> {\n const [datasets, evaluators] = await Promise.all([\n runner.collectDatasets(),\n runner.collectEvaluators(),\n ]);\n const snapshots = runner.getAllRunSnapshots();\n\n if (datasets.length === 0 && evaluators.length === 0) {\n return loadMockData();\n }\n\n return {\n datasets: datasets.map((dataset) => toEvalDataset(dataset, snapshots)),\n evaluators: evaluators.map(toEvaluatorOption),\n };\n}\n\nexport function applyRunnerEvent(\n data: EvalsData,\n event: RunnerEvent,\n runner: RunnerApi,\n): EvalsData {\n const snapshot = runner.getRunSnapshot(event.runId);\n if (!snapshot) {\n return data;\n }\n\n const dataset = data.datasets.find((item) => item.id === snapshot.datasetId);\n if (!dataset) {\n return data;\n }\n\n const run = toEvalRun(snapshot);\n const hasRun = dataset.runs.some((item) => item.id === run.id);\n const nextRuns = hasRun\n ? dataset.runs.map((item) => (item.id === run.id ? run : item))\n : [run, ...dataset.runs];\n\n return {\n ...data,\n datasets: data.datasets.map((item) =>\n item.id === dataset.id ? { ...item, runs: nextRuns } : item,\n ),\n };\n}\n\nexport function parseStartupArgs(argv: string[]): StartupArgs {\n const args: StartupArgs = { unknownArgs: [] };\n for (let index = 0; index < argv.length; index += 1) {\n const token = argv[index];\n if (token === '--dataset' && argv[index + 1]) {\n args.datasetId = argv[index + 1];\n index += 1;\n continue;\n }\n if (token === '--run' && argv[index + 1]) {\n args.runId = argv[index + 1];\n index += 1;\n continue;\n }\n if (token === '--search' && argv[index + 1]) {\n args.search = argv[index + 1];\n index += 1;\n continue;\n }\n args.unknownArgs.push(token);\n }\n return args;\n}\n\nexport function getFilteredDatasets(data: EvalsData, searchQuery: string): EvalDataset[] {\n const query = searchQuery.trim().toLowerCase();\n if (!query) {\n return data.datasets;\n }\n return data.datasets.filter((dataset) => dataset.name.toLowerCase().includes(query));\n}\n\nexport function getDatasetByMenuIndex(datasets: EvalDataset[], menuIndex: number): EvalDataset | undefined {\n if (menuIndex <= 0) {\n return undefined;\n }\n return datasets[menuIndex - 1];\n}\n\nexport function getRunByMenuIndex(dataset: EvalDataset | undefined, menuIndex: number): EvalRun | undefined {\n if (!dataset || menuIndex <= 0) {\n return undefined;\n }\n return dataset.runs[menuIndex - 1];\n}\n\nexport function createInitialState(data: EvalsData, args: StartupArgs): CliState {\n const warnings: string[] = [];\n if (args.unknownArgs.length > 0) {\n warnings.push(`Unknown args: ${args.unknownArgs.join(', ')}`);\n warnings.push('Supported: --dataset <id>, --run <id>, --search <term>');\n }\n\n const searchQuery = args.search ?? '';\n const filteredDatasets = getFilteredDatasets(data, searchQuery);\n const datasetByArg = filteredDatasets.find((dataset) => dataset.id === args.datasetId);\n const datasetMenuIndex = datasetByArg ? filteredDatasets.indexOf(datasetByArg) + 1 : 0;\n\n let level: CliState['level'] = 'datasets';\n let runMenuIndex = 0;\n\n if (datasetByArg) {\n level = 'runs';\n } else if (args.datasetId) {\n warnings.push(`Dataset \"${args.datasetId}\" not found.`);\n }\n\n if (datasetByArg && args.runId) {\n const runIndex = datasetByArg.runs.findIndex((run) => run.id === args.runId);\n if (runIndex >= 0) {\n runMenuIndex = runIndex + 1;\n level = 'details';\n } else {\n warnings.push(`Run \"${args.runId}\" not found in dataset \"${datasetByArg.id}\".`);\n }\n }\n\n return {\n level,\n focus: 'left',\n datasetMenuIndex,\n runMenuIndex,\n detailsScrollOffset: 0,\n selectedEvaluatorIds: data.evaluators.slice(0, 2).map((item) => item.id),\n evaluatorMenuIndex: 0,\n searchQuery,\n searchMode: false,\n startupWarnings: warnings,\n };\n}\n\nexport type CliAction =\n | { type: 'MOVE_UP'; max: number }\n | { type: 'MOVE_DOWN'; max: number }\n | { type: 'ENTER'; hasDataset: boolean; hasRun: boolean }\n | { type: 'BACK' }\n | { type: 'TOGGLE_FOCUS' }\n | { type: 'START_SEARCH' }\n | { type: 'END_SEARCH' }\n | { type: 'APPEND_SEARCH'; value: string }\n | { type: 'REMOVE_SEARCH_CHAR' }\n | { type: 'TOGGLE_EVALUATOR'; evaluatorId: string }\n | { type: 'CLEAR_WARNINGS' };\n\nexport function reduceCliState(state: CliState, action: CliAction): CliState {\n if (action.type === 'MOVE_UP') {\n if (state.searchMode) {\n return state;\n }\n if (state.level === 'details' && state.focus === 'right') {\n return { ...state, detailsScrollOffset: Math.max(0, state.detailsScrollOffset - 1) };\n }\n if (state.level === 'datasets') {\n return { ...state, datasetMenuIndex: Math.max(0, state.datasetMenuIndex - 1) };\n }\n if (state.level === 'runs') {\n return { ...state, runMenuIndex: Math.max(0, state.runMenuIndex - 1) };\n }\n if (state.level === 'new-evaluation') {\n return { ...state, evaluatorMenuIndex: Math.max(0, state.evaluatorMenuIndex - 1) };\n }\n return state;\n }\n\n if (action.type === 'MOVE_DOWN') {\n if (state.searchMode) {\n return state;\n }\n if (state.level === 'details' && state.focus === 'right') {\n return { ...state, detailsScrollOffset: Math.min(action.max, state.detailsScrollOffset + 1) };\n }\n if (state.level === 'datasets') {\n return { ...state, datasetMenuIndex: Math.min(action.max, state.datasetMenuIndex + 1) };\n }\n if (state.level === 'runs') {\n return { ...state, runMenuIndex: Math.min(action.max, state.runMenuIndex + 1) };\n }\n if (state.level === 'new-evaluation') {\n return { ...state, evaluatorMenuIndex: Math.min(action.max, state.evaluatorMenuIndex + 1) };\n }\n return state;\n }\n\n if (action.type === 'ENTER') {\n if (state.searchMode) {\n return { ...state, searchMode: false };\n }\n if (state.level === 'datasets') {\n if (state.datasetMenuIndex === 0) {\n return { ...state, level: 'new-evaluation' };\n }\n if (action.hasDataset) {\n return { ...state, level: 'runs', runMenuIndex: 0 };\n }\n return state;\n }\n if (state.level === 'runs') {\n if (state.runMenuIndex === 0) {\n return { ...state, level: 'new-evaluation' };\n }\n if (action.hasRun) {\n return { ...state, level: 'details', detailsScrollOffset: 0 };\n }\n return state;\n }\n if (state.level === 'new-evaluation') {\n return state;\n }\n return state;\n }\n\n if (action.type === 'BACK') {\n if (state.searchMode) {\n return { ...state, searchMode: false };\n }\n if (state.level === 'details') {\n return { ...state, level: 'runs' };\n }\n if (state.level === 'runs' || state.level === 'new-evaluation') {\n return { ...state, level: 'datasets' };\n }\n return state;\n }\n\n if (action.type === 'TOGGLE_FOCUS') {\n return { ...state, focus: state.focus === 'left' ? 'right' : 'left' };\n }\n\n if (action.type === 'START_SEARCH') {\n return { ...state, searchMode: true };\n }\n\n if (action.type === 'END_SEARCH') {\n return { ...state, searchMode: false };\n }\n\n if (action.type === 'APPEND_SEARCH') {\n return { ...state, searchQuery: `${state.searchQuery}${action.value}` };\n }\n\n if (action.type === 'REMOVE_SEARCH_CHAR') {\n return { ...state, searchQuery: state.searchQuery.slice(0, -1) };\n }\n\n if (action.type === 'TOGGLE_EVALUATOR') {\n const exists = state.selectedEvaluatorIds.includes(action.evaluatorId);\n return {\n ...state,\n selectedEvaluatorIds: exists\n ? state.selectedEvaluatorIds.filter((id) => id !== action.evaluatorId)\n : [...state.selectedEvaluatorIds, action.evaluatorId],\n };\n }\n\n if (action.type === 'CLEAR_WARNINGS') {\n return { ...state, startupWarnings: [] };\n }\n\n return state;\n}\n","import { Schema as S } from 'effect';\n\ntype InputOrBuilder<T> = T | (() => T);\n\ninterface TestCaseConfig<TInput> {\n name: string;\n tags: string[];\n inputSchema: S.Schema.Any;\n input: InputOrBuilder<TInput>;\n}\n\ninterface TestCaseDescribeConfig<TI extends S.Schema.Any> {\n name: string;\n tags: string[];\n inputSchema: TI;\n input: InputOrBuilder<S.Schema.Type<TI>>;\n}\n\nfunction resolve<T>(value: InputOrBuilder<T>): T {\n return typeof value === 'function' ? (value as () => T)() : value;\n}\n\nexport class TestCase<TInput = unknown> {\n private readonly _config: TestCaseConfig<TInput>;\n\n private constructor(config: TestCaseConfig<TInput>) {\n this._config = config;\n }\n\n static describe<TI extends S.Schema.Any>(\n config: TestCaseDescribeConfig<TI>,\n ): TestCase<S.Schema.Type<TI>> {\n return new TestCase<S.Schema.Type<TI>>({\n name: config.name,\n tags: config.tags,\n inputSchema: config.inputSchema,\n input: config.input,\n });\n }\n\n getName(): string {\n return this._config.name;\n }\n\n getTags(): string[] {\n return this._config.tags;\n }\n\n getInputSchema(): S.Schema.Any {\n return this._config.inputSchema;\n }\n\n getInput(): TInput {\n return resolve(this._config.input);\n }\n}\n","import { Schema as S } from 'effect';\n\nexport interface EvalMiddleware<TCtx> {\n name: string;\n resolve: () => TCtx | Promise<TCtx>;\n}\n\ntype EvaluateFn<TInput, TScore, TCtx> = (\n input: TInput,\n ctx: TCtx,\n) => TScore | Promise<TScore>;\n\ninterface EvaluatorConfig<TInput, TOutput, TScore, TCtx> {\n name?: string;\n inputSchema?: S.Schema.Any;\n outputSchema?: S.Schema.Any;\n scoreSchema?: S.Schema.Any;\n middlewares: ReadonlyArray<EvalMiddleware<unknown>>;\n evaluateFn?: EvaluateFn<TInput, TScore, TCtx>;\n passThreshold?: number;\n passCriterion?: (score: unknown) => boolean;\n /** Phantom field for TOutput type parameter */\n _outputType?: TOutput;\n}\n\ninterface EvaluatorDefineConfig<\n TI extends S.Schema.Any,\n TO extends S.Schema.Any,\n TS extends S.Schema.Any,\n> {\n name: string;\n inputSchema: TI;\n outputSchema: TO;\n scoreSchema: TS;\n passThreshold?: number;\n passCriterion?: (score: unknown) => boolean;\n}\n\nexport class Evaluator<\n TInput = unknown,\n TOutput = unknown,\n TScore = unknown,\n TCtx = Record<string, never>,\n> {\n private readonly _config: EvaluatorConfig<TInput, TOutput, TScore, TCtx>;\n\n private constructor(\n config: EvaluatorConfig<TInput, TOutput, TScore, TCtx>,\n ) {\n this._config = config;\n }\n\n private getState(): EvaluatorConfig<TInput, TOutput, TScore, TCtx> {\n return {\n name: this._config.name,\n inputSchema: this._config.inputSchema,\n outputSchema: this._config.outputSchema,\n scoreSchema: this._config.scoreSchema,\n middlewares: this._config.middlewares,\n evaluateFn: this._config.evaluateFn,\n passThreshold: this._config.passThreshold,\n passCriterion: this._config.passCriterion,\n };\n }\n\n static use<TCtx>(\n middleware: EvalMiddleware<TCtx>,\n ): Evaluator<unknown, unknown, unknown, TCtx> {\n return new Evaluator<unknown, unknown, unknown, TCtx>({\n middlewares: [middleware as EvalMiddleware<unknown>],\n });\n }\n\n use<TNew>(\n middleware: EvalMiddleware<TNew>,\n ): Evaluator<TInput, TOutput, TScore, TCtx & TNew> {\n const state = this.getState();\n return new Evaluator<TInput, TOutput, TScore, TCtx & TNew>({\n ...(state as unknown as EvaluatorConfig<\n TInput,\n TOutput,\n TScore,\n TCtx & TNew\n >),\n middlewares: [...state.middlewares, middleware as EvalMiddleware<unknown>],\n });\n }\n\n define<\n TI extends S.Schema.Any,\n TO extends S.Schema.Any,\n TS extends S.Schema.Any,\n >(\n config: EvaluatorDefineConfig<TI, TO, TS>,\n ): Evaluator<S.Schema.Type<TI>, S.Schema.Type<TO>, S.Schema.Type<TS>, TCtx> {\n const { middlewares } = this.getState();\n return new Evaluator<\n S.Schema.Type<TI>,\n S.Schema.Type<TO>,\n S.Schema.Type<TS>,\n TCtx\n >({\n name: config.name,\n inputSchema: config.inputSchema,\n outputSchema: config.outputSchema,\n scoreSchema: config.scoreSchema,\n middlewares,\n passThreshold: config.passThreshold,\n passCriterion: config.passCriterion,\n });\n }\n\n evaluate(\n fn: EvaluateFn<TInput, TScore, TCtx>,\n ): Evaluator<TInput, TOutput, TScore, TCtx> {\n return new Evaluator<TInput, TOutput, TScore, TCtx>({\n ...this.getState(),\n evaluateFn: fn,\n });\n }\n\n getName(): string | undefined {\n return this._config.name;\n }\n\n getInputSchema(): S.Schema.Any | undefined {\n return this._config.inputSchema;\n }\n\n getOutputSchema(): S.Schema.Any | undefined {\n return this._config.outputSchema;\n }\n\n getScoreSchema(): S.Schema.Any | undefined {\n return this._config.scoreSchema;\n }\n\n getMiddlewares(): ReadonlyArray<EvalMiddleware<unknown>> {\n return this._config.middlewares;\n }\n\n getEvaluateFn(): EvaluateFn<TInput, TScore, TCtx> | undefined {\n return this._config.evaluateFn;\n }\n\n getPassThreshold(): number | undefined {\n return this._config.passThreshold;\n }\n\n getPassCriterion(): ((score: unknown) => boolean) | undefined {\n return this._config.passCriterion;\n }\n\n async resolveContext(): Promise<TCtx> {\n const parts = await Promise.all(\n this._config.middlewares.map((mw) => mw.resolve()),\n );\n return Object.assign({}, ...parts) as TCtx;\n }\n}\n","import type { TagMatcher, PathMatcher } from './types';\nimport type { TestCase } from './test-case';\n\ninterface DatasetConfig {\n name: string;\n includedTags: ReadonlyArray<TagMatcher>;\n excludedTags: ReadonlyArray<TagMatcher>;\n includedPaths: ReadonlyArray<PathMatcher>;\n excludedPaths: ReadonlyArray<PathMatcher>;\n}\n\ninterface DatasetDefineConfig {\n name: string;\n includedTags?: TagMatcher[];\n excludedTags?: TagMatcher[];\n includedPaths?: PathMatcher[];\n excludedPaths?: PathMatcher[];\n}\n\nfunction matchesAny(\n value: string,\n matchers: ReadonlyArray<string | RegExp>,\n): boolean {\n return matchers.some((matcher) =>\n typeof matcher === 'string' ? value === matcher : matcher.test(value),\n );\n}\n\nfunction matchesAnyPath(\n filePath: string,\n matchers: ReadonlyArray<string | RegExp>,\n): boolean {\n return matchers.some((matcher) => {\n if (typeof matcher === 'string') {\n return simpleGlobMatch(matcher, filePath);\n }\n return matcher.test(filePath);\n });\n}\n\nfunction simpleGlobMatch(pattern: string, value: string): boolean {\n const escaped = pattern\n .replace(/[.+^${}()|[\\]\\\\]/g, '\\\\$&')\n .replace(/\\?/g, '[^/]')\n .replace(/\\*\\*\\//g, '(?:.*/)?')\n .replace(/\\*\\*/g, '.*')\n .replace(/\\*/g, '[^/]*');\n return new RegExp(`^${escaped}$`).test(value);\n}\n\nexport class Dataset {\n private readonly _config: DatasetConfig;\n\n private constructor(config: DatasetConfig) {\n this._config = config;\n }\n\n static define(config: DatasetDefineConfig): Dataset {\n return new Dataset({\n name: config.name,\n includedTags: config.includedTags ?? [],\n excludedTags: config.excludedTags ?? [],\n includedPaths: config.includedPaths ?? [],\n excludedPaths: config.excludedPaths ?? [],\n });\n }\n\n getName(): string {\n return this._config.name;\n }\n\n getIncludedTags(): ReadonlyArray<TagMatcher> {\n return this._config.includedTags;\n }\n\n getExcludedTags(): ReadonlyArray<TagMatcher> {\n return this._config.excludedTags;\n }\n\n getIncludedPaths(): ReadonlyArray<PathMatcher> {\n return this._config.includedPaths;\n }\n\n getExcludedPaths(): ReadonlyArray<PathMatcher> {\n return this._config.excludedPaths;\n }\n\n matchesTestCase(\n testCase: TestCase<unknown>,\n filePath: string,\n ): boolean {\n const tags = testCase.getTags();\n\n if (this._config.excludedTags.length > 0) {\n if (tags.some((tag) => matchesAny(tag, this._config.excludedTags))) {\n return false;\n }\n }\n\n if (this._config.excludedPaths.length > 0) {\n if (matchesAnyPath(filePath, this._config.excludedPaths)) {\n return false;\n }\n }\n\n const tagMatch =\n this._config.includedTags.length === 0 ||\n tags.some((tag) => matchesAny(tag, this._config.includedTags));\n\n const pathMatch =\n this._config.includedPaths.length === 0 ||\n matchesAnyPath(filePath, this._config.includedPaths);\n\n return tagMatch && pathMatch;\n }\n}\n","const registry = new Map<string, MetricDef<unknown>>();\n\nexport interface MetricItem<TData = unknown> {\n readonly id: string;\n readonly data: TData;\n}\n\nexport interface MetricDef<TData = unknown> {\n readonly id: string;\n readonly name?: string;\n format(data: TData): string;\n make(data: TData): MetricItem<TData>;\n}\n\nexport const Metric = {\n of<TData>(config: {\n id: string;\n name?: string;\n format: (data: TData) => string;\n }): MetricDef<TData> {\n const def: MetricDef<TData> = {\n id: config.id,\n name: config.name,\n format: config.format,\n make: (data: TData) => ({ id: config.id, data }),\n };\n registry.set(config.id, def as MetricDef<unknown>);\n return def;\n },\n};\n\nexport function getMetricById(id: string): MetricDef<unknown> | undefined {\n return registry.get(id);\n}\n","const registry = new Map<string, ScoreDef<unknown>>();\n\nexport type ScoreDisplayStrategy = 'bar' | 'number' | 'passFail';\n\nexport interface ScoreItem<TData = unknown> {\n readonly id: string;\n readonly data: TData;\n readonly passed?: boolean;\n}\n\nexport interface ScoreDef<TData = unknown> {\n readonly id: string;\n readonly name?: string;\n readonly displayStrategy: ScoreDisplayStrategy;\n format(data: TData): string;\n make(\n data: TData,\n options?: { definePassed?: (data: TData) => boolean },\n ): ScoreItem<TData>;\n}\n\nexport const Score = {\n of<TData>(config: {\n id: string;\n name?: string;\n displayStrategy: ScoreDisplayStrategy;\n format: (data: TData) => string;\n }): ScoreDef<TData> {\n const def: ScoreDef<TData> = {\n id: config.id,\n name: config.name,\n displayStrategy: config.displayStrategy,\n format: config.format,\n make: (data: TData, options?: { definePassed?: (data: TData) => boolean }) => {\n const passed =\n options?.definePassed !== undefined\n ? options.definePassed(data)\n : undefined;\n return {\n id: config.id,\n data,\n ...(passed !== undefined && { passed }),\n };\n },\n };\n registry.set(config.id, def as ScoreDef<unknown>);\n return def;\n },\n};\n\nexport function getScoreById(id: string): ScoreDef<unknown> | undefined {\n return registry.get(id);\n}\n","import { Metric } from '../metric';\n\nexport interface TokenCountData {\n input?: number;\n output?: number;\n inputCached?: number;\n outputCached?: number;\n}\n\nexport const tokenCountMetric = Metric.of<TokenCountData>({\n id: 'token-count',\n name: 'Tokens',\n format: (data) => {\n const input = data.input ?? 0;\n const output = data.output ?? 0;\n const inputCached = data.inputCached ?? 0;\n const outputCached = data.outputCached ?? 0;\n const cached = inputCached + outputCached;\n return `in:${input} out:${output} cached:${cached}`;\n },\n});\n\nexport interface LatencyData {\n ms: number;\n}\n\nexport const latencyMetric = Metric.of<LatencyData>({\n id: 'latency',\n name: 'Latency',\n format: (data) => `${data.ms}ms`,\n});\n","import { Score } from '../score';\n\nexport interface PercentScoreData {\n value: number;\n}\n\nexport const percentScore = Score.of<PercentScoreData>({\n id: 'percent',\n name: 'Score',\n displayStrategy: 'bar',\n format: (data) => data.value.toFixed(2),\n});\n\nexport interface BinaryScoreData {\n passed: boolean;\n}\n\nexport const binaryScore = Score.of<BinaryScoreData>({\n id: 'binary',\n name: 'Result',\n displayStrategy: 'passFail',\n format: (data) => (data.passed ? 'PASSED' : 'NOT PASSED'),\n});\n","import { randomUUID } from 'node:crypto';\n\nimport { Effect, Fiber, PubSub, Queue } from 'effect';\n\nimport type { RunnerConfig } from './config';\nimport { withRunnerConfig } from './config';\nimport {\n collectDatasetsFromFiles,\n collectEvaluatorsFromFiles,\n collectTestCasesFromFiles,\n} from './discovery';\nimport { createArtifactPath, executeRunTask, type RunTask } from './execution';\nimport type {\n CollectedDataset,\n CollectedEvaluator,\n CollectedTestCase,\n RunDatasetRequest,\n RunSnapshot,\n RunnerEvent,\n SearchTestCasesQuery,\n} from './events';\nimport { createPersistenceWorker } from './persistence';\nimport { searchCollectedTestCases } from './search';\n\ninterface SubscribeOptions {\n runId?: string;\n}\n\nfunction parseRegexLiteral(\n pattern: string,\n): { source: string; flags: string } | undefined {\n if (!pattern.startsWith('/')) {\n return undefined;\n }\n const lastSlash = pattern.lastIndexOf('/');\n if (lastSlash <= 0) {\n return undefined;\n }\n return {\n source: pattern.slice(1, lastSlash),\n flags: pattern.slice(lastSlash + 1),\n };\n}\n\nfunction createNameMatcher(pattern: string): (value: string) => boolean {\n const normalizedPattern = pattern.trim();\n const regexLiteral = parseRegexLiteral(normalizedPattern);\n if (regexLiteral) {\n const regex = new RegExp(regexLiteral.source, regexLiteral.flags);\n return (value: string) => regex.test(value);\n }\n\n if (normalizedPattern.includes('*')) {\n const escaped = normalizedPattern\n .replace(/[.+^${}()|[\\]\\\\]/g, '\\\\$&')\n .replace(/\\*/g, '.*');\n const regex = new RegExp(`^${escaped}$`, 'i');\n return (value: string) => regex.test(value);\n }\n\n return (value: string) => value.toLowerCase() === normalizedPattern.toLowerCase();\n}\n\nexport interface RunnerApi {\n collectDatasets(): Promise<ReadonlyArray<CollectedDataset>>;\n collectEvaluators(): Promise<ReadonlyArray<CollectedEvaluator>>;\n resolveDatasetByName(name: string): Promise<CollectedDataset | undefined>;\n resolveEvaluatorsByNamePattern(\n pattern: string,\n ): Promise<ReadonlyArray<CollectedEvaluator>>;\n searchTestCases(\n query?: SearchTestCasesQuery,\n ): Promise<ReadonlyArray<CollectedTestCase>>;\n collectDatasetTestCases(datasetId: string): Promise<ReadonlyArray<CollectedTestCase>>;\n runDatasetWith(request: RunDatasetRequest): Promise<RunSnapshot>;\n subscribeRunEvents(\n listener: (event: RunnerEvent) => void,\n options?: SubscribeOptions,\n ): () => void;\n getRunSnapshot(runId: string): RunSnapshot | undefined;\n getAllRunSnapshots(): ReadonlyArray<RunSnapshot>;\n shutdown(): Promise<void>;\n}\n\nexport function createRunner(overrides?: Partial<RunnerConfig>): RunnerApi {\n return new EffectRunner(withRunnerConfig(overrides));\n}\n\nclass EffectRunner implements RunnerApi {\n private readonly config: RunnerConfig;\n\n private readonly eventBus = Effect.runSync(PubSub.unbounded<RunnerEvent>());\n\n private readonly runQueue = Effect.runSync(Queue.unbounded<RunTask>());\n\n private readonly persistenceQueue = Effect.runSync(\n Queue.unbounded<{\n runId: string;\n artifactPath: string;\n payload: unknown;\n }>(),\n );\n\n private readonly snapshots = new Map<string, RunSnapshot>();\n private readonly listeners = new Set<{\n runId?: string;\n listener: (event: RunnerEvent) => void;\n }>();\n\n private readonly datasetsById = new Map<string, CollectedDataset>();\n\n private readonly evaluatorsById = new Map<string, CollectedEvaluator>();\n\n private readonly schedulerFiber = Effect.runFork(\n this.createSchedulerEffect(),\n );\n\n private readonly persistenceFiber = Effect.runFork(\n createPersistenceWorker(this.persistenceQueue),\n );\n\n constructor(config: RunnerConfig) {\n this.config = config;\n }\n\n async collectDatasets(): Promise<ReadonlyArray<CollectedDataset>> {\n const datasets = await collectDatasetsFromFiles(this.config.discovery);\n this.datasetsById.clear();\n for (const dataset of datasets) {\n this.datasetsById.set(dataset.id, dataset);\n }\n return datasets;\n }\n\n async collectEvaluators(): Promise<ReadonlyArray<CollectedEvaluator>> {\n const evaluators = await collectEvaluatorsFromFiles(this.config.discovery);\n this.evaluatorsById.clear();\n for (const evaluator of evaluators) {\n this.evaluatorsById.set(evaluator.id, evaluator);\n }\n return evaluators;\n }\n\n async resolveDatasetByName(name: string): Promise<CollectedDataset | undefined> {\n if (this.datasetsById.size === 0) {\n await this.collectDatasets();\n }\n const normalized = name.trim().toLowerCase();\n return Array.from(this.datasetsById.values()).find(\n (item) => item.dataset.getName().toLowerCase() === normalized,\n );\n }\n\n async resolveEvaluatorsByNamePattern(\n pattern: string,\n ): Promise<ReadonlyArray<CollectedEvaluator>> {\n if (this.evaluatorsById.size === 0) {\n await this.collectEvaluators();\n }\n const matcher = createNameMatcher(pattern);\n return Array.from(this.evaluatorsById.values()).filter((item) =>\n matcher(item.evaluator.getName() ?? ''),\n );\n }\n\n async searchTestCases(\n query?: SearchTestCasesQuery,\n ): Promise<ReadonlyArray<CollectedTestCase>> {\n const testCases = await collectTestCasesFromFiles(this.config.discovery);\n return searchCollectedTestCases(testCases, query);\n }\n\n async collectDatasetTestCases(\n datasetId: string,\n ): Promise<ReadonlyArray<CollectedTestCase>> {\n if (this.datasetsById.size === 0) {\n await this.collectDatasets();\n }\n const dataset = this.datasetsById.get(datasetId);\n if (!dataset) {\n throw new Error(`Unknown dataset: ${datasetId}`);\n }\n const allTestCases = await collectTestCasesFromFiles(this.config.discovery);\n return allTestCases.filter((testCase) =>\n dataset.dataset.matchesTestCase(testCase.testCase, testCase.filePath),\n );\n }\n\n async runDatasetWith(request: RunDatasetRequest): Promise<RunSnapshot> {\n if (this.datasetsById.size === 0) {\n await this.collectDatasets();\n }\n if (this.evaluatorsById.size === 0) {\n await this.collectEvaluators();\n }\n\n const dataset = this.datasetsById.get(request.datasetId);\n if (!dataset) {\n throw new Error(`Unknown dataset: ${request.datasetId}`);\n }\n\n const selectedEvaluators = request.evaluatorIds\n .map((id) => this.evaluatorsById.get(id))\n .filter((value): value is CollectedEvaluator => Boolean(value))\n .map((value) => ({ id: value.id, evaluator: value.evaluator }));\n\n if (selectedEvaluators.length === 0) {\n throw new Error('No evaluators selected for run');\n }\n\n const selectedTestCases = await this.collectDatasetTestCases(request.datasetId);\n\n const runId = `run-${randomUUID()}`;\n const artifactPath = createArtifactPath(\n this.config.artifactDirectory,\n request.datasetId,\n runId,\n );\n const snapshot: RunSnapshot = {\n runId,\n datasetId: request.datasetId,\n datasetName: dataset.dataset.getName(),\n evaluatorIds: selectedEvaluators.map((item) => item.id),\n queuedAt: Date.now(),\n totalTestCases: selectedTestCases.length,\n completedTestCases: 0,\n passedTestCases: 0,\n failedTestCases: 0,\n status: 'queued',\n artifactPath,\n };\n\n this.snapshots.set(runId, snapshot);\n const queuedEvent: RunnerEvent = {\n type: 'RunQueued',\n runId,\n datasetId: request.datasetId,\n datasetName: dataset.dataset.getName(),\n evaluatorIds: selectedEvaluators.map((item) => item.id),\n totalTestCases: selectedTestCases.length,\n artifactPath,\n };\n await Effect.runPromise(this.publishEvent(queuedEvent));\n await Effect.runPromise(\n Queue.offer(this.persistenceQueue, {\n runId,\n artifactPath,\n payload: queuedEvent,\n }),\n );\n\n await Effect.runPromise(\n Queue.offer(this.runQueue, {\n runId,\n datasetId: request.datasetId,\n dataset: dataset.dataset,\n evaluators: selectedEvaluators,\n testCases: selectedTestCases,\n snapshot,\n }),\n );\n\n return snapshot;\n }\n\n subscribeRunEvents(\n listener: (event: RunnerEvent) => void,\n options?: SubscribeOptions,\n ): () => void {\n const entry = { runId: options?.runId, listener };\n this.listeners.add(entry);\n return () => {\n this.listeners.delete(entry);\n };\n }\n\n getRunSnapshot(runId: string): RunSnapshot | undefined {\n return this.snapshots.get(runId);\n }\n\n getAllRunSnapshots(): ReadonlyArray<RunSnapshot> {\n return Array.from(this.snapshots.values()).sort(\n (a, b) => b.queuedAt - a.queuedAt,\n );\n }\n\n async shutdown(): Promise<void> {\n await Effect.runPromise(Fiber.interrupt(this.schedulerFiber));\n await Effect.runPromise(Fiber.interrupt(this.persistenceFiber));\n await Effect.runPromise(Queue.shutdown(this.runQueue));\n await Effect.runPromise(Queue.shutdown(this.persistenceQueue));\n await Effect.runPromise(PubSub.shutdown(this.eventBus));\n }\n\n private createSchedulerEffect() {\n const self = this;\n return Effect.forever(\n Effect.gen(function* () {\n const task = yield* Queue.take(self.runQueue);\n yield* Effect.fork(\n executeRunTask(\n task,\n self.publishEvent.bind(self),\n self.persistenceQueue,\n self.updateSnapshot.bind(self),\n ),\n );\n }),\n );\n }\n\n private updateSnapshot(\n runId: string,\n updater: (snapshot: RunSnapshot) => RunSnapshot,\n ): void {\n const existing = this.snapshots.get(runId);\n if (!existing) {\n return;\n }\n this.snapshots.set(runId, updater(existing));\n }\n\n private publishEvent(event: RunnerEvent): Effect.Effect<void, never, never> {\n return Effect.sync(() => {\n for (const entry of this.listeners) {\n if (entry.runId && entry.runId !== event.runId) {\n continue;\n }\n entry.listener(event);\n }\n }).pipe(\n Effect.flatMap(() => PubSub.publish(this.eventBus, event)),\n Effect.asVoid,\n );\n }\n}\n","export interface RunnerDiscoveryConfig {\n rootDir: string;\n datasetSuffixes: ReadonlyArray<string>;\n evaluatorSuffixes: ReadonlyArray<string>;\n testCaseSuffixes: ReadonlyArray<string>;\n excludeDirectories: ReadonlyArray<string>;\n}\n\nexport interface RunnerConfig {\n discovery: RunnerDiscoveryConfig;\n artifactDirectory: string;\n}\n\nexport const defaultRunnerConfig: RunnerConfig = {\n discovery: {\n rootDir: process.cwd(),\n datasetSuffixes: ['.dataset.ts', '.dataset.tsx', '.dataset.js', '.dataset.mjs'],\n evaluatorSuffixes: [\n '.evaluator.ts',\n '.evaluator.tsx',\n '.evaluator.js',\n '.evaluator.mjs',\n ],\n testCaseSuffixes: [\n '.test-case.ts',\n '.test-case.tsx',\n '.test-case.js',\n '.test-case.mjs',\n ],\n excludeDirectories: ['node_modules', 'dist', '.next', '.git', '.pnpm-store'],\n },\n artifactDirectory: '.eval-results',\n};\n\nexport function withRunnerConfig(overrides?: Partial<RunnerConfig>): RunnerConfig {\n if (!overrides) {\n return defaultRunnerConfig;\n }\n const discovery = overrides.discovery\n ? {\n ...defaultRunnerConfig.discovery,\n ...overrides.discovery,\n }\n : defaultRunnerConfig.discovery;\n\n return {\n ...defaultRunnerConfig,\n ...overrides,\n discovery,\n };\n}\n","import { Dirent } from 'node:fs';\nimport { readdir } from 'node:fs/promises';\nimport { resolve, relative } from 'node:path';\nimport { pathToFileURL } from 'node:url';\n\nimport type { Dataset } from '../evals/dataset';\nimport type { Evaluator } from '../evals/evaluator';\nimport type { TestCase } from '../evals/test-case';\nimport type {\n CollectedDataset,\n CollectedEvaluator,\n CollectedTestCase,\n} from './events';\nimport type { RunnerDiscoveryConfig } from './config';\n\ntype JitiModuleLoader = {\n (id: string): unknown;\n import?: (id: string) => Promise<unknown> | unknown;\n};\n\nlet jitiLoader: JitiModuleLoader | undefined;\n\nfunction toId(prefix: string, filePath: string, name?: string): string {\n const stable = name && name.trim().length > 0 ? name : filePath;\n return `${prefix}:${stable}`\n .toLowerCase()\n .replace(/[^a-z0-9]+/g, '-')\n .replace(/^-+|-+$/g, '');\n}\n\nfunction hasMethod(value: unknown, methodName: string): boolean {\n return (\n typeof value === 'object' &&\n value !== null &&\n methodName in value &&\n typeof (value as Record<string, unknown>)[methodName] === 'function'\n );\n}\n\nfunction isDatasetLike(value: unknown): value is Dataset {\n return hasMethod(value, 'getName') && hasMethod(value, 'matchesTestCase');\n}\n\nfunction isEvaluatorLike(\n value: unknown,\n): value is Evaluator<unknown, unknown, unknown, unknown> {\n return (\n hasMethod(value, 'getName') &&\n hasMethod(value, 'resolveContext') &&\n hasMethod(value, 'getEvaluateFn')\n );\n}\n\nfunction isTestCaseLike(value: unknown): value is TestCase<unknown> {\n return (\n hasMethod(value, 'getName') &&\n hasMethod(value, 'getTags') &&\n hasMethod(value, 'getInput')\n );\n}\n\nasync function walkDirectory(\n rootDir: string,\n excludeDirectories: ReadonlyArray<string>,\n): Promise<string[]> {\n const out: string[] = [];\n\n async function walk(currentDir: string): Promise<void> {\n let entries: Dirent[];\n try {\n entries = await readdir(currentDir, { withFileTypes: true });\n } catch {\n return;\n }\n\n await Promise.all(\n entries.map(async (entry) => {\n const absolute = resolve(currentDir, entry.name);\n if (entry.isDirectory()) {\n if (excludeDirectories.includes(entry.name)) {\n return;\n }\n await walk(absolute);\n return;\n }\n\n if (entry.isFile()) {\n out.push(absolute);\n }\n }),\n );\n }\n\n await walk(rootDir);\n return out;\n}\n\nfunction hasOneSuffix(\n filePath: string,\n suffixes: ReadonlyArray<string>,\n): boolean {\n return suffixes.some((suffix) => filePath.endsWith(suffix));\n}\n\nasync function loadModuleExports(filePath: string): Promise<unknown[]> {\n if (filePath.endsWith('.ts') || filePath.endsWith('.tsx')) {\n if (!jitiLoader) {\n const jitiModule = (await import('jiti')) as {\n createJiti?: (filename: string, opts?: Record<string, unknown>) => JitiModuleLoader;\n default?: (filename: string, opts?: Record<string, unknown>) => JitiModuleLoader;\n };\n const createJiti = jitiModule.createJiti ?? jitiModule.default;\n if (!createJiti) {\n throw new Error('Failed to initialize jiti TypeScript loader');\n }\n jitiLoader = createJiti(import.meta.url, {\n interopDefault: true,\n moduleCache: true,\n }) as JitiModuleLoader;\n }\n const loaded = jitiLoader.import\n ? await jitiLoader.import(filePath)\n : await Promise.resolve(jitiLoader(filePath));\n return Object.values(loaded as Record<string, unknown>);\n }\n\n const moduleUrl = pathToFileURL(filePath).href;\n const loaded = (await import(moduleUrl)) as Record<string, unknown>;\n return Object.values(loaded);\n}\n\nexport async function collectDatasetsFromFiles(\n config: RunnerDiscoveryConfig,\n): Promise<ReadonlyArray<CollectedDataset>> {\n const files = await walkDirectory(config.rootDir, config.excludeDirectories);\n const matched = files.filter((filePath) =>\n hasOneSuffix(filePath, config.datasetSuffixes),\n );\n\n const found = await Promise.all(\n matched.map(async (absolutePath) => {\n const exports = await loadModuleExports(absolutePath);\n const datasets = exports.filter(isDatasetLike);\n const relPath = relative(config.rootDir, absolutePath);\n return datasets.map((dataset) => ({\n id: toId('dataset', relPath, dataset.getName()),\n filePath: relPath,\n dataset,\n }));\n }),\n );\n\n return found.flat();\n}\n\nexport async function collectEvaluatorsFromFiles(\n config: RunnerDiscoveryConfig,\n): Promise<ReadonlyArray<CollectedEvaluator>> {\n const files = await walkDirectory(config.rootDir, config.excludeDirectories);\n const matched = files.filter((filePath) =>\n hasOneSuffix(filePath, config.evaluatorSuffixes),\n );\n\n const found = await Promise.all(\n matched.map(async (absolutePath) => {\n const exports = await loadModuleExports(absolutePath);\n const evaluators = exports.filter(isEvaluatorLike);\n const relPath = relative(config.rootDir, absolutePath);\n return evaluators.map((evaluator) => ({\n id: toId('evaluator', relPath, evaluator.getName()),\n filePath: relPath,\n evaluator,\n }));\n }),\n );\n\n return found.flat();\n}\n\nexport async function collectTestCasesFromFiles(\n config: RunnerDiscoveryConfig,\n): Promise<ReadonlyArray<CollectedTestCase>> {\n const files = await walkDirectory(config.rootDir, config.excludeDirectories);\n const matched = files.filter((filePath) =>\n hasOneSuffix(filePath, config.testCaseSuffixes),\n );\n\n const found = await Promise.all(\n matched.map(async (absolutePath) => {\n const exports = await loadModuleExports(absolutePath);\n const testCases = exports.filter(isTestCaseLike);\n const relPath = relative(config.rootDir, absolutePath);\n return testCases.map((testCase) => ({\n id: toId('test-case', relPath, testCase.getName()),\n filePath: relPath,\n testCase,\n }));\n }),\n );\n\n return found.flat();\n}\n","import { join } from 'node:path';\n\nimport { Effect, Queue } from 'effect';\n\nimport type { Dataset } from '../evals/dataset';\nimport type { Evaluator } from '../evals/evaluator';\nimport type { MetricItem } from '../evals/metric';\nimport type { ScoreItem } from '../evals/score';\nimport type { CollectedTestCase, RunSnapshot, RunnerEvent } from './events';\nimport type { PersistenceMessage } from './persistence';\nimport { toNumericScoreFromScores } from './score-utils';\n\nfunction computeEvaluatorPassed(\n evaluator: Evaluator<unknown, unknown, unknown, unknown>,\n result: unknown,\n scores: ReadonlyArray<ScoreItem>,\n): boolean {\n const scoresWithPassed = scores.filter((s) => 'passed' in s && s.passed !== undefined);\n if (scoresWithPassed.length > 0) {\n return scoresWithPassed.every((s) => s.passed === true);\n }\n const passCriterion = evaluator.getPassCriterion();\n if (passCriterion) {\n return passCriterion(result);\n }\n const passThreshold = evaluator.getPassThreshold();\n if (passThreshold !== undefined) {\n const numeric = toNumericScoreFromScores(scores);\n return numeric !== undefined && numeric >= passThreshold;\n }\n return true;\n}\n\nfunction normalizeResult(\n result: unknown,\n): {\n scores: ReadonlyArray<ScoreItem>;\n metrics?: ReadonlyArray<MetricItem>;\n} {\n if (typeof result !== 'object' || result === null) {\n return { scores: [] };\n }\n const obj = result as Record<string, unknown>;\n const scores = Array.isArray(obj.scores)\n ? (obj.scores as ReadonlyArray<ScoreItem>)\n : [];\n const metrics = Array.isArray(obj.metrics)\n ? (obj.metrics as ReadonlyArray<MetricItem>)\n : undefined;\n return { scores, metrics };\n}\n\nexport interface RunTask {\n runId: string;\n datasetId: string;\n dataset: Dataset;\n evaluators: ReadonlyArray<{\n id: string;\n evaluator: Evaluator<unknown, unknown, unknown, unknown>;\n }>;\n testCases: ReadonlyArray<CollectedTestCase>;\n snapshot: RunSnapshot;\n}\n\nfunction nowIsoForFile(): string {\n return new Date().toISOString().replace(/[:.]/g, '-');\n}\n\nexport function createArtifactPath(\n artifactDirectory: string,\n datasetId: string,\n runId: string,\n): string {\n return join(\n artifactDirectory,\n `${datasetId}_${runId}_${nowIsoForFile()}.jsonl`,\n );\n}\n\nexport const executeRunTask = (\n task: RunTask,\n publishEvent: (event: RunnerEvent) => Effect.Effect<void, never, never>,\n persistenceQueue: Queue.Queue<PersistenceMessage>,\n updateSnapshot: (\n runId: string,\n updater: (snapshot: RunSnapshot) => RunSnapshot,\n ) => void,\n): Effect.Effect<void, never, never> =>\n Effect.gen(function* () {\n const startedAt = Date.now();\n updateSnapshot(task.runId, (snapshot) => ({\n ...snapshot,\n status: 'running',\n startedAt,\n }));\n yield* publishEvent({\n type: 'RunStarted',\n runId: task.runId,\n startedAt,\n });\n\n let completedTestCases = 0;\n let passedTestCases = 0;\n let failedTestCases = 0;\n\n for (const testCaseItem of task.testCases) {\n const started = Date.now();\n const evaluatorScores: Array<{\n evaluatorId: string;\n scores: ReadonlyArray<ScoreItem>;\n passed: boolean;\n metrics?: ReadonlyArray<MetricItem>;\n }> = [];\n let testCaseError: string | undefined;\n\n for (const { id: evaluatorId, evaluator } of task.evaluators) {\n const evaluateFn = evaluator.getEvaluateFn();\n if (!evaluateFn) {\n continue;\n }\n\n try {\n const ctx = yield* Effect.promise(() =>\n Promise.resolve(evaluator.resolveContext()),\n );\n const result = yield* Effect.promise(() =>\n Promise.resolve(evaluateFn(testCaseItem.testCase.getInput(), ctx)),\n );\n const { scores, metrics } = normalizeResult(result);\n const passed = computeEvaluatorPassed(evaluator, result, scores);\n evaluatorScores.push({ evaluatorId, scores, passed, metrics });\n } catch (error) {\n testCaseError =\n error instanceof Error\n ? error.message\n : 'Evaluator execution failed';\n evaluatorScores.push({\n evaluatorId,\n scores: [],\n passed: false,\n });\n }\n }\n\n const testCasePassed = evaluatorScores.every((s) => s.passed);\n completedTestCases += 1;\n if (testCasePassed) {\n passedTestCases += 1;\n } else {\n failedTestCases += 1;\n }\n\n const progressEvent: RunnerEvent = {\n type: 'TestCaseProgress',\n runId: task.runId,\n testCaseId: testCaseItem.id,\n testCaseName: testCaseItem.testCase.getName(),\n completedTestCases,\n totalTestCases: task.testCases.length,\n passed: testCasePassed,\n durationMs: Date.now() - started,\n evaluatorScores,\n errorMessage: testCaseError,\n };\n\n updateSnapshot(task.runId, (snapshot) => ({\n ...snapshot,\n completedTestCases,\n passedTestCases,\n failedTestCases,\n }));\n\n yield* publishEvent(progressEvent);\n yield* Queue.offer(persistenceQueue, {\n runId: task.runId,\n artifactPath: task.snapshot.artifactPath,\n payload: progressEvent,\n });\n }\n\n const finishedAt = Date.now();\n const completedEvent: RunnerEvent = {\n type: 'RunCompleted',\n runId: task.runId,\n finishedAt,\n passedTestCases,\n failedTestCases,\n totalTestCases: task.testCases.length,\n artifactPath: task.snapshot.artifactPath,\n };\n\n updateSnapshot(task.runId, (snapshot) => ({\n ...snapshot,\n status: 'completed',\n completedTestCases,\n passedTestCases,\n failedTestCases,\n finishedAt,\n }));\n\n yield* publishEvent(completedEvent);\n yield* Queue.offer(persistenceQueue, {\n runId: task.runId,\n artifactPath: task.snapshot.artifactPath,\n payload: completedEvent,\n });\n yield* publishEvent({\n type: 'ArtifactFlushed',\n runId: task.runId,\n artifactPath: task.snapshot.artifactPath,\n });\n });\n","import type { ScoreItem } from '../evals/score';\nimport { getScoreById } from '../evals';\n\nexport function toNumericScoreFromScores(\n scores: ReadonlyArray<ScoreItem>,\n): number | undefined {\n for (const item of scores) {\n const def = getScoreById(item.id);\n if (def && def.displayStrategy === 'bar' && typeof item.data === 'object' && item.data !== null && 'value' in item.data) {\n const value = (item.data as { value: unknown }).value;\n if (typeof value === 'number' && Number.isFinite(value)) {\n return value;\n }\n }\n const numeric = toNumericScore(item.data);\n if (numeric !== undefined) {\n return numeric;\n }\n }\n return undefined;\n}\n\nexport function toNumericScore(value: unknown): number | undefined {\n if (typeof value === 'number' && Number.isFinite(value)) {\n return value;\n }\n if (typeof value !== 'object' || value === null) {\n return undefined;\n }\n const obj = value as Record<string, unknown>;\n if (\n 'score' in obj &&\n typeof obj.score === 'number' &&\n Number.isFinite(obj.score)\n ) {\n return obj.score;\n }\n const numberValues = Object.values(value).filter(\n (entry): entry is number =>\n typeof entry === 'number' && Number.isFinite(entry),\n );\n if (numberValues.length === 0) {\n return undefined;\n }\n return (\n numberValues.reduce((sum, entry) => sum + entry, 0) / numberValues.length\n );\n}\n","import { appendFile, mkdir } from 'node:fs/promises';\nimport { dirname } from 'node:path';\n\nimport { Effect, Queue } from 'effect';\n\nexport interface PersistenceMessage {\n runId: string;\n artifactPath: string;\n payload: unknown;\n}\n\nasync function appendJsonLine(\n artifactPath: string,\n payload: unknown,\n): Promise<void> {\n await mkdir(dirname(artifactPath), { recursive: true });\n await appendFile(artifactPath, `${JSON.stringify(payload)}\\n`, 'utf8');\n}\n\nexport const createPersistenceWorker = (\n queue: Queue.Queue<PersistenceMessage>,\n): Effect.Effect<never, never, never> =>\n Effect.forever(\n Effect.gen(function* () {\n const message = yield* Queue.take(queue);\n yield* Effect.promise(() =>\n appendJsonLine(message.artifactPath, {\n runId: message.runId,\n ts: Date.now(),\n ...message.payload,\n }),\n );\n }),\n );\n","import type { CollectedTestCase, SearchTestCasesQuery } from './events';\n\nfunction matchesAny(\n value: string,\n matchers: ReadonlyArray<string | RegExp> | undefined,\n): boolean {\n if (!matchers || matchers.length === 0) {\n return true;\n }\n return matchers.some((matcher) =>\n typeof matcher === 'string' ? matcher === value : matcher.test(value),\n );\n}\n\nfunction matchesPath(\n value: string,\n matchers: ReadonlyArray<string | RegExp> | undefined,\n): boolean {\n if (!matchers || matchers.length === 0) {\n return true;\n }\n return matchers.some((matcher) => {\n if (typeof matcher === 'string') {\n return value.includes(matcher);\n }\n return matcher.test(value);\n });\n}\n\nexport function searchCollectedTestCases(\n all: ReadonlyArray<CollectedTestCase>,\n query?: SearchTestCasesQuery,\n): ReadonlyArray<CollectedTestCase> {\n if (!query) {\n return all;\n }\n\n return all.filter((item) => {\n const tags = item.testCase.getTags();\n\n if (\n query.excludedTags &&\n tags.some((tag) => matchesAny(tag, query.excludedTags))\n ) {\n return false;\n }\n if (\n query.excludedPaths &&\n matchesPath(item.filePath, query.excludedPaths)\n ) {\n return false;\n }\n\n const includedTagsMatch =\n !query.includedTags ||\n query.includedTags.length === 0 ||\n tags.some((tag) => matchesAny(tag, query.includedTags));\n\n const includedPathsMatch =\n !query.includedPaths ||\n query.includedPaths.length === 0 ||\n matchesPath(item.filePath, query.includedPaths);\n\n return includedTagsMatch && includedPathsMatch;\n });\n}\n"]}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,347 @@
|
|
|
1
|
+
import { Schema } from 'effect';
|
|
2
|
+
|
|
3
|
+
type EvalStatus = 'PASS' | 'FAILED' | 'RUNNING';
|
|
4
|
+
interface EvalDimension {
|
|
5
|
+
name: string;
|
|
6
|
+
score: number;
|
|
7
|
+
}
|
|
8
|
+
interface EvalCheck {
|
|
9
|
+
name: string;
|
|
10
|
+
passed: boolean;
|
|
11
|
+
detail?: string;
|
|
12
|
+
}
|
|
13
|
+
interface EvalFailure {
|
|
14
|
+
title: string;
|
|
15
|
+
}
|
|
16
|
+
interface EvalPerformance {
|
|
17
|
+
passRate: number;
|
|
18
|
+
avgScore: number;
|
|
19
|
+
latencyP95Ms: number;
|
|
20
|
+
latencyAvgMs: number;
|
|
21
|
+
tokensAvg: number;
|
|
22
|
+
tokensP95: number;
|
|
23
|
+
costUsd: number;
|
|
24
|
+
/** Per-sample latency in ms for sparkline (e.g. last N requests) */
|
|
25
|
+
latencyHistoryMs?: number[];
|
|
26
|
+
}
|
|
27
|
+
interface EvalRunMeta {
|
|
28
|
+
model: string;
|
|
29
|
+
provider: string;
|
|
30
|
+
commit: string;
|
|
31
|
+
branch: string;
|
|
32
|
+
seed: number;
|
|
33
|
+
concurrency: number;
|
|
34
|
+
duration: string;
|
|
35
|
+
artifact: string;
|
|
36
|
+
}
|
|
37
|
+
interface EvalRun {
|
|
38
|
+
id: string;
|
|
39
|
+
label: string;
|
|
40
|
+
status: EvalStatus;
|
|
41
|
+
performance: EvalPerformance;
|
|
42
|
+
dimensions: EvalDimension[];
|
|
43
|
+
checks: EvalCheck[];
|
|
44
|
+
failures: EvalFailure[];
|
|
45
|
+
meta: EvalRunMeta;
|
|
46
|
+
}
|
|
47
|
+
interface EvalDataset {
|
|
48
|
+
id: string;
|
|
49
|
+
name: string;
|
|
50
|
+
overview: string;
|
|
51
|
+
runs: EvalRun[];
|
|
52
|
+
}
|
|
53
|
+
interface EvaluatorOption {
|
|
54
|
+
id: string;
|
|
55
|
+
name: string;
|
|
56
|
+
configPreview: string;
|
|
57
|
+
}
|
|
58
|
+
interface EvalsData {
|
|
59
|
+
datasets: EvalDataset[];
|
|
60
|
+
evaluators: EvaluatorOption[];
|
|
61
|
+
}
|
|
62
|
+
type PaneFocus = 'left' | 'right';
|
|
63
|
+
type ViewLevel = 'datasets' | 'runs' | 'details' | 'new-evaluation';
|
|
64
|
+
interface StartupArgs {
|
|
65
|
+
datasetId?: string;
|
|
66
|
+
runId?: string;
|
|
67
|
+
search?: string;
|
|
68
|
+
unknownArgs: string[];
|
|
69
|
+
}
|
|
70
|
+
interface CliState {
|
|
71
|
+
level: ViewLevel;
|
|
72
|
+
focus: PaneFocus;
|
|
73
|
+
datasetMenuIndex: number;
|
|
74
|
+
runMenuIndex: number;
|
|
75
|
+
detailsScrollOffset: number;
|
|
76
|
+
selectedEvaluatorIds: string[];
|
|
77
|
+
evaluatorMenuIndex: number;
|
|
78
|
+
searchQuery: string;
|
|
79
|
+
searchMode: boolean;
|
|
80
|
+
startupWarnings: string[];
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
interface RunnerDiscoveryConfig {
|
|
84
|
+
rootDir: string;
|
|
85
|
+
datasetSuffixes: ReadonlyArray<string>;
|
|
86
|
+
evaluatorSuffixes: ReadonlyArray<string>;
|
|
87
|
+
testCaseSuffixes: ReadonlyArray<string>;
|
|
88
|
+
excludeDirectories: ReadonlyArray<string>;
|
|
89
|
+
}
|
|
90
|
+
interface RunnerConfig {
|
|
91
|
+
discovery: RunnerDiscoveryConfig;
|
|
92
|
+
artifactDirectory: string;
|
|
93
|
+
}
|
|
94
|
+
declare const defaultRunnerConfig: RunnerConfig;
|
|
95
|
+
declare function withRunnerConfig(overrides?: Partial<RunnerConfig>): RunnerConfig;
|
|
96
|
+
|
|
97
|
+
/** Matches a tag by exact string equality or regex test */
|
|
98
|
+
type TagMatcher = string | RegExp;
|
|
99
|
+
/** Matches a file path by glob string or regex test */
|
|
100
|
+
type PathMatcher = string | RegExp;
|
|
101
|
+
|
|
102
|
+
type InputOrBuilder<T> = T | (() => T);
|
|
103
|
+
interface TestCaseDescribeConfig<TI extends Schema.Schema.Any> {
|
|
104
|
+
name: string;
|
|
105
|
+
tags: string[];
|
|
106
|
+
inputSchema: TI;
|
|
107
|
+
input: InputOrBuilder<Schema.Schema.Type<TI>>;
|
|
108
|
+
}
|
|
109
|
+
declare class TestCase<TInput = unknown> {
|
|
110
|
+
private readonly _config;
|
|
111
|
+
private constructor();
|
|
112
|
+
static describe<TI extends Schema.Schema.Any>(config: TestCaseDescribeConfig<TI>): TestCase<Schema.Schema.Type<TI>>;
|
|
113
|
+
getName(): string;
|
|
114
|
+
getTags(): string[];
|
|
115
|
+
getInputSchema(): Schema.Schema.Any;
|
|
116
|
+
getInput(): TInput;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
interface DatasetDefineConfig {
|
|
120
|
+
name: string;
|
|
121
|
+
includedTags?: TagMatcher[];
|
|
122
|
+
excludedTags?: TagMatcher[];
|
|
123
|
+
includedPaths?: PathMatcher[];
|
|
124
|
+
excludedPaths?: PathMatcher[];
|
|
125
|
+
}
|
|
126
|
+
declare class Dataset {
|
|
127
|
+
private readonly _config;
|
|
128
|
+
private constructor();
|
|
129
|
+
static define(config: DatasetDefineConfig): Dataset;
|
|
130
|
+
getName(): string;
|
|
131
|
+
getIncludedTags(): ReadonlyArray<TagMatcher>;
|
|
132
|
+
getExcludedTags(): ReadonlyArray<TagMatcher>;
|
|
133
|
+
getIncludedPaths(): ReadonlyArray<PathMatcher>;
|
|
134
|
+
getExcludedPaths(): ReadonlyArray<PathMatcher>;
|
|
135
|
+
matchesTestCase(testCase: TestCase<unknown>, filePath: string): boolean;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
interface EvalMiddleware<TCtx> {
|
|
139
|
+
name: string;
|
|
140
|
+
resolve: () => TCtx | Promise<TCtx>;
|
|
141
|
+
}
|
|
142
|
+
type EvaluateFn<TInput, TScore, TCtx> = (input: TInput, ctx: TCtx) => TScore | Promise<TScore>;
|
|
143
|
+
interface EvaluatorDefineConfig<TI extends Schema.Schema.Any, TO extends Schema.Schema.Any, TS extends Schema.Schema.Any> {
|
|
144
|
+
name: string;
|
|
145
|
+
inputSchema: TI;
|
|
146
|
+
outputSchema: TO;
|
|
147
|
+
scoreSchema: TS;
|
|
148
|
+
passThreshold?: number;
|
|
149
|
+
passCriterion?: (score: unknown) => boolean;
|
|
150
|
+
}
|
|
151
|
+
declare class Evaluator<TInput = unknown, TOutput = unknown, TScore = unknown, TCtx = Record<string, never>> {
|
|
152
|
+
private readonly _config;
|
|
153
|
+
private constructor();
|
|
154
|
+
private getState;
|
|
155
|
+
static use<TCtx>(middleware: EvalMiddleware<TCtx>): Evaluator<unknown, unknown, unknown, TCtx>;
|
|
156
|
+
use<TNew>(middleware: EvalMiddleware<TNew>): Evaluator<TInput, TOutput, TScore, TCtx & TNew>;
|
|
157
|
+
define<TI extends Schema.Schema.Any, TO extends Schema.Schema.Any, TS extends Schema.Schema.Any>(config: EvaluatorDefineConfig<TI, TO, TS>): Evaluator<Schema.Schema.Type<TI>, Schema.Schema.Type<TO>, Schema.Schema.Type<TS>, TCtx>;
|
|
158
|
+
evaluate(fn: EvaluateFn<TInput, TScore, TCtx>): Evaluator<TInput, TOutput, TScore, TCtx>;
|
|
159
|
+
getName(): string | undefined;
|
|
160
|
+
getInputSchema(): Schema.Schema.Any | undefined;
|
|
161
|
+
getOutputSchema(): Schema.Schema.Any | undefined;
|
|
162
|
+
getScoreSchema(): Schema.Schema.Any | undefined;
|
|
163
|
+
getMiddlewares(): ReadonlyArray<EvalMiddleware<unknown>>;
|
|
164
|
+
getEvaluateFn(): EvaluateFn<TInput, TScore, TCtx> | undefined;
|
|
165
|
+
getPassThreshold(): number | undefined;
|
|
166
|
+
getPassCriterion(): ((score: unknown) => boolean) | undefined;
|
|
167
|
+
resolveContext(): Promise<TCtx>;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
interface MetricItem<TData = unknown> {
|
|
171
|
+
readonly id: string;
|
|
172
|
+
readonly data: TData;
|
|
173
|
+
}
|
|
174
|
+
interface MetricDef<TData = unknown> {
|
|
175
|
+
readonly id: string;
|
|
176
|
+
readonly name?: string;
|
|
177
|
+
format(data: TData): string;
|
|
178
|
+
make(data: TData): MetricItem<TData>;
|
|
179
|
+
}
|
|
180
|
+
declare const Metric: {
|
|
181
|
+
of<TData>(config: {
|
|
182
|
+
id: string;
|
|
183
|
+
name?: string;
|
|
184
|
+
format: (data: TData) => string;
|
|
185
|
+
}): MetricDef<TData>;
|
|
186
|
+
};
|
|
187
|
+
declare function getMetricById(id: string): MetricDef<unknown> | undefined;
|
|
188
|
+
|
|
189
|
+
type ScoreDisplayStrategy = 'bar' | 'number' | 'passFail';
|
|
190
|
+
interface ScoreItem<TData = unknown> {
|
|
191
|
+
readonly id: string;
|
|
192
|
+
readonly data: TData;
|
|
193
|
+
readonly passed?: boolean;
|
|
194
|
+
}
|
|
195
|
+
interface ScoreDef<TData = unknown> {
|
|
196
|
+
readonly id: string;
|
|
197
|
+
readonly name?: string;
|
|
198
|
+
readonly displayStrategy: ScoreDisplayStrategy;
|
|
199
|
+
format(data: TData): string;
|
|
200
|
+
make(data: TData, options?: {
|
|
201
|
+
definePassed?: (data: TData) => boolean;
|
|
202
|
+
}): ScoreItem<TData>;
|
|
203
|
+
}
|
|
204
|
+
declare const Score: {
|
|
205
|
+
of<TData>(config: {
|
|
206
|
+
id: string;
|
|
207
|
+
name?: string;
|
|
208
|
+
displayStrategy: ScoreDisplayStrategy;
|
|
209
|
+
format: (data: TData) => string;
|
|
210
|
+
}): ScoreDef<TData>;
|
|
211
|
+
};
|
|
212
|
+
declare function getScoreById(id: string): ScoreDef<unknown> | undefined;
|
|
213
|
+
|
|
214
|
+
interface CollectedDataset {
|
|
215
|
+
id: string;
|
|
216
|
+
filePath: string;
|
|
217
|
+
dataset: Dataset;
|
|
218
|
+
}
|
|
219
|
+
interface CollectedEvaluator {
|
|
220
|
+
id: string;
|
|
221
|
+
filePath: string;
|
|
222
|
+
evaluator: Evaluator<unknown, unknown, unknown, unknown>;
|
|
223
|
+
}
|
|
224
|
+
interface CollectedTestCase {
|
|
225
|
+
id: string;
|
|
226
|
+
filePath: string;
|
|
227
|
+
testCase: TestCase<unknown>;
|
|
228
|
+
}
|
|
229
|
+
interface SearchTestCasesQuery {
|
|
230
|
+
includedTags?: ReadonlyArray<string | RegExp>;
|
|
231
|
+
excludedTags?: ReadonlyArray<string | RegExp>;
|
|
232
|
+
includedPaths?: ReadonlyArray<string | RegExp>;
|
|
233
|
+
excludedPaths?: ReadonlyArray<string | RegExp>;
|
|
234
|
+
}
|
|
235
|
+
interface RunDatasetRequest {
|
|
236
|
+
datasetId: string;
|
|
237
|
+
evaluatorIds: ReadonlyArray<string>;
|
|
238
|
+
concurrency?: number;
|
|
239
|
+
}
|
|
240
|
+
interface RunSnapshot {
|
|
241
|
+
runId: string;
|
|
242
|
+
datasetId: string;
|
|
243
|
+
datasetName: string;
|
|
244
|
+
evaluatorIds: ReadonlyArray<string>;
|
|
245
|
+
queuedAt: number;
|
|
246
|
+
startedAt?: number;
|
|
247
|
+
finishedAt?: number;
|
|
248
|
+
totalTestCases: number;
|
|
249
|
+
completedTestCases: number;
|
|
250
|
+
passedTestCases: number;
|
|
251
|
+
failedTestCases: number;
|
|
252
|
+
status: 'queued' | 'running' | 'completed' | 'failed';
|
|
253
|
+
artifactPath: string;
|
|
254
|
+
errorMessage?: string;
|
|
255
|
+
}
|
|
256
|
+
type RunnerEvent = {
|
|
257
|
+
type: 'RunQueued';
|
|
258
|
+
runId: string;
|
|
259
|
+
datasetId: string;
|
|
260
|
+
datasetName: string;
|
|
261
|
+
evaluatorIds: ReadonlyArray<string>;
|
|
262
|
+
totalTestCases: number;
|
|
263
|
+
artifactPath: string;
|
|
264
|
+
} | {
|
|
265
|
+
type: 'RunStarted';
|
|
266
|
+
runId: string;
|
|
267
|
+
startedAt: number;
|
|
268
|
+
} | {
|
|
269
|
+
type: 'TestCaseProgress';
|
|
270
|
+
runId: string;
|
|
271
|
+
testCaseId: string;
|
|
272
|
+
testCaseName: string;
|
|
273
|
+
completedTestCases: number;
|
|
274
|
+
totalTestCases: number;
|
|
275
|
+
passed: boolean;
|
|
276
|
+
durationMs: number;
|
|
277
|
+
evaluatorScores: ReadonlyArray<{
|
|
278
|
+
evaluatorId: string;
|
|
279
|
+
scores: ReadonlyArray<ScoreItem>;
|
|
280
|
+
passed: boolean;
|
|
281
|
+
metrics?: ReadonlyArray<MetricItem>;
|
|
282
|
+
}>;
|
|
283
|
+
errorMessage?: string;
|
|
284
|
+
} | {
|
|
285
|
+
type: 'RunCompleted';
|
|
286
|
+
runId: string;
|
|
287
|
+
finishedAt: number;
|
|
288
|
+
passedTestCases: number;
|
|
289
|
+
failedTestCases: number;
|
|
290
|
+
totalTestCases: number;
|
|
291
|
+
artifactPath: string;
|
|
292
|
+
} | {
|
|
293
|
+
type: 'RunFailed';
|
|
294
|
+
runId: string;
|
|
295
|
+
finishedAt: number;
|
|
296
|
+
errorMessage: string;
|
|
297
|
+
artifactPath: string;
|
|
298
|
+
} | {
|
|
299
|
+
type: 'ArtifactFlushed';
|
|
300
|
+
runId: string;
|
|
301
|
+
artifactPath: string;
|
|
302
|
+
};
|
|
303
|
+
|
|
304
|
+
interface SubscribeOptions {
|
|
305
|
+
runId?: string;
|
|
306
|
+
}
|
|
307
|
+
interface RunnerApi {
|
|
308
|
+
collectDatasets(): Promise<ReadonlyArray<CollectedDataset>>;
|
|
309
|
+
collectEvaluators(): Promise<ReadonlyArray<CollectedEvaluator>>;
|
|
310
|
+
resolveDatasetByName(name: string): Promise<CollectedDataset | undefined>;
|
|
311
|
+
resolveEvaluatorsByNamePattern(pattern: string): Promise<ReadonlyArray<CollectedEvaluator>>;
|
|
312
|
+
searchTestCases(query?: SearchTestCasesQuery): Promise<ReadonlyArray<CollectedTestCase>>;
|
|
313
|
+
collectDatasetTestCases(datasetId: string): Promise<ReadonlyArray<CollectedTestCase>>;
|
|
314
|
+
runDatasetWith(request: RunDatasetRequest): Promise<RunSnapshot>;
|
|
315
|
+
subscribeRunEvents(listener: (event: RunnerEvent) => void, options?: SubscribeOptions): () => void;
|
|
316
|
+
getRunSnapshot(runId: string): RunSnapshot | undefined;
|
|
317
|
+
getAllRunSnapshots(): ReadonlyArray<RunSnapshot>;
|
|
318
|
+
shutdown(): Promise<void>;
|
|
319
|
+
}
|
|
320
|
+
declare function createRunner(overrides?: Partial<RunnerConfig>): RunnerApi;
|
|
321
|
+
|
|
322
|
+
declare function loadMockData(): EvalsData;
|
|
323
|
+
declare function loadRunnerData(runner: RunnerApi): Promise<EvalsData>;
|
|
324
|
+
declare function parseStartupArgs(argv: string[]): StartupArgs;
|
|
325
|
+
|
|
326
|
+
interface TokenCountData {
|
|
327
|
+
input?: number;
|
|
328
|
+
output?: number;
|
|
329
|
+
inputCached?: number;
|
|
330
|
+
outputCached?: number;
|
|
331
|
+
}
|
|
332
|
+
declare const tokenCountMetric: MetricDef<TokenCountData>;
|
|
333
|
+
interface LatencyData {
|
|
334
|
+
ms: number;
|
|
335
|
+
}
|
|
336
|
+
declare const latencyMetric: MetricDef<LatencyData>;
|
|
337
|
+
|
|
338
|
+
interface PercentScoreData {
|
|
339
|
+
value: number;
|
|
340
|
+
}
|
|
341
|
+
declare const percentScore: ScoreDef<PercentScoreData>;
|
|
342
|
+
interface BinaryScoreData {
|
|
343
|
+
passed: boolean;
|
|
344
|
+
}
|
|
345
|
+
declare const binaryScore: ScoreDef<BinaryScoreData>;
|
|
346
|
+
|
|
347
|
+
export { type BinaryScoreData, type CliState, type CollectedDataset, type CollectedEvaluator, type CollectedTestCase, Dataset, type EvalDataset, type EvalMiddleware, type EvalRun, type EvalsData, Evaluator, type EvaluatorOption, type LatencyData, Metric, type MetricDef, type MetricItem, type PathMatcher, type PercentScoreData, type RunDatasetRequest, type RunSnapshot, type RunnerApi, type RunnerConfig, type RunnerDiscoveryConfig, type RunnerEvent, Score, type ScoreDef, type ScoreDisplayStrategy, type ScoreItem, type SearchTestCasesQuery, type StartupArgs, type TagMatcher, TestCase, type TokenCountData, type ViewLevel, binaryScore, createRunner, defaultRunnerConfig, getMetricById, getScoreById, latencyMetric, loadMockData, loadRunnerData, parseStartupArgs, percentScore, tokenCountMetric, withRunnerConfig };
|