nodebench-mcp 2.66.0 → 2.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmarks/chainEval.d.ts +21 -0
- package/dist/benchmarks/chainEval.js +683 -0
- package/dist/benchmarks/chainEval.js.map +1 -0
- package/dist/benchmarks/llmJudgeEval.d.ts +3 -0
- package/dist/benchmarks/llmJudgeEval.js +321 -21
- package/dist/benchmarks/llmJudgeEval.js.map +1 -1
- package/dist/benchmarks/pipelineEval.d.ts +63 -0
- package/dist/benchmarks/pipelineEval.js +1053 -0
- package/dist/benchmarks/pipelineEval.js.map +1 -0
- package/dist/benchmarks/searchQualityEval.js +4 -4
- package/dist/benchmarks/searchQualityEval.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chainEval.js","sourceRoot":"","sources":["../../src/benchmarks/chainEval.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;GAmBG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAExD,OAAO,EAAE,KAAK,EAAE,MAAM,UAAU,CAAC;AAEjC,4DAA4D;AAC5D,KAAK,UAAU,yBAAyB;IACtC,MAAM,WAAW,GAAG,MAAM,eAAe,EAAE,CAAC;IAC5C,MAAM,MAAM,GAAc,EAAE,CAAC;IAE7B,mEAAmE;IACnE,IAAI,CAAC;QACH,MAAM,EAAE,eAAe,EAAE,GAAG,MAAM,MAAM,CAAC,uBAAuB,CAAC,CAAC;QAClE,MAAM,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,WAAW,CAAC,CAAC,CAAC;IAC/C,CAAC;IAAC,MAAM,CAAC,CAAC,6BAA6B,CAAC,CAAC;IAEzC,8EAA8E;IAC9E,IAAI,CAAC;QACH,MAAM,EAAE,+BAA+B,EAAE,GAAG,MAAM,MAAM,CAAC,uCAAuC,CAAC,CAAC;QAClG,MAAM,CAAC,IAAI,CAAC,GAAG,+BAA+B,CAAC,CAAC,GAAG,WAAW,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IAC/E,CAAC;IAAC,MAAM,CAAC,CAAC,6CAA6C,CAAC,CAAC;IAEzD,OAAO,CAAC,GAAG,WAAW,EAAE,GAAG,MAAM,CAAC,CAAC;AACrC,CAAC;AA8CD,gFAAgF;AAEhF,SAAS,QAAQ,CAAC,GAAQ,EAAE,GAAG,IAAc;IAC3C,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAClD,OAAO,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE;QACnB,IAAI,CAAC,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;YACxD,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC,KAAK,QAAQ;gBAAE,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YACzD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;gBAAE,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,WAAW,CAAC,GAAQ,EAAE,GAAW;IACxC,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,KAAK,CAAC;IAClD,MAAM,GAAG,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC;IACrB,IAAI,GAAG,KAAK,IAAI,IAAI,GAAG,KAAK,SAAS;QAAE,OAAO,KAAK,CAAC;IACpD,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC;QAAE,OAAO,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC;IAC9C,IAAI,OAAO,GAAG,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAChE,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,cAAc,CAAC,GAAQ,EAAE,SAAiB;IACjD,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,C
AAC;IAC9C,OAAO,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,WAAW,EAAE,CAAC,CAAC;AAC/C,CAAC;AAED,gFAAgF;AAEhF,MAAM,MAAM,GAAsB;IAChC,+CAA+C;IAC/C;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,sBAAsB;QAC5B,QAAQ,EAAE,cAAc;QACxB,IAAI,EAAE,SAAS;QACf,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,sBAAsB,EAAE,CAAC;gBACpD,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,oBAAoB,EAAE,QAAQ,CAAC;oBACjD,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,oBAAoB,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,4BAA4B;iBACnG,CAAC;aACH;YACD;gBACE,IAAI,EAAE,sBAAsB;gBAC5B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;gBAClC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,UAAU,EAAE,aAAa,EAAE,eAAe,CAAC;oBAC7D,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,gCAAgC,CAAC,CAAC,CAAC,4BAA4B;iBAClG,CAAC;aACH;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;oBACrB,UAAU,EAAE,cAAc;oBAC1B,QAAQ,EAAE,CAAC;oBACX,KAAK,EAAE,gHAAgH;iBACxH,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,0EAA0E;oBAC1E,0FAA0F;oBAC1F,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,CAAC,IAAI,QAAQ,CAAC,CAAC,EAAE,aAAa,EAAE,aAAa,CAAC;oBACjF,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,wBAAwB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,sBAAsB;iBACnI,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,CAAC,KAAK,EAAE,EAAE;oBACnB,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,eAAe,EAAE,gBAAgB,EAAE,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,cAAc,CAAC;oBAC9H,OAAO,EAAE,MAAM,EAAE,iBAAiB,MAAM,EAAE,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;gBAC3F,CAAC;gBACD,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,UAAU,EAAE,SAAS,EAAE,QAAQ,CAAC;oBAClD,MAAM,EAAE,gBAAgB;iBACzB,CAAC;aACH;YACD;gBACE,IAAI,EAAE,iBAAiB;gBACvB,SAAS,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;oBACrB,KAAK,EAAE,gCAAgC;oBACvC,WAAW,EAAE,aAAa,KAAK,C
AAC,CAAC,CAAC,EAAE,WAAW,EAAE,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,cAAc,EAAE,MAAM,IAAI,CAAC,EAAE;oBAClG,QAAQ,EAAE,eAAe;iBAC1B,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,aAAa,EAAE,SAAS,EAAE,OAAO,CAAC;oBACpD,MAAM,EAAE,mBAAmB;iBAC5B,CAAC;aACH;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,kBAAkB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YAC9G,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YACrF,4CAA4C;YAC5C,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;iBAC/F,CAAC;gBACJ,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC;gBACpD,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,eAAe,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;oBAAE,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;gBACtH,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;oBAAE,OAAO,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;YACrI,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,mCAAmC,CAAC,CAAC;YAC5E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,iCAAiC;IACjC;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,uCAAuC;QAC7C,QAAQ,EAAE,YAAY;QACtB,IAAI,EAAE,SAAS;QACf,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,uCAAuC,EAAE,CAAC;gBACrE,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC7E;YACD;gBACE,IAAI,EAAE,sBAAsB;gBAC5B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;gBAClC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,UAAU,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,UAA
U,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC;aACnH;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,UAAU,EAAE,gBAAgB;oBAC5B,QAAQ,EAAE,CAAC;oBACX,KAAK,EAAE,uHAAuH;iBAC/H,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,WAAW,CAAC;oBAChE,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,WAAW;iBAClI,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,+CAA+C,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;gBAC1H,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC5D;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YACpG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;YAC1E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,mCAAmC;IACnC;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,yBAAyB;QAC/B,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,sCAAsC,EAAE,CAAC;gBACpE,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC7E;YACD;gBACE,IAAI,EAAE,sBAAsB;gBAC5B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC;gBACnC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,UAAU,EAAE,aAAa,EAAE,eAAe,CAAC,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;aAC9J;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,UAAU,EAAE,kBAAkB;oBAC9B,QAAQ,EAAE,EAAE;oBACZ,KAAK,EAAE,+HAA+H;iBACvI,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAA
E,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,aAAa,CAAC;oBAClE,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,WAAW;iBACvH,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;oBACrB,MAAM,EAAE,4BAA4B,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,MAAM,IAAI,CAAC,WAAW;oBAClH,QAAQ,EAAE,UAAU;oBACpB,MAAM,EAAE,UAAU;iBACnB,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;aACrD;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC;gBAC7C,OAAO,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAC1D,CAAC;YACD,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YACzF,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;YAChF,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,yCAAyC;IACzC;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,sCAAsC;QAC5C,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,YAAY;QAClB,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,iCAAiC,EAAE,CAAC;gBAC/D,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC7E;YACD;gBACE,IAAI,EAAE,gBAAgB;gBACtB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,wCAAwC,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;gBAChF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ;oBAC5D,MAAM,EAAE,oBAAoB;iBAC7B,CAAC;aACH;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,UAAU,EAAE,kBAAkB;oBAC9B,QAAQ,EAAE,CAAC;oBACX,KAAK,EAAE,gHAAgH;iBACxH,CAAC;gBACF,QAAQ,EAAE,
CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC;oBACnD,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,+BAA+B,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,WAAW;iBAClI,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,+BAA+B,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;gBAC3G,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;aACrD;YACD;gBACE,IAAI,EAAE,iBAAiB;gBACvB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,KAAK,EAAE,yCAAyC;oBAChD,WAAW,EAAE,yEAAyE;oBACtF,QAAQ,EAAE,UAAU;iBACrB,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,mBAAmB,EAAE,CAAC;aAC/D;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YACpG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;YAC1E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,6CAA6C;IAC7C;QACE,EAAE,EAAE,uBAAuB;QAC3B,IAAI,EAAE,kCAAkC;QACxC,QAAQ,EAAE,gBAAgB;QAC1B,IAAI,EAAE,QAAQ;QACd,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,qCAAqC,EAAE,CAAC;gBACnE,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC7E;YACD;gBACE,IAAI,EAAE,WAAW;gBACjB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,WAAW,EAAE,KAAK,EAAE,4CAA4C,EAAE,CAAC;gBAC/F,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ;oBAC5D,MAAM,EAAE,gBAAgB;iBACzB,CAAC;aACH;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,UAAU,EAAE,kBAAkB;oBAC9B,QAAQ,EAAE,CAAC;oBACX,KAAK,EAAE,+GAA+G;iBACvH,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,
IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC;oBACnD,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,WAAW;iBAC1H,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,kCAAkC,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,aAAa,EAAE,CAAC;gBAC9G,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;aACrD;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YAChG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;YACtE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,4CAA4C;IAC5C;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,iCAAiC;QACvC,QAAQ,EAAE,gBAAgB;QAC1B,IAAI,EAAE,SAAS;QACf,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,8BAA8B,EAAE,CAAC;gBAC5D,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC7E;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,UAAU,EAAE,kBAAkB;oBAC9B,QAAQ,EAAE,CAAC;oBACX,KAAK,EAAE,iIAAiI;iBACzI,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,CAAC;oBACnD,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC,CAAC,WAAW;iBACzH,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,6CAA6C,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;gBACtH,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;aACrD;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,
CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,uBAAuB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YAC/F,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,8BAA8B,CAAC,CAAC;YACvE,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,6BAA6B;IAC7B;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,0BAA0B;QAChC,QAAQ,EAAE,cAAc;QACxB,IAAI,EAAE,SAAS;QACf,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,iBAAiB;gBACvB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC;gBACrB,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ;oBAC5D,MAAM,EAAE,cAAc,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,iBAAiB;iBACjF,CAAC;aACH;YACD;gBACE,IAAI,EAAE,gBAAgB;gBACtB,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,uCAAuC,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC;gBAChF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ;oBAC5D,MAAM,EAAE,oBAAoB;iBAC7B,CAAC;aACH;YACD;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,0BAA0B,EAAE,CAAC;gBAC3D,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ;oBAC5D,MAAM,EAAE,kBAAkB;iBAC3B,CAAC;aACH;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YAClG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YACtG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;gBAAC,OAAO,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAAC,CAAC;YACtG,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;YACxE,OAAO,EAAE,IAAI,EAAE,OAA
O,EAAE,CAAC;QAC3B,CAAC;KACF;IAED,kCAAkC;IAClC;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,+BAA+B;QACrC,QAAQ,EAAE,kBAAkB;QAC5B,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,oBAAoB;gBAC1B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,gCAAgC,EAAE,CAAC;gBAC9D,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,QAAQ,CAAC,EAAE,MAAM,EAAE,gBAAgB,EAAE,CAAC;aAC7E;YACD;gBACE,IAAI,EAAE,qBAAqB;gBAC3B,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;gBAClC,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,SAAS,IAAI,OAAO,CAAC,KAAK,QAAQ;oBAC5D,MAAM,EAAE,gBAAgB;iBACzB,CAAC;aACH;YACD;gBACE,IAAI,EAAE,0BAA0B;gBAChC,SAAS,EAAE,GAAG,EAAE,CAAC,CAAC;oBAChB,UAAU,EAAE,kBAAkB;oBAC9B,QAAQ,EAAE,CAAC;oBACX,KAAK,EAAE,oHAAoH;iBAC5H,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;oBAChB,IAAI,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,aAAa,EAAE,aAAa,CAAC;oBAClE,MAAM,EAAE,QAAQ,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,kBAAkB,CAAC,CAAC,CAAC,WAAW;iBAC7H,CAAC;aACH;YACD;gBACE,IAAI,EAAE,cAAc;gBACpB,SAAS,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;oBACrB,MAAM,EAAE,kBAAkB,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,MAAM,IAAI,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,MAAM,IAAI,CAAC,WAAW;oBACxG,QAAQ,EAAE,UAAU;oBACpB,MAAM,EAAE,UAAU;iBACnB,CAAC;gBACF,QAAQ,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;aACrD;SACF;QACD,eAAe,EAAE,CAAC,GAAG,EAAE,EAAE;YACvB,MAAM,OAAO,GAAa,EAAE,CAAC;YAC7B,IAAI,IAAI,GAAG,IAAI,CAAC;YAChB,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,WAAW,EAAE,CAAC;gBAC7C,OAAO,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;gBAAC,IAAI,GAAG,KAAK,CAAC;YAC9D,CAAC;YACD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,CAAC,IAAI,CAAC,oCAAoC,CAAC,CAAC;YAC7E,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;QAC3B,CAAC;KACF;CACF,CAAC;AAEF,gFAAgF;AAEhF,KAAK,UAAU,QAAQ,CAAC,KAAsB,EAAE,KAAgB;IAC9D,MAAM,WAAW,GAAsB,EAAE,CAAC;IAC1C,MAAM,UAAU,GAAwB,EAAE,CAAC;IAC3C,IAAI,WAAW,GAAG,K
AAK,CAAC;IACxB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAE7B,IAAI,WAAW,EAAE,CAAC;YAChB,WAAW,CAAC,IAAI,CAAC;gBACf,SAAS,EAAE,CAAC;gBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,IAAI;gBACZ,UAAU,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,4BAA4B,EAAE;gBACjE,SAAS,EAAE,CAAC;aACb,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,IAAI,CAAC,IAAI,CAAC,CAAC;QACnD,IAAI,CAAC,IAAI,EAAE,CAAC;YACV,WAAW,CAAC,IAAI,CAAC;gBACf,SAAS,EAAE,CAAC;gBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,IAAI;gBACZ,UAAU,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,mBAAmB,IAAI,CAAC,IAAI,EAAE,EAAE;gBACnE,SAAS,EAAE,CAAC;gBACZ,KAAK,EAAE,mBAAmB,IAAI,CAAC,IAAI,EAAE;aACtC,CAAC,CAAC;YACH,WAAW,GAAG,IAAI,CAAC;YACnB,SAAS;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC;YACxC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YACxC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACzC,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAEzC,UAAU,CAAC,CAAC,CAAC,GAAG,MAAM,CAAC;YACvB,WAAW,CAAC,IAAI,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,SAAS,EAAE,CAAC,CAAC;YAEjG,0DAA0D;YAC1D,IAAI,CAAC,UAAU,CAAC,IAAI,IAAI,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACnD,gFAAgF;gBAChF,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;oBACpC,WAAW,GAAG,IAAI,CAAC;gBACrB,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,GAAQ,EAAE,CAAC;YAClB,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACzC,WAAW,CAAC,IAAI,CAAC;gBACf,SAAS,EAAE,CAAC;gBACZ,IAAI,EAAE,IAAI,CAAC,IAAI;gBACf,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC;gBAChC,MAAM,EAAE,IAAI;gBACZ,UAAU,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,UAAU,GAAG,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,EAAE;gBAC
3E,SAAS;gBACT,KAAK,EAAE,GAAG,CAAC,OAAO;aACnB,CAAC,CAAC;YACH,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBACpC,WAAW,GAAG,IAAI,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,eAAe,GAAG,KAAK,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;IAC1D,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC;IAC/C,MAAM,cAAc,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC;IAEzE,OAAO;QACL,OAAO,EAAE,KAAK,CAAC,EAAE;QACjB,SAAS,EAAE,KAAK,CAAC,IAAI;QACrB,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,KAAK,EAAE,WAAW;QAClB,eAAe;QACf,cAAc;QACd,cAAc;QACd,UAAU,EAAE,KAAK,CAAC,KAAK,CAAC,MAAM;QAC9B,WAAW;QACX,WAAW,EAAE,eAAe,CAAC,IAAI,IAAI,CAAC,WAAW;KAClD,CAAC;AACJ,CAAC;AAED,gFAAgF;AAEhF,SAAS,kBAAkB,CAAC,MAAmB;IAC7C,IAAI,CAAC;QACH,MAAM,EAAE,GAAG,KAAK,EAAE,CAAC;QACnB,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;;MAcN,CAAC,CAAC;QAEJ,EAAE,CAAC,OAAO,CAAC,0MAA0M,CAAC,CAAC,GAAG,CACxN,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,SAAS,EAAE,MAAM,CAAC,QAAQ,EAAE,MAAM,CAAC,IAAI,EAC9D,MAAM,CAAC,cAAc,EAAE,MAAM,CAAC,UAAU,EAAE,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EACpE,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,cAAc,EACjD,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,OAAO,CAAC,EAC9C,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACpC,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,SAAS,EAAE,CAAC,CAAC,SAAS,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK;SAC3G,CAAC,CAAC,CAAC,CACL,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC,CAAC,yBAAyB,CAAC,CAAC;AACvC,CAAC;AAED,gFAAgF;AAEhF,KAAK,UAAU,IAAI;IACjB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IAC7D,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,MAAM,CAAC;IAEtF,OAAO,CAAC,GAAG,CAAC,8BAA8B,SAAS,yBAAyB,CAAC,CAAC;IAE9E,gDAAgD;IAChD,MAAM,KAAK,GAAG,MAAM,yBAAyB,EA
AE,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;IAE/C,MAAM,OAAO,GAAkB,EAAE,CAAC;IAElC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,SAAS,KAAK,KAAK,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,MAAM,aAAa,CAAC,CAAC;QAElG,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAC5C,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACrB,kBAAkB,CAAC,MAAM,CAAC,CAAC;QAE3B,MAAM,MAAM,GAAG,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC1F,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,KAAK,MAAM,CAAC,cAAc,IAAI,MAAM,CAAC,UAAU,WAAW,MAAM,CAAC,cAAc,KAAK,CAAC,CAAC;QAE3G,kCAAkC;QAClC,IAAI,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC;YACxB,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,EAAE,CAAC;gBAChC,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC;gBAClD,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,UAAU,CAAC,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,SAAS,KAAK,CAAC,CAAC;YAC/I,CAAC;YACD,KAAK,MAAM,MAAM,IAAI,MAAM,CAAC,eAAe,CAAC,OAAO,EAAE,CAAC;gBACpD,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;IACH,CAAC;IAED,UAAU;IACV,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC;IACzD,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,CAAC;IACjE,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,CAAC;IACzE,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,cAAc,EAAE,CAAC,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAElG,OAAO,CAAC,GAAG,CAAC
,iDAAiD,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,uBAAuB,MAAM,IAAI,OAAO,CAAC,MAAM,KAAK,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;IAC/G,OAAO,CAAC,GAAG,CAAC,uBAAuB,cAAc,IAAI,UAAU,KAAK,IAAI,CAAC,KAAK,CAAC,cAAc,GAAG,UAAU,GAAG,GAAG,CAAC,IAAI,CAAC,CAAC;IACvH,OAAO,CAAC,GAAG,CAAC,uBAAuB,MAAM,EAAE,CAAC,CAAC;IAC7C,OAAO,CAAC,GAAG,CAAC,uBAAuB,UAAU,IAAI,CAAC,CAAC;IACnD,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;IAE/D,0BAA0B;IAC1B,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;IAC3B,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,MAAM,IAAI,GAAG,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;QACvC,OAAO,CAAC,GAAG,CAAC,OAAO,IAAI,IAAI,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC,UAAU,WAAW,CAAC,CAAC,cAAc,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACnI,CAAC;IACD,OAAO,CAAC,GAAG,EAAE,CAAC;IAEd,OAAO,CAAC,IAAI,CAAC,MAAM,KAAK,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;AAClD,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,GAAG,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC"}
|
|
@@ -84,6 +84,7 @@ export interface RegressionItem {
|
|
|
84
84
|
export declare function generateQueryCorpus(): EvalQuery[];
|
|
85
85
|
export declare function detectRegressions(currentRunId: string, baselineRunId: string): RegressionItem[];
|
|
86
86
|
export declare function detectImprovements(currentRunId: string, baselineRunId: string): RegressionItem[];
|
|
87
|
+
export type Surface = "mcp" | "app";
|
|
87
88
|
export interface RunOptions {
|
|
88
89
|
queryLimit: number;
|
|
89
90
|
persona?: Persona;
|
|
@@ -93,6 +94,8 @@ export interface RunOptions {
|
|
|
93
94
|
dryRun?: boolean;
|
|
94
95
|
/** If true, run self-improving flywheel loop: eval → diagnose → grow → re-eval */
|
|
95
96
|
flywheel?: boolean;
|
|
97
|
+
/** Which surface to test: "mcp" (tool handlers) or "app" (web /search endpoint). Default: "mcp" */
|
|
98
|
+
surface?: Surface;
|
|
96
99
|
}
|
|
97
100
|
export declare function runLlmJudgeEval(options: RunOptions): Promise<RunSummary>;
|
|
98
101
|
export type FailureRootCause = "tool_not_found" | "tool_error" | "empty_output" | "criteria_mismatch" | "heuristic_too_strict";
|
|
@@ -851,7 +851,13 @@ async function executeQueryTools(query, allTools) {
|
|
|
851
851
|
}
|
|
852
852
|
}
|
|
853
853
|
}
|
|
854
|
-
// 3. Execute each expected tool
|
|
854
|
+
// 3. Execute each expected tool as a CHAIN — outputs from earlier tools are fed into the downstream tools that consume them
|
|
855
|
+
// This simulates real agent usage where context accumulates across steps.
|
|
856
|
+
const chainContext = {};
|
|
857
|
+
if (webResults.length > 0) {
|
|
858
|
+
chainContext.webResults = webResults;
|
|
859
|
+
chainContext.webSnippets = webResults.map(r => `${r.title}: ${r.snippet}`).join("\n");
|
|
860
|
+
}
|
|
855
861
|
for (const toolName of effectiveTools) {
|
|
856
862
|
if (toolName === "discover_tools")
|
|
857
863
|
continue; // already called
|
|
@@ -862,18 +868,61 @@ async function executeQueryTools(query, allTools) {
|
|
|
862
868
|
}
|
|
863
869
|
const tool = findTool(allTools, toolName);
|
|
864
870
|
if (tool) {
|
|
865
|
-
// Build
|
|
871
|
+
// Build args from BOTH static patterns AND accumulated chain context
|
|
866
872
|
const args = buildMinimalArgs(toolName, query);
|
|
873
|
+
// ── Chain injection: pass prior tool outputs as context ──
|
|
874
|
+
// founder_local_synthesize consumes web results plus any prior gather and recon output
|
|
867
875
|
if (toolName === "founder_local_synthesize") {
|
|
868
|
-
if (webResults
|
|
869
|
-
args.webResults = webResults;
|
|
876
|
+
if (chainContext.webResults)
|
|
877
|
+
args.webResults = chainContext.webResults;
|
|
870
878
|
args.lens = query.persona;
|
|
879
|
+
// If gather ran before, pass its output as context
|
|
880
|
+
if (chainContext.gatherOutput)
|
|
881
|
+
args.priorContext = chainContext.gatherOutput;
|
|
882
|
+
if (chainContext.reconOutput)
|
|
883
|
+
args.reconFindings = chainContext.reconOutput;
|
|
884
|
+
}
|
|
885
|
+
// enrich_recon consumes run_recon output
|
|
886
|
+
if (toolName === "enrich_recon" && chainContext.reconOutput) {
|
|
887
|
+
args.findings = chainContext.reconOutput;
|
|
888
|
+
}
|
|
889
|
+
// export_artifact_packet consumes synthesize output
|
|
890
|
+
if (toolName === "export_artifact_packet" && chainContext.synthesizeOutput) {
|
|
891
|
+
args.packet = chainContext.synthesizeOutput;
|
|
892
|
+
}
|
|
893
|
+
// render_decision_memo consumes gather + synthesize
|
|
894
|
+
if (toolName === "render_decision_memo") {
|
|
895
|
+
if (chainContext.gatherOutput)
|
|
896
|
+
args.context = chainContext.gatherOutput;
|
|
897
|
+
if (chainContext.synthesizeOutput)
|
|
898
|
+
args.packet = chainContext.synthesizeOutput;
|
|
899
|
+
}
|
|
900
|
+
// detect_contradictions consumes the joined web snippets (gather output is not passed here)
|
|
901
|
+
if (toolName === "detect_contradictions" && chainContext.webSnippets) {
|
|
902
|
+
args.context = chainContext.webSnippets;
|
|
871
903
|
}
|
|
872
904
|
const result = await callTool(tool, args);
|
|
873
905
|
totalMs += result.ms;
|
|
874
906
|
if (result.ok) {
|
|
875
907
|
toolsFired.push(toolName);
|
|
876
|
-
|
|
908
|
+
const extracted = extractText(result.result);
|
|
909
|
+
outputs[toolName] = extracted;
|
|
910
|
+
// ── Store output in chain context for downstream tools ──
|
|
911
|
+
if (toolName === "founder_local_gather" || toolName === "founder_deep_context_gather") {
|
|
912
|
+
chainContext.gatherOutput = result.result;
|
|
913
|
+
}
|
|
914
|
+
if (toolName === "run_recon" || toolName === "enrich_recon") {
|
|
915
|
+
chainContext.reconOutput = result.result;
|
|
916
|
+
}
|
|
917
|
+
if (toolName === "founder_local_synthesize") {
|
|
918
|
+
chainContext.synthesizeOutput = result.result;
|
|
919
|
+
}
|
|
920
|
+
if (toolName === "web_search") {
|
|
921
|
+
chainContext.webResults = result.result?.results ?? [];
|
|
922
|
+
}
|
|
923
|
+
if (toolName === "founder_local_weekly_reset") {
|
|
924
|
+
chainContext.weeklyOutput = result.result;
|
|
925
|
+
}
|
|
877
926
|
}
|
|
878
927
|
else {
|
|
879
928
|
// Check if this is a "needs seed data" error — retry once after seeding
|
|
@@ -888,14 +937,25 @@ async function executeQueryTools(query, allTools) {
|
|
|
888
937
|
if (seedResult.ok && !toolsFired.includes("founder_deep_context_gather")) {
|
|
889
938
|
toolsFired.push("founder_deep_context_gather");
|
|
890
939
|
outputs["founder_deep_context_gather"] = extractText(seedResult.result);
|
|
940
|
+
chainContext.gatherOutput = seedResult.result;
|
|
891
941
|
}
|
|
892
942
|
}
|
|
893
|
-
// Retry the original tool
|
|
894
|
-
const
|
|
943
|
+
// Retry the original tool with chain context
|
|
944
|
+
const retryArgs = buildMinimalArgs(toolName, query);
|
|
945
|
+
if (toolName === "founder_local_synthesize" && chainContext.gatherOutput) {
|
|
946
|
+
retryArgs.priorContext = chainContext.gatherOutput;
|
|
947
|
+
if (chainContext.webResults)
|
|
948
|
+
retryArgs.webResults = chainContext.webResults;
|
|
949
|
+
retryArgs.lens = query.persona;
|
|
950
|
+
}
|
|
951
|
+
const retry = await callTool(tool, retryArgs);
|
|
895
952
|
totalMs += retry.ms;
|
|
896
953
|
if (retry.ok) {
|
|
897
954
|
toolsFired.push(toolName);
|
|
898
955
|
outputs[toolName] = extractText(retry.result);
|
|
956
|
+
// Store in chain context even on retry
|
|
957
|
+
if (toolName === "founder_local_synthesize")
|
|
958
|
+
chainContext.synthesizeOutput = retry.result;
|
|
899
959
|
}
|
|
900
960
|
else {
|
|
901
961
|
toolsFired.push(toolName);
|
|
@@ -1248,6 +1308,29 @@ function heuristicJudge(query, toolOutputs) {
|
|
|
1248
1308
|
evidence += pass ? `${nonEmptyOutputCount} tools produced output` : `only ${nonEmptyOutputCount} tool(s) produced output`;
|
|
1249
1309
|
return { criterion: bc.criterion, pass, evidence };
|
|
1250
1310
|
}
|
|
1311
|
+
// ── "Chain coherence — downstream output references upstream data" ──
|
|
1312
|
+
if (criterion.includes("chain coherence") || criterion.includes("downstream references upstream") || criterion.includes("output references prior")) {
|
|
1313
|
+
// Check if the LAST tool's output contains entities/keywords from FIRST tool's output
|
|
1314
|
+
const toolKeys = Object.keys(toolOutputs).filter(k => toolOutputs[k] && toolOutputs[k] !== "(null)");
|
|
1315
|
+
if (toolKeys.length >= 2) {
|
|
1316
|
+
const firstOutput = toolOutputs[toolKeys[0]].toLowerCase();
|
|
1317
|
+
const lastOutput = toolOutputs[toolKeys[toolKeys.length - 1]].toLowerCase();
|
|
1318
|
+
// Extract significant words from first output
|
|
1319
|
+
const firstWords = firstOutput.split(/\s+/)
|
|
1320
|
+
.filter((w) => w.length > 4 && !STOPWORDS.has(w))
|
|
1321
|
+
.slice(0, 20);
|
|
1322
|
+
const sharedWords = firstWords.filter((w) => lastOutput.includes(w));
|
|
1323
|
+
pass = sharedWords.length >= 2; // at least 2 shared significant words
|
|
1324
|
+
evidence += pass
|
|
1325
|
+
? `${sharedWords.length} shared terms across chain: ${sharedWords.slice(0, 5).join(", ")}`
|
|
1326
|
+
: `only ${sharedWords.length} shared terms — chain may be disconnected`;
|
|
1327
|
+
}
|
|
1328
|
+
else {
|
|
1329
|
+
pass = false;
|
|
1330
|
+
evidence += "fewer than 2 tools produced output";
|
|
1331
|
+
}
|
|
1332
|
+
return { criterion: bc.criterion, pass, evidence };
|
|
1333
|
+
}
|
|
1251
1334
|
// ── Negation patterns: "does not hallucinate/fabricate/invent" ──
|
|
1252
1335
|
if (criterion.includes("not hallucinate") || criterion.includes("not fabricate") || criterion.includes("not invent") || criterion.includes("does not")) {
|
|
1253
1336
|
pass = combined.length > 0 && combined.length < 50000;
|
|
@@ -1592,13 +1675,195 @@ function printReport(summary, regressions, improvements, scenarioFlags) {
|
|
|
1592
1675
|
}
|
|
1593
1676
|
console.log("");
|
|
1594
1677
|
}
|
|
1678
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
1679
|
+
// APP SURFACE — /search endpoint validation
|
|
1680
|
+
// ══════════════════════════════════════════════════════════════════════════════
|
|
1681
|
+
/**
 * URL of the web app's /search endpoint. The app-surface eval POSTs each
 * query here, and the reachability check derives its /health probe URL from it.
 */
const APP_SEARCH_BASE_URL = "http://localhost:5191/search";
|
|
1682
|
+
/**
 * Map an eval persona to the lens name sent to the /search endpoint.
 *
 * Every recognised persona maps to a lens of the same name; any other value
 * (unknown string, undefined, non-string) falls back to "founder".
 *
 * @param {string} persona - Persona label taken from a corpus query.
 * @returns {string} Lens name to put in the /search request body.
 */
function personaToLens(persona) {
    // Lens names are identical to persona names, so a membership test is
    // enough; there is no per-persona translation to perform.
    const knownLenses = new Set([
        "founder",
        "banker",
        "investor",
        "researcher",
        "student",
        "operator",
        "legal",
        "ceo",
        "pm",
        "contractor",
        "content",
    ]);
    return knownLenses.has(persona) ? persona : "founder";
}
|
|
1699
|
+
/**
 * Boolean criteria used to judge /search responses on the app surface.
 *
 * Each entry pairs a criterion sentence with a numeric weight. The same array
 * instance is attached (by reference) as `booleanCriteria` to every query
 * returned by generateAppQueryCorpus, so it should be treated as read-only.
 */
const APP_CRITERIA = [
    { criterion: "Response contains a substantive answer (not just errors or empty)", weight: 2 },
    { criterion: "Response includes entity or topic names from the query", weight: 1 },
    { criterion: "Response includes structured signals or data points", weight: 1 },
    { criterion: "Response does not contain error messages or stack traces", weight: 2 },
    { criterion: "Response includes source citations or trace steps", weight: 1 },
];
|
|
1707
|
+
/**
 * Build the 20-query corpus used to exercise the /search endpoint.
 *
 * Queries are spread across personas (founder 4, banker 3, investor 3,
 * researcher 3, student 2, operator 2, ceo 1, legal 1, pm 1). Every entry has
 * empty `expectedTools` / `forbiddenTools` lists and is judged against the
 * shared APP_CRITERIA.
 *
 * @returns {Array<{id: string, query: string, persona: string, scenario: string,
 *   expectedTools: Array, forbiddenTools: Array, booleanCriteria: Array}>}
 *   A freshly built array; each entry owns its own tool-list arrays.
 */
function generateAppQueryCorpus() {
    // All entries share the same boilerplate, so build them from the four
    // fields that actually vary. Fresh [] literals keep entries independent.
    const row = (id, query, persona, scenario) => ({
        id,
        query,
        persona,
        scenario,
        expectedTools: [],
        forbiddenTools: [],
        booleanCriteria: APP_CRITERIA,
    });
    return [
        // founder (4)
        row("app_founder_valuation", "What is Anthropic's current valuation and revenue?", "founder", "company_search"),
        row("app_founder_weekly", "Weekly reset for my startup", "founder", "weekly_reset"),
        row("app_founder_delegation", "Prepare a delegation packet for the engineering lead on auth refactor", "founder", "delegation"),
        row("app_founder_changes", "What changed in the AI developer tools market this week?", "founder", "important_change"),
        // banker (3)
        row("app_banker_diligence", "Diligence memo on Series B fintech startup", "banker", "company_search"),
        row("app_banker_risk", "Top 5 risks for a $50M fintech lending platform", "banker", "company_search"),
        row("app_banker_compare", "Compare Stripe vs Adyen payment infrastructure for enterprise", "banker", "competitor_brief"),
        // investor (3)
        row("app_investor_landscape", "AI infrastructure competitive landscape 2026", "investor", "competitor_brief"),
        row("app_investor_shopify", "Compare Shopify vs Amazon AI commerce strategy", "investor", "competitor_brief"),
        row("app_investor_portfolio", "What changed for Databricks, Snowflake, and Confluent this quarter?", "investor", "important_change"),
        // researcher (3)
        row("app_researcher_mcp", "MCP protocol adoption trends and ecosystem growth", "researcher", "company_search"),
        row("app_researcher_agents", "State of autonomous AI agents in enterprise workflows", "researcher", "company_search"),
        row("app_researcher_openai", "OpenAI's latest model releases and API changes", "researcher", "company_search"),
        // student (2)
        row("app_student_explain", "Explain how retrieval augmented generation works for my thesis", "student", "company_search"),
        row("app_student_career", "What skills should I learn for an AI engineering career in 2026?", "student", "company_search"),
        // operator (2)
        row("app_operator_incident", "Generate an incident response checklist for API gateway outage", "operator", "delegation"),
        row("app_operator_cost", "Cloud cost optimization strategies for GPU compute workloads", "operator", "company_search"),
        // ceo (1)
        row("app_ceo_board", "Prepare board meeting talking points on Q1 2026 product milestones", "ceo", "memo_export"),
        // legal (1)
        row("app_legal_compliance", "AI regulation compliance requirements for enterprise SaaS in the EU", "legal", "company_search"),
        // pm (1)
        row("app_pm_roadmap", "Competitive feature analysis for developer tools roadmap planning", "pm", "competitor_brief"),
    ];
}
|
|
1741
|
+
/**
 * Check whether the web app behind APP_SEARCH_BASE_URL is reachable.
 *
 * First probes the sibling /health URL (3 s timeout). If that probe fails for
 * any reason — network error, timeout, or a non-2xx status such as 404 because
 * no health route exists — falls back to a lightweight POST to /search
 * (5 s timeout) and treats any HTTP response at all as "server is up".
 *
 * @returns {Promise<boolean>} true when either probe shows the server is
 *   responding; false when both probes fail at the network level. Never rejects.
 */
async function checkAppEndpoint() {
    const healthUrl = APP_SEARCH_BASE_URL.replace("/search", "/health");
    try {
        const health = await fetch(healthUrl, {
            signal: AbortSignal.timeout(3_000),
        });
        if (health.ok) {
            return true;
        }
        // fetch() resolves on 4xx/5xx, so a missing /health route lands here
        // rather than in catch; fall through to the /search probe.
    }
    catch {
        // Network error or timeout on /health — still try /search below.
    }
    try {
        await fetch(APP_SEARCH_BASE_URL, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ query: "ping" }),
            signal: AbortSignal.timeout(5_000),
        });
        // Any HTTP response, whatever its status, means the server is up.
        return true;
    }
    catch {
        return false;
    }
}
|
|
1765
|
+
/**
 * Flatten a parsed /search response into the text block handed to the judge.
 *
 * Emits one line per populated section (entity, mission, signals, changes,
 * contradictions, next actions, profile, trace, classification, lens, judge).
 * When no section is populated, falls back to the first 2000 characters of
 * the raw JSON. Null bodies and null list entries are tolerated.
 *
 * @param {*} data - Parsed JSON body of the /search response.
 * @returns {string} Newline-joined summary text.
 */
function formatAppSearchResponse(data) {
    const parts = [];
    const result = data?.result;
    if (result) {
        const entity = result.canonicalEntity;
        if (entity) {
            parts.push(`Entity: ${entity.name ?? "unknown"}`);
            if (entity.canonicalMission) {
                parts.push(`Mission: ${String(entity.canonicalMission).slice(0, 300)}`);
            }
        }
        // [label, list, field picker] — each non-empty list becomes one line
        // with its picked fields joined by "; " (missing fields become "").
        const listSections = [
            ["Signals", result.signals, (s) => s?.name ?? s?.direction],
            ["Changes", result.whatChanged, (w) => w?.description],
            ["Contradictions", result.contradictions, (c) => c?.claim],
            ["Next actions", result.nextActions, (a) => a?.action],
        ];
        for (const [label, items, pick] of listSections) {
            if (Array.isArray(items) && items.length > 0) {
                parts.push(`${label}: ${items.map((item) => pick(item) ?? "").join("; ")}`);
            }
        }
        // entityProfile (from web enrichment)
        if (result.entityProfile) {
            parts.push(`Profile: ${JSON.stringify(result.entityProfile).slice(0, 500)}`);
        }
    }
    const trace = data?.trace;
    if (Array.isArray(trace) && trace.length > 0) {
        parts.push(`Trace: ${trace.map((t) => `${t?.step}(${t?.tool ?? ""}) ${t?.status}`).join(" → ")}`);
    }
    if (data?.classification) {
        parts.push(`Classification: ${data.classification}`);
    }
    if (data?.lens) {
        parts.push(`Lens: ${data.lens}`);
    }
    if (data?.judge) {
        parts.push(`Judge: ${JSON.stringify(data.judge).slice(0, 300)}`);
    }
    return parts.length > 0 ? parts.join("\n") : JSON.stringify(data).slice(0, 2000);
}

/**
 * Execute a query against the /search web endpoint, returning outputs for the judge.
 *
 * POSTs `{ query, lens }` (lens derived from the query's persona) with a 30 s
 * timeout. The result always reports a single pseudo-tool, "search_endpoint",
 * whose output is one of:
 *   - the formatted response summary on a 2xx reply,
 *   - `HTTP <status>: <first 500 chars of body>` on a non-2xx reply,
 *   - `Connection error: <message>` when anything throws (network failure,
 *     timeout, or a body that is not valid JSON).
 *
 * @param {{query: string, persona: string}} query - Corpus entry to run.
 * @returns {Promise<{toolsFired: string[], outputs: {search_endpoint: string},
 *   totalMs: number, skipped: Array}>} Never rejects. `totalMs` is measured
 *   when fetch resolves (or when the error is caught), before the body is read.
 */
async function executeAppQuery(query) {
    const startMs = Date.now();
    const lens = personaToLens(query.persona);
    const report = (text, totalMs) => ({
        toolsFired: ["search_endpoint"],
        outputs: { search_endpoint: text },
        totalMs,
        skipped: [],
    });
    try {
        const resp = await fetch(APP_SEARCH_BASE_URL, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ query: query.query, lens }),
            signal: AbortSignal.timeout(30_000),
        });
        const totalMs = Date.now() - startMs;
        if (!resp.ok) {
            const errText = await resp.text().catch(() => "");
            return report(`HTTP ${resp.status}: ${errText.slice(0, 500)}`, totalMs);
        }
        const data = await resp.json();
        return report(formatAppSearchResponse(data), totalMs);
    }
    catch (err) {
        const totalMs = Date.now() - startMs;
        const message = err instanceof Error ? err.message : String(err);
        return report(`Connection error: ${message}`, totalMs);
    }
}
|
|
1595
1859
|
export async function runLlmJudgeEval(options) {
|
|
1860
|
+
const surface = options.surface ?? "mcp";
|
|
1596
1861
|
// 1. Wire up DB and seed realistic test data
|
|
1597
1862
|
_setDbAccessor(getDb);
|
|
1598
1863
|
ensureSchema();
|
|
1599
1864
|
seedTestData();
|
|
1600
1865
|
// 2. Generate corpus and filter
|
|
1601
|
-
let corpus = generateQueryCorpus();
|
|
1866
|
+
let corpus = surface === "app" ? generateAppQueryCorpus() : generateQueryCorpus();
|
|
1602
1867
|
if (options.persona) {
|
|
1603
1868
|
corpus = corpus.filter((q) => q.persona === options.persona);
|
|
1604
1869
|
}
|
|
@@ -1614,6 +1879,31 @@ export async function runLlmJudgeEval(options) {
|
|
|
1614
1879
|
.map((x) => x.q)
|
|
1615
1880
|
.slice(0, options.queryLimit);
|
|
1616
1881
|
}
|
|
1882
|
+
// 3b. For app surface, verify the endpoint is reachable before running
|
|
1883
|
+
if (surface === "app") {
|
|
1884
|
+
console.log(`[llmJudgeEval] Checking app endpoint at ${APP_SEARCH_BASE_URL}...`);
|
|
1885
|
+
const reachable = await checkAppEndpoint();
|
|
1886
|
+
if (!reachable) {
|
|
1887
|
+
console.error(`\n[ERROR] App endpoint not reachable at ${APP_SEARCH_BASE_URL}`);
|
|
1888
|
+
console.error(` Make sure the web app is running: npm run dev (or npx vite --port 5191)`);
|
|
1889
|
+
console.error(` Then re-run with: npx tsx src/benchmarks/llmJudgeEval.ts --surface app\n`);
|
|
1890
|
+
cleanupTestData();
|
|
1891
|
+
return {
|
|
1892
|
+
runId: "app-unreachable",
|
|
1893
|
+
timestamp: new Date().toISOString(),
|
|
1894
|
+
queryCount: 0,
|
|
1895
|
+
passRate: 0,
|
|
1896
|
+
avgToolPrecision: 0,
|
|
1897
|
+
avgToolRecall: 0,
|
|
1898
|
+
totalForbiddenViolations: 0,
|
|
1899
|
+
avgCriteriaPassRate: 0,
|
|
1900
|
+
byPersona: {},
|
|
1901
|
+
byScenario: {},
|
|
1902
|
+
byCriterion: {},
|
|
1903
|
+
};
|
|
1904
|
+
}
|
|
1905
|
+
console.log(`[llmJudgeEval] App endpoint reachable.\n`);
|
|
1906
|
+
}
|
|
1617
1907
|
if (options.dryRun) {
|
|
1618
1908
|
console.log(`[DRY RUN] Corpus: ${corpus.length} queries`);
|
|
1619
1909
|
const personaCounts = {};
|
|
@@ -1638,25 +1928,31 @@ export async function runLlmJudgeEval(options) {
|
|
|
1638
1928
|
byCriterion: {},
|
|
1639
1929
|
};
|
|
1640
1930
|
}
|
|
1641
|
-
// 4. Load all tools
|
|
1642
|
-
|
|
1643
|
-
|
|
1644
|
-
|
|
1931
|
+
// 4. Load all tools (skip for app surface — we POST to the endpoint instead)
|
|
1932
|
+
let allTools = [];
|
|
1933
|
+
if (surface === "mcp") {
|
|
1934
|
+
console.log("[llmJudgeEval] Loading all toolsets...");
|
|
1935
|
+
allTools = await loadToolsets(ALL_DOMAIN_KEYS);
|
|
1936
|
+
console.log(`[llmJudgeEval] Loaded ${allTools.length} tools across ${ALL_DOMAIN_KEYS.length} domains`);
|
|
1937
|
+
}
|
|
1645
1938
|
// 5. Run eval
|
|
1646
1939
|
const runId = genId("ljeval");
|
|
1647
1940
|
const results = [];
|
|
1648
|
-
|
|
1941
|
+
const surfaceTag = `[surface:${surface}]`;
|
|
1942
|
+
console.log(`[llmJudgeEval] ${surfaceTag} Running ${corpus.length} queries (run: ${runId})...\n`);
|
|
1649
1943
|
for (let i = 0; i < corpus.length; i++) {
|
|
1650
1944
|
const query = corpus[i];
|
|
1651
1945
|
const progress = `[${i + 1}/${corpus.length}]`;
|
|
1652
|
-
// Execute
|
|
1653
|
-
const execution =
|
|
1946
|
+
// Execute — branch on surface
|
|
1947
|
+
const execution = surface === "app"
|
|
1948
|
+
? await executeAppQuery(query)
|
|
1949
|
+
: await executeQueryTools(query, allTools);
|
|
1654
1950
|
// Judge
|
|
1655
1951
|
const { response: judgeResult, judgeType } = await callGeminiJudge(query, execution.outputs);
|
|
1656
1952
|
// Compute metrics
|
|
1657
|
-
const toolPrecision = computeToolPrecision(query.expectedTools, execution.toolsFired);
|
|
1658
|
-
const toolRecall = computeToolRecall(query.expectedTools, execution.toolsFired);
|
|
1659
|
-
const forbiddenViolations = countForbiddenViolations(query.forbiddenTools, execution.toolsFired);
|
|
1953
|
+
const toolPrecision = surface === "app" ? 1 : computeToolPrecision(query.expectedTools, execution.toolsFired);
|
|
1954
|
+
const toolRecall = surface === "app" ? 1 : computeToolRecall(query.expectedTools, execution.toolsFired);
|
|
1955
|
+
const forbiddenViolations = surface === "app" ? 0 : countForbiddenViolations(query.forbiddenTools, execution.toolsFired);
|
|
1660
1956
|
const criteriaPassRate = computeCriteriaPassRate(judgeResult.criteria, query.booleanCriteria);
|
|
1661
1957
|
// Pass if weighted criteria pass rate >= 60% AND no forbidden tool violations
|
|
1662
1958
|
const overallPass = criteriaPassRate >= 0.60 && forbiddenViolations === 0;
|
|
@@ -1675,10 +1971,10 @@ export async function runLlmJudgeEval(options) {
|
|
|
1675
1971
|
results.push(qr);
|
|
1676
1972
|
saveResult(runId, qr);
|
|
1677
1973
|
const status = overallPass ? "PASS" : "FAIL";
|
|
1678
|
-
process.stdout.write(`${progress} [judge:${judgeType}] ${query.id} ${status} (precision=${toolPrecision.toFixed(2)}, criteria=${criteriaPassRate.toFixed(2)}) ${execution.totalMs}ms\n`);
|
|
1974
|
+
process.stdout.write(`${progress} ${surfaceTag} [judge:${judgeType}] ${query.id} ${status} (precision=${toolPrecision.toFixed(2)}, criteria=${criteriaPassRate.toFixed(2)}) ${execution.totalMs}ms\n`);
|
|
1679
1975
|
}
|
|
1680
|
-
// 6. Build summary
|
|
1681
|
-
const fullCorpus = generateQueryCorpus();
|
|
1976
|
+
// 6. Build summary — use the correct corpus for the surface
|
|
1977
|
+
const fullCorpus = surface === "app" ? generateAppQueryCorpus() : generateQueryCorpus();
|
|
1682
1978
|
const summary = buildSummary(runId, results, fullCorpus);
|
|
1683
1979
|
saveRun(runId, results.length, summary.passRate, options.persona, options.scenario, summary);
|
|
1684
1980
|
// 7. Regression detection
|
|
@@ -1986,6 +2282,9 @@ function parseArgs(argv) {
|
|
|
1986
2282
|
case "--flywheel":
|
|
1987
2283
|
options.flywheel = true;
|
|
1988
2284
|
break;
|
|
2285
|
+
case "--surface":
|
|
2286
|
+
options.surface = argv[++i];
|
|
2287
|
+
break;
|
|
1989
2288
|
default:
|
|
1990
2289
|
if (arg.startsWith("--")) {
|
|
1991
2290
|
console.error(`Unknown flag: ${arg}`);
|
|
@@ -2026,6 +2325,7 @@ async function main() {
|
|
|
2026
2325
|
const options = parseArgs(process.argv.slice(2));
|
|
2027
2326
|
console.log("NodeBench LLM Judge Eval Harness");
|
|
2028
2327
|
console.log("================================");
|
|
2328
|
+
console.log(` Surface: ${options.surface ?? "mcp"}`);
|
|
2029
2329
|
console.log(` Queries: ${options.queryLimit}`);
|
|
2030
2330
|
console.log(` Persona: ${options.persona ?? "all"}`);
|
|
2031
2331
|
console.log(` Scenario: ${options.scenario ?? "all"}`);
|