promptfoo 0.103.13 → 0.103.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +19 -18
- package/dist/src/app/assets/{index-CNZfEf8i.js → index-CejUHECf.js} +276 -275
- package/dist/src/app/assets/{index.es-vrsSPryr.js → index.es-DWMnMsuU.js} +1 -1
- package/dist/src/app/assets/{sync-B_Y0n4tJ.js → sync-OIzapIKF.js} +1 -1
- package/dist/src/app/index.html +1 -1
- package/dist/src/assertions/refusal.d.ts.map +1 -1
- package/dist/src/assertions/refusal.js +14 -3
- package/dist/src/assertions/refusal.js.map +1 -1
- package/dist/src/fetch.d.ts +2 -0
- package/dist/src/fetch.d.ts.map +1 -1
- package/dist/src/fetch.js +2 -1
- package/dist/src/fetch.js.map +1 -1
- package/dist/src/index.d.ts +1 -0
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/providers/adaline.gateway.d.ts +13 -1
- package/dist/src/providers/adaline.gateway.d.ts.map +1 -1
- package/dist/src/providers/adaline.gateway.js +4 -2
- package/dist/src/providers/adaline.gateway.js.map +1 -1
- package/dist/src/providers/azure.d.ts.map +1 -1
- package/dist/src/providers/azure.js +22 -5
- package/dist/src/providers/azure.js.map +1 -1
- package/dist/src/providers/cloudera.d.ts +15 -0
- package/dist/src/providers/cloudera.d.ts.map +1 -0
- package/dist/src/providers/cloudera.js +22 -0
- package/dist/src/providers/cloudera.js.map +1 -0
- package/dist/src/providers/http.d.ts +5 -3
- package/dist/src/providers/http.d.ts.map +1 -1
- package/dist/src/providers/http.js +7 -6
- package/dist/src/providers/http.js.map +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +7 -0
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/portkey.d.ts +3 -2
- package/dist/src/providers/portkey.d.ts.map +1 -1
- package/dist/src/providers/portkey.js +1 -0
- package/dist/src/providers/portkey.js.map +1 -1
- package/dist/src/providers/watsonx.d.ts +2 -2
- package/dist/src/providers.d.ts +2 -0
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +36 -3
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/commands/generate.d.ts.map +1 -1
- package/dist/src/redteam/commands/generate.js +31 -10
- package/dist/src/redteam/commands/generate.js.map +1 -1
- package/dist/src/redteam/commands/run.d.ts.map +1 -1
- package/dist/src/redteam/commands/run.js +1 -1
- package/dist/src/redteam/commands/run.js.map +1 -1
- package/dist/src/redteam/graders.d.ts.map +1 -1
- package/dist/src/redteam/graders.js +13 -13
- package/dist/src/redteam/graders.js.map +1 -1
- package/dist/src/redteam/index.d.ts +1 -0
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +1 -1
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/plugins/harmful/graders.d.ts +52 -0
- package/dist/src/redteam/plugins/harmful/graders.d.ts.map +1 -1
- package/dist/src/redteam/plugins/harmful/graders.js +577 -22
- package/dist/src/redteam/plugins/harmful/graders.js.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.d.ts +2 -1
- package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +5 -5
- package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
- package/dist/src/redteam/providers/goat.d.ts +2 -1
- package/dist/src/redteam/providers/goat.d.ts.map +1 -1
- package/dist/src/redteam/providers/goat.js +5 -5
- package/dist/src/redteam/providers/goat.js.map +1 -1
- package/dist/src/redteam/providers/shared.js +1 -1
- package/dist/src/redteam/providers/shared.js.map +1 -1
- package/dist/src/redteam/sharedFrontend.d.ts.map +1 -1
- package/dist/src/redteam/sharedFrontend.js +18 -2
- package/dist/src/redteam/sharedFrontend.js.map +1 -1
- package/dist/src/redteam/types.d.ts +1 -0
- package/dist/src/redteam/types.d.ts.map +1 -1
- package/dist/src/redteam/util.d.ts.map +1 -1
- package/dist/src/redteam/util.js +2 -0
- package/dist/src/redteam/util.js.map +1 -1
- package/dist/src/server/server.d.ts.map +1 -1
- package/dist/src/server/server.js +1 -7
- package/dist/src/server/server.js.map +1 -1
- package/dist/src/validators/redteam.d.ts +3 -0
- package/dist/src/validators/redteam.d.ts.map +1 -1
- package/dist/src/validators/redteam.js +2 -0
- package/dist/src/validators/redteam.js.map +1 -1
- package/dist/test/assertions/answerRelevance.test.d.ts +2 -0
- package/dist/test/assertions/answerRelevance.test.d.ts.map +1 -0
- package/dist/test/assertions/answerRelevance.test.js +177 -0
- package/dist/test/assertions/answerRelevance.test.js.map +1 -0
- package/dist/test/assertions/contextFaithfulness.test.d.ts +2 -0
- package/dist/test/assertions/contextFaithfulness.test.d.ts.map +1 -0
- package/dist/test/assertions/contextFaithfulness.test.js +226 -0
- package/dist/test/assertions/contextFaithfulness.test.js.map +1 -0
- package/dist/test/assertions/contextRecall.test.d.ts +2 -0
- package/dist/test/assertions/contextRecall.test.d.ts.map +1 -0
- package/dist/test/assertions/contextRecall.test.js +243 -0
- package/dist/test/assertions/contextRecall.test.js.map +1 -0
- package/dist/test/assertions/contextRelevance.test.d.ts +2 -0
- package/dist/test/assertions/contextRelevance.test.d.ts.map +1 -0
- package/dist/test/assertions/contextRelevance.test.js +238 -0
- package/dist/test/assertions/contextRelevance.test.js.map +1 -0
- package/dist/test/assertions/geval.test.d.ts +2 -0
- package/dist/test/assertions/geval.test.d.ts.map +1 -0
- package/dist/test/assertions/geval.test.js +222 -0
- package/dist/test/assertions/geval.test.js.map +1 -0
- package/dist/test/assertions/modelGradedClosedQa.test.d.ts +2 -0
- package/dist/test/assertions/modelGradedClosedQa.test.d.ts.map +1 -0
- package/dist/test/assertions/modelGradedClosedQa.test.js +200 -0
- package/dist/test/assertions/modelGradedClosedQa.test.js.map +1 -0
- package/dist/test/assertions/refusal.test.d.ts +2 -0
- package/dist/test/assertions/refusal.test.d.ts.map +1 -0
- package/dist/test/assertions/refusal.test.js +300 -0
- package/dist/test/assertions/refusal.test.js.map +1 -0
- package/dist/test/fetch.test.js +66 -18
- package/dist/test/fetch.test.js.map +1 -1
- package/dist/test/providers/adaline.gateway.test.d.ts +2 -0
- package/dist/test/providers/adaline.gateway.test.d.ts.map +1 -0
- package/dist/test/providers/adaline.gateway.test.js +245 -0
- package/dist/test/providers/adaline.gateway.test.js.map +1 -0
- package/dist/test/providers/azure.test.js +73 -12
- package/dist/test/providers/azure.test.js.map +1 -1
- package/dist/test/providers/http.test.js +70 -2
- package/dist/test/providers/http.test.js.map +1 -1
- package/dist/test/providers/index.test.js +35 -459
- package/dist/test/providers/index.test.js.map +1 -1
- package/dist/test/providers/openai.test.js +509 -0
- package/dist/test/providers/openai.test.js.map +1 -1
- package/dist/test/providers/portkey.test.js +57 -0
- package/dist/test/providers/portkey.test.js.map +1 -1
- package/dist/test/providers/simulatedUser.test.js +1 -0
- package/dist/test/providers/simulatedUser.test.js.map +1 -1
- package/dist/test/providers.test.js +10 -0
- package/dist/test/providers.test.js.map +1 -1
- package/dist/test/redteam/commands/generate.test.js +7 -0
- package/dist/test/redteam/commands/generate.test.js.map +1 -1
- package/dist/test/redteam/extraction/purpose.test.js +1 -0
- package/dist/test/redteam/extraction/purpose.test.js.map +1 -1
- package/dist/test/redteam/plugins/asciiSmuggling.test.d.ts +2 -0
- package/dist/test/redteam/plugins/asciiSmuggling.test.d.ts.map +1 -0
- package/dist/test/redteam/plugins/asciiSmuggling.test.js +75 -0
- package/dist/test/redteam/plugins/asciiSmuggling.test.js.map +1 -0
- package/dist/test/redteam/providers/crescendo/index.test.js +3 -3
- package/dist/test/redteam/providers/crescendo/index.test.js.map +1 -1
- package/dist/test/redteam/strategies/index.test.js +1 -0
- package/dist/test/redteam/strategies/index.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +19 -18
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contextRecall.test.js","sourceRoot":"","sources":["../../../test/assertions/contextRecall.test.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,sEAAyE;AACzE,6DAA+C;AAG/C,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;AAEhC,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,MAAM,wBAAwB,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,sBAAsB,CAAC,CAAC;IAE9E,UAAU,CAAC,GAAG,EAAE;QACd,IAAI,CAAC,aAAa,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kDAAkD,EAAE,KAAK,IAAI,EAAE;QAChE,MAAM,UAAU,GAAG;YACjB,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,aAAa;SACtB,CAAC;QACF,wBAAwB,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;QAEvD,MAAM,YAAY,GAAgB;YAChC,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;YACzB,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;SACnB,CAAC;QAEF,MAAM,MAAM,GAAoB;YAC9B,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,GAAG,EAAE;YACrD,aAAa,EAAE,aAAa;YAC5B,MAAM,EAAE,cAAc;YACtB,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,gBAAgB;YAC1B,OAAO,EAAE;gBACP,MAAM,EAAE,cAAc;gBACtB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,YAAY;gBACtB,gBAAgB,EAAE,SAAS;aAC5B;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,QAAQ,EAAE,YAAY;YACtB,gBAAgB,EAAE,EAAsB;SACzC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,IAAA,mCAAmB,EAAC,MAAM,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,GAAG,EAAE;YACrD,GAAG,UAAU;SACd,CAAC,CAAC;QACH,MAAM,CAAC,wBAAwB,CAAC,CAAC,oBAAoB,CACnD,cAAc,EACd,aAAa,EACb,GAAG,EACH,EAAE,EACF,EAAE,CACH,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gDAAgD,EAAE,KAAK,IAAI,EAAE;QAC9D,MAAM,UAAU,GAAG;YACjB,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,IAAI;YACX,MAAM,EAAE,uBAAuB;SAChC,CAAC;QACF,wBAAwB,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;QAEvD,MAAM,YAAY,GAAgB;YAChC,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;YACzB,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;SACnB,CAAC;QAEF,MAAM,MAAM,GAAoB;YAC9B,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,GAAG,EAAE;YACrD,aAAa,EAAE,aAAa;YAC5B,MAAM,EAAE,kBAAkB;YAC1B,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE,OAAO,EAAE,mBAAmB,EAAE;gBACtC,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,gBAAgB;YAC1B,OAAO,EAAE;gBACP,MAAM,EAAE,kBAAkB;gBAC1B,IAAI,EAAE,EAAE,OAAO,EAAE,mBAAmB,EAAE;gBACtC,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE,OAAO,EAAE,mBAAmB,EAAE;oBACtC,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,YAAY;gBACtB,gBAAgB,EAAE,SAAS;aAC5B;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,QAAQ,EAAE,YAAY;YACtB,gBAAgB,EAAE,EAAsB;SACzC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,IAAA,mCAAmB,EAAC,MAAM,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE,SAAS,EAAE,GAAG,EAAE;YACrD,GAAG,UAAU;SACd,CAAC,CAAC;QACH,MAAM,CAAC,wBAAwB,CAAC,CAAC,oBAAoB,CACnD,mBAAmB,EACnB,aAAa,EACb,GAAG,EACH,EAAE,EACF,EAAE,OAAO,EAAE,mBAAmB,EAAE,CACjC,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qDAAqD,EAAE,KAAK,IAAI,EAAE;QACnE,MAAM,UAAU,GAAG;YACjB,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,wBAAwB;SACjC,CAAC;QACF,wBAAwB,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;QAEvD,MAAM,YAAY,GAAgB;YAChC,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;YACzB,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;SACnB,CAAC;QAEF,MAAM,MAAM,GAAoB;YAC9B,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE;YACrC,aAAa,EAAE,aAAa;YAC5B,MAAM,EAAE,cAAc;YACtB,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,gBAAgB;YAC1B,OAAO,EAAE;gBACP,MAAM,EAAE,cAAc;gBACtB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,YAAY;gBACtB,gBAAgB,EAAE,SAAS;aAC5B;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,QAAQ,EAAE,YAAY;YACtB,gBAAgB,EAAE,EAAsB;SACzC,CAAC;QAEF,MAAM,MAAM,GAAG,MAAM,IAAA,mCAAmB,EAAC,MAAM,CAAC,CAAC;QAEjD,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE;YACrC,GAAG,UAAU;SACd,CAAC,CAAC;QACH,MAAM,CAAC,wBAAwB,CAAC,CAAC,oBAAoB,CAAC,cAAc,EAAE,aAAa,EAAE,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC;IAClG,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,KAAK,IAAI,EAAE;QACrE,MAAM,YAAY,GAAgB;YAChC,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;YACzB,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;SACnB,CAAC;QAEF,MAAM,MAAM,GAAoB;YAC9B,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE;YACrC,aAAa,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE,iDAAiD;YAChF,MAAM,EAAE,cAAc;YACtB,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,gBAAgB;YAC1B,OAAO,EAAE;gBACP,MAAM,EAAE,cAAc;gBACtB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,YAAY;gBACtB,gBAAgB,EAAE,SAAS;aAC5B;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,KAAK;YACb,YAAY,EAAE,KAAK;YACnB,QAAQ,EAAE,YAAY;YACtB,gBAAgB,EAAE,EAAsB;SACzC,CAAC;QAEF,MAAM,MAAM,CAAC,IAAA,mCAAmB,EAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACvD,wDAAwD,CACzD,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;QACzD,MAAM,YAAY,GAAgB;YAChC,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;YACzB,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;SACnB,CAAC;QAEF,MAAM,MAAM,GAAoB;YAC9B,SAAS,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE;YACrC,aAAa,EAAE,aAAa;YAC5B,MAAM,EAAE,SAAS;YACjB,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,gBAAgB;YAC1B,OAAO,EAAE;gBACP,MAAM,EAAE,SAAS;gBACjB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,YAAY;gBACtB,gBAAgB,EAAE,SAAS;aAC5B;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,QAAQ,EAAE,YAAY;YACtB,gBAAgB,EAAE,EAAsB;SACzC,CAAC;QAEF,MAAM,MAAM,CAAC,IAAA,mCAAmB,EAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CACvD,kDAAkD,CACnD,CAAC;IACJ,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contextRelevance.test.d.ts","sourceRoot":"","sources":["../../../test/assertions/contextRelevance.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const contextRelevance_1 = require("../../src/assertions/contextRelevance");
|
|
4
|
+
const matchers_1 = require("../../src/matchers");
|
|
5
|
+
jest.mock('../../src/matchers');
|
|
6
|
+
describe('handleContextRelevance', () => {
|
|
7
|
+
beforeEach(() => {
|
|
8
|
+
jest.resetAllMocks();
|
|
9
|
+
});
|
|
10
|
+
it('should handle valid input', async () => {
|
|
11
|
+
const mockResult = {
|
|
12
|
+
pass: true,
|
|
13
|
+
score: 0.8,
|
|
14
|
+
reason: 'test reason',
|
|
15
|
+
};
|
|
16
|
+
jest.mocked(matchers_1.matchesContextRelevance).mockResolvedValue(mockResult);
|
|
17
|
+
const result = await (0, contextRelevance_1.handleContextRelevance)({
|
|
18
|
+
assertion: {
|
|
19
|
+
type: 'context-relevance',
|
|
20
|
+
threshold: 0.7,
|
|
21
|
+
},
|
|
22
|
+
test: {
|
|
23
|
+
vars: {
|
|
24
|
+
query: 'test query',
|
|
25
|
+
context: 'test context',
|
|
26
|
+
},
|
|
27
|
+
options: {},
|
|
28
|
+
},
|
|
29
|
+
baseType: 'context-relevance',
|
|
30
|
+
context: {
|
|
31
|
+
prompt: 'test prompt',
|
|
32
|
+
vars: {},
|
|
33
|
+
test: {
|
|
34
|
+
vars: {
|
|
35
|
+
query: 'test query',
|
|
36
|
+
context: 'test context',
|
|
37
|
+
},
|
|
38
|
+
options: {},
|
|
39
|
+
},
|
|
40
|
+
logProbs: undefined,
|
|
41
|
+
provider: {
|
|
42
|
+
id: () => 'test-provider',
|
|
43
|
+
config: {},
|
|
44
|
+
callApi: jest.fn(),
|
|
45
|
+
},
|
|
46
|
+
providerResponse: {
|
|
47
|
+
output: 'test output',
|
|
48
|
+
tokenUsage: {},
|
|
49
|
+
},
|
|
50
|
+
},
|
|
51
|
+
inverse: false,
|
|
52
|
+
output: 'test output',
|
|
53
|
+
outputString: 'test output',
|
|
54
|
+
providerResponse: {
|
|
55
|
+
output: 'test output',
|
|
56
|
+
tokenUsage: {},
|
|
57
|
+
},
|
|
58
|
+
});
|
|
59
|
+
expect(result).toEqual({
|
|
60
|
+
assertion: {
|
|
61
|
+
type: 'context-relevance',
|
|
62
|
+
threshold: 0.7,
|
|
63
|
+
},
|
|
64
|
+
...mockResult,
|
|
65
|
+
});
|
|
66
|
+
expect(matchers_1.matchesContextRelevance).toHaveBeenCalledWith('test query', 'test context', 0.7, {});
|
|
67
|
+
});
|
|
68
|
+
it('should throw error if vars is missing', async () => {
|
|
69
|
+
await expect((0, contextRelevance_1.handleContextRelevance)({
|
|
70
|
+
assertion: {
|
|
71
|
+
type: 'context-relevance',
|
|
72
|
+
},
|
|
73
|
+
test: {},
|
|
74
|
+
baseType: 'context-relevance',
|
|
75
|
+
context: {
|
|
76
|
+
prompt: 'test prompt',
|
|
77
|
+
vars: {},
|
|
78
|
+
test: {
|
|
79
|
+
vars: {
|
|
80
|
+
query: 'test query',
|
|
81
|
+
context: 'test context',
|
|
82
|
+
},
|
|
83
|
+
options: {},
|
|
84
|
+
},
|
|
85
|
+
logProbs: undefined,
|
|
86
|
+
provider: {
|
|
87
|
+
id: () => 'test-provider',
|
|
88
|
+
config: {},
|
|
89
|
+
callApi: jest.fn(),
|
|
90
|
+
},
|
|
91
|
+
providerResponse: {
|
|
92
|
+
output: 'test output',
|
|
93
|
+
tokenUsage: {},
|
|
94
|
+
},
|
|
95
|
+
},
|
|
96
|
+
inverse: false,
|
|
97
|
+
output: 'test output',
|
|
98
|
+
outputString: 'test output',
|
|
99
|
+
providerResponse: {
|
|
100
|
+
output: 'test output',
|
|
101
|
+
tokenUsage: {},
|
|
102
|
+
},
|
|
103
|
+
})).rejects.toThrow('context-relevance assertion type must have a vars object');
|
|
104
|
+
});
|
|
105
|
+
it('should throw error if query is missing', async () => {
|
|
106
|
+
await expect((0, contextRelevance_1.handleContextRelevance)({
|
|
107
|
+
assertion: {
|
|
108
|
+
type: 'context-relevance',
|
|
109
|
+
},
|
|
110
|
+
test: {
|
|
111
|
+
vars: {
|
|
112
|
+
context: 'test context',
|
|
113
|
+
},
|
|
114
|
+
},
|
|
115
|
+
baseType: 'context-relevance',
|
|
116
|
+
context: {
|
|
117
|
+
prompt: 'test prompt',
|
|
118
|
+
vars: {},
|
|
119
|
+
test: {
|
|
120
|
+
vars: {
|
|
121
|
+
query: 'test query',
|
|
122
|
+
context: 'test context',
|
|
123
|
+
},
|
|
124
|
+
options: {},
|
|
125
|
+
},
|
|
126
|
+
logProbs: undefined,
|
|
127
|
+
provider: {
|
|
128
|
+
id: () => 'test-provider',
|
|
129
|
+
config: {},
|
|
130
|
+
callApi: jest.fn(),
|
|
131
|
+
},
|
|
132
|
+
providerResponse: {
|
|
133
|
+
output: 'test output',
|
|
134
|
+
tokenUsage: {},
|
|
135
|
+
},
|
|
136
|
+
},
|
|
137
|
+
inverse: false,
|
|
138
|
+
output: 'test output',
|
|
139
|
+
outputString: 'test output',
|
|
140
|
+
providerResponse: {
|
|
141
|
+
output: 'test output',
|
|
142
|
+
tokenUsage: {},
|
|
143
|
+
},
|
|
144
|
+
})).rejects.toThrow('context-relevance assertion type must have a query var');
|
|
145
|
+
});
|
|
146
|
+
it('should throw error if context is missing', async () => {
|
|
147
|
+
await expect((0, contextRelevance_1.handleContextRelevance)({
|
|
148
|
+
assertion: {
|
|
149
|
+
type: 'context-relevance',
|
|
150
|
+
},
|
|
151
|
+
test: {
|
|
152
|
+
vars: {
|
|
153
|
+
query: 'test query',
|
|
154
|
+
},
|
|
155
|
+
},
|
|
156
|
+
baseType: 'context-relevance',
|
|
157
|
+
context: {
|
|
158
|
+
prompt: 'test prompt',
|
|
159
|
+
vars: {},
|
|
160
|
+
test: {
|
|
161
|
+
vars: {
|
|
162
|
+
query: 'test query',
|
|
163
|
+
context: 'test context',
|
|
164
|
+
},
|
|
165
|
+
options: {},
|
|
166
|
+
},
|
|
167
|
+
logProbs: undefined,
|
|
168
|
+
provider: {
|
|
169
|
+
id: () => 'test-provider',
|
|
170
|
+
config: {},
|
|
171
|
+
callApi: jest.fn(),
|
|
172
|
+
},
|
|
173
|
+
providerResponse: {
|
|
174
|
+
output: 'test output',
|
|
175
|
+
tokenUsage: {},
|
|
176
|
+
},
|
|
177
|
+
},
|
|
178
|
+
inverse: false,
|
|
179
|
+
output: 'test output',
|
|
180
|
+
outputString: 'test output',
|
|
181
|
+
providerResponse: {
|
|
182
|
+
output: 'test output',
|
|
183
|
+
tokenUsage: {},
|
|
184
|
+
},
|
|
185
|
+
})).rejects.toThrow('context-relevance assertion type must have a context var');
|
|
186
|
+
});
|
|
187
|
+
it('should use default threshold of 0 if not specified', async () => {
|
|
188
|
+
const mockResult = {
|
|
189
|
+
pass: true,
|
|
190
|
+
score: 0.5,
|
|
191
|
+
reason: 'test reason',
|
|
192
|
+
};
|
|
193
|
+
jest.mocked(matchers_1.matchesContextRelevance).mockResolvedValue(mockResult);
|
|
194
|
+
await (0, contextRelevance_1.handleContextRelevance)({
|
|
195
|
+
assertion: {
|
|
196
|
+
type: 'context-relevance',
|
|
197
|
+
},
|
|
198
|
+
test: {
|
|
199
|
+
vars: {
|
|
200
|
+
query: 'test query',
|
|
201
|
+
context: 'test context',
|
|
202
|
+
},
|
|
203
|
+
options: {},
|
|
204
|
+
},
|
|
205
|
+
baseType: 'context-relevance',
|
|
206
|
+
context: {
|
|
207
|
+
prompt: 'test prompt',
|
|
208
|
+
vars: {},
|
|
209
|
+
test: {
|
|
210
|
+
vars: {
|
|
211
|
+
query: 'test query',
|
|
212
|
+
context: 'test context',
|
|
213
|
+
},
|
|
214
|
+
options: {},
|
|
215
|
+
},
|
|
216
|
+
logProbs: undefined,
|
|
217
|
+
provider: {
|
|
218
|
+
id: () => 'test-provider',
|
|
219
|
+
config: {},
|
|
220
|
+
callApi: jest.fn(),
|
|
221
|
+
},
|
|
222
|
+
providerResponse: {
|
|
223
|
+
output: 'test output',
|
|
224
|
+
tokenUsage: {},
|
|
225
|
+
},
|
|
226
|
+
},
|
|
227
|
+
inverse: false,
|
|
228
|
+
output: 'test output',
|
|
229
|
+
outputString: 'test output',
|
|
230
|
+
providerResponse: {
|
|
231
|
+
output: 'test output',
|
|
232
|
+
tokenUsage: {},
|
|
233
|
+
},
|
|
234
|
+
});
|
|
235
|
+
expect(matchers_1.matchesContextRelevance).toHaveBeenCalledWith('test query', 'test context', 0, {});
|
|
236
|
+
});
|
|
237
|
+
});
|
|
238
|
+
//# sourceMappingURL=contextRelevance.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"contextRelevance.test.js","sourceRoot":"","sources":["../../../test/assertions/contextRelevance.test.ts"],"names":[],"mappings":";;AAAA,4EAA+E;AAC/E,iDAA6D;AAE7D,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;AAEhC,QAAQ,CAAC,wBAAwB,EAAE,GAAG,EAAE;IACtC,UAAU,CAAC,GAAG,EAAE;QACd,IAAI,CAAC,aAAa,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;QACzC,MAAM,UAAU,GAAG;YACjB,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,aAAa;SACtB,CAAC;QAEF,IAAI,CAAC,MAAM,CAAC,kCAAuB,CAAC,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;QAEnE,MAAM,MAAM,GAAG,MAAM,IAAA,yCAAsB,EAAC;YAC1C,SAAS,EAAE;gBACT,IAAI,EAAE,mBAAmB;gBACzB,SAAS,EAAE,GAAG;aACf;YACD,IAAI,EAAE;gBACJ,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY;oBACnB,OAAO,EAAE,cAAc;iBACxB;gBACD,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,mBAAmB;YAC7B,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE;wBACJ,KAAK,EAAE,YAAY;wBACnB,OAAO,EAAE,cAAc;qBACxB;oBACD,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;iBACnB;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,UAAU,EAAE,EAAE;iBACf;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,UAAU,EAAE,EAAE;aACf;SACK,CAAC,CAAC;QAEV,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB,SAAS,EAAE;gBACT,IAAI,EAAE,mBAAmB;gBACzB,SAAS,EAAE,GAAG;aACf;YACD,GAAG,UAAU;SACd,CAAC,CAAC;QAEH,MAAM,CAAC,kCAAuB,CAAC,CAAC,oBAAoB,CAAC,YAAY,EAAE,cAAc,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;IAC9F,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;QACrD,MAAM,MAAM,CACV,IAAA,yCAAsB,EAAC;YACrB,SAAS,EAAE;gBACT,IAAI,EAAE,mBAAmB;aAC1B;YACD,IAAI,EAAE,EAAE;YACR,QAAQ,EAAE,mBAAmB;YAC7B,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE;wBACJ,KAAK,EAAE,YAAY;wBACnB,OAAO,EAAE,cAAc;qBACxB;oBACD,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;iBACnB;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,UAAU,EAAE,EAAE;iBACf;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,UAAU,EAAE,EAAE;aACf;SACK,CAAC,CACV,CAAC,OAAO,CAAC,OAAO,CAAC,0DAA0D,CAAC,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,KAAK,IAAI,EAAE;QACtD,MAAM,MAAM,CACV,IAAA,yCAAsB,EAAC;YACrB,SAAS,EAAE;gBACT,IAAI,EAAE,mBAAmB;aAC1B;YACD,IAAI,EAAE;gBACJ,IAAI,EAAE;oBACJ,OAAO,EAAE,cAAc;iBACxB;aACF;YACD,QAAQ,EAAE,mBAAmB;YAC7B,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE;wBACJ,KAAK,EAAE,YAAY;wBACnB,OAAO,EAAE,cAAc;qBACxB;oBACD,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;iBACnB;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,UAAU,EAAE,EAAE;iBACf;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,UAAU,EAAE,EAAE;aACf;SACK,CAAC,CACV,CAAC,OAAO,CAAC,OAAO,CAAC,wDAAwD,CAAC,CAAC;IAC9E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,CACV,IAAA,yCAAsB,EAAC;YACrB,SAAS,EAAE;gBACT,IAAI,EAAE,mBAAmB;aAC1B;YACD,IAAI,EAAE;gBACJ,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY;iBACpB;aACF;YACD,QAAQ,EAAE,mBAAmB;YAC7B,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE;wBACJ,KAAK,EAAE,YAAY;wBACnB,OAAO,EAAE,cAAc;qBACxB;oBACD,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;iBACnB;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,UAAU,EAAE,EAAE;iBACf;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,UAAU,EAAE,EAAE;aACf;SACK,CAAC,CACV,CAAC,OAAO,CAAC,OAAO,CAAC,0DAA0D,CAAC,CAAC;IAChF,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oDAAoD,EAAE,KAAK,IAAI,EAAE;QAClE,MAAM,UAAU,GAAG;YACjB,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,aAAa;SACtB,CAAC;QAEF,IAAI,CAAC,MAAM,CAAC,kCAAuB,CAAC,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC;QAEnE,MAAM,IAAA,yCAAsB,EAAC;YAC3B,SAAS,EAAE;gBACT,IAAI,EAAE,mBAAmB;aAC1B;YACD,IAAI,EAAE;gBACJ,IAAI,EAAE;oBACJ,KAAK,EAAE,YAAY;oBACnB,OAAO,EAAE,cAAc;iBACxB;gBACD,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,mBAAmB;YAC7B,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE;wBACJ,KAAK,EAAE,YAAY;wBACnB,OAAO,EAAE,cAAc;qBACxB;oBACD,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,IAAI,CAAC,EAAE,EAAE;iBACnB;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,UAAU,EAAE,EAAE;iBACf;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,UAAU,EAAE,EAAE;aACf;SACK,CAAC,CAAC;QAEV,MAAM,CAAC,kCAAuB,CAAC,CAAC,oBAAoB,CAAC,YAAY,EAAE,cAAc,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;IAC5F,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"geval.test.d.ts","sourceRoot":"","sources":["../../../test/assertions/geval.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const geval_1 = require("../../src/assertions/geval");
|
|
4
|
+
const matchers_1 = require("../../src/matchers");
|
|
5
|
+
jest.mock('../../src/matchers');
|
|
6
|
+
describe('handleGEval', () => {
|
|
7
|
+
beforeEach(() => {
|
|
8
|
+
jest.resetAllMocks();
|
|
9
|
+
});
|
|
10
|
+
it('should handle string renderedValue', async () => {
|
|
11
|
+
const mockMatchesGEval = jest.mocked(matchers_1.matchesGEval);
|
|
12
|
+
mockMatchesGEval.mockResolvedValue({
|
|
13
|
+
pass: true,
|
|
14
|
+
score: 0.8,
|
|
15
|
+
reason: 'test reason',
|
|
16
|
+
});
|
|
17
|
+
const result = await (0, geval_1.handleGEval)({
|
|
18
|
+
assertion: {
|
|
19
|
+
type: 'g-eval',
|
|
20
|
+
value: 'test criteria',
|
|
21
|
+
threshold: 0.7,
|
|
22
|
+
},
|
|
23
|
+
renderedValue: 'test criteria',
|
|
24
|
+
prompt: 'test prompt',
|
|
25
|
+
outputString: 'test output',
|
|
26
|
+
test: {
|
|
27
|
+
vars: {},
|
|
28
|
+
assert: [],
|
|
29
|
+
options: {},
|
|
30
|
+
},
|
|
31
|
+
baseType: 'g-eval',
|
|
32
|
+
context: {
|
|
33
|
+
prompt: 'test prompt',
|
|
34
|
+
vars: {},
|
|
35
|
+
test: {
|
|
36
|
+
vars: {},
|
|
37
|
+
assert: [],
|
|
38
|
+
options: {},
|
|
39
|
+
},
|
|
40
|
+
logProbs: undefined,
|
|
41
|
+
provider: {
|
|
42
|
+
id: () => 'test-provider',
|
|
43
|
+
callApi: async () => ({ output: 'test' }),
|
|
44
|
+
},
|
|
45
|
+
providerResponse: {
|
|
46
|
+
output: 'test output',
|
|
47
|
+
error: undefined,
|
|
48
|
+
},
|
|
49
|
+
},
|
|
50
|
+
inverse: false,
|
|
51
|
+
output: 'test output',
|
|
52
|
+
providerResponse: {
|
|
53
|
+
output: 'test output',
|
|
54
|
+
error: undefined,
|
|
55
|
+
},
|
|
56
|
+
});
|
|
57
|
+
expect(result).toEqual({
|
|
58
|
+
assertion: {
|
|
59
|
+
type: 'g-eval',
|
|
60
|
+
value: 'test criteria',
|
|
61
|
+
threshold: 0.7,
|
|
62
|
+
},
|
|
63
|
+
pass: true,
|
|
64
|
+
score: 0.8,
|
|
65
|
+
reason: 'test reason',
|
|
66
|
+
});
|
|
67
|
+
expect(mockMatchesGEval).toHaveBeenCalledWith('test criteria', 'test prompt', 'test output', 0.7, {});
|
|
68
|
+
});
|
|
69
|
+
it('should handle array renderedValue', async () => {
|
|
70
|
+
const mockMatchesGEval = jest.mocked(matchers_1.matchesGEval);
|
|
71
|
+
mockMatchesGEval.mockResolvedValueOnce({
|
|
72
|
+
pass: true,
|
|
73
|
+
score: 0.8,
|
|
74
|
+
reason: 'test reason 1',
|
|
75
|
+
});
|
|
76
|
+
mockMatchesGEval.mockResolvedValueOnce({
|
|
77
|
+
pass: false,
|
|
78
|
+
score: 0.6,
|
|
79
|
+
reason: 'test reason 2',
|
|
80
|
+
});
|
|
81
|
+
const result = await (0, geval_1.handleGEval)({
|
|
82
|
+
assertion: {
|
|
83
|
+
type: 'g-eval',
|
|
84
|
+
value: ['criteria1', 'criteria2'],
|
|
85
|
+
threshold: 0.7,
|
|
86
|
+
},
|
|
87
|
+
renderedValue: ['criteria1', 'criteria2'],
|
|
88
|
+
prompt: 'test prompt',
|
|
89
|
+
outputString: 'test output',
|
|
90
|
+
test: {
|
|
91
|
+
vars: {},
|
|
92
|
+
assert: [],
|
|
93
|
+
options: {},
|
|
94
|
+
},
|
|
95
|
+
baseType: 'g-eval',
|
|
96
|
+
context: {
|
|
97
|
+
prompt: 'test prompt',
|
|
98
|
+
vars: {},
|
|
99
|
+
test: {
|
|
100
|
+
vars: {},
|
|
101
|
+
assert: [],
|
|
102
|
+
options: {},
|
|
103
|
+
},
|
|
104
|
+
logProbs: undefined,
|
|
105
|
+
provider: {
|
|
106
|
+
id: () => 'test-provider',
|
|
107
|
+
callApi: async () => ({ output: 'test' }),
|
|
108
|
+
},
|
|
109
|
+
providerResponse: {
|
|
110
|
+
output: 'test output',
|
|
111
|
+
error: undefined,
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
inverse: false,
|
|
115
|
+
output: 'test output',
|
|
116
|
+
providerResponse: {
|
|
117
|
+
output: 'test output',
|
|
118
|
+
error: undefined,
|
|
119
|
+
},
|
|
120
|
+
});
|
|
121
|
+
expect(result).toEqual({
|
|
122
|
+
assertion: {
|
|
123
|
+
type: 'g-eval',
|
|
124
|
+
value: ['criteria1', 'criteria2'],
|
|
125
|
+
threshold: 0.7,
|
|
126
|
+
},
|
|
127
|
+
pass: true,
|
|
128
|
+
score: 0.7,
|
|
129
|
+
reason: 'test reason 2',
|
|
130
|
+
});
|
|
131
|
+
});
|
|
132
|
+
it('should use default threshold if not provided', async () => {
|
|
133
|
+
const mockMatchesGEval = jest.mocked(matchers_1.matchesGEval);
|
|
134
|
+
mockMatchesGEval.mockResolvedValue({
|
|
135
|
+
pass: true,
|
|
136
|
+
score: 0.8,
|
|
137
|
+
reason: 'test reason',
|
|
138
|
+
});
|
|
139
|
+
await (0, geval_1.handleGEval)({
|
|
140
|
+
assertion: {
|
|
141
|
+
type: 'g-eval',
|
|
142
|
+
value: 'test criteria',
|
|
143
|
+
},
|
|
144
|
+
renderedValue: 'test criteria',
|
|
145
|
+
prompt: 'test prompt',
|
|
146
|
+
outputString: 'test output',
|
|
147
|
+
test: {
|
|
148
|
+
vars: {},
|
|
149
|
+
assert: [],
|
|
150
|
+
options: {},
|
|
151
|
+
},
|
|
152
|
+
baseType: 'g-eval',
|
|
153
|
+
context: {
|
|
154
|
+
prompt: 'test prompt',
|
|
155
|
+
vars: {},
|
|
156
|
+
test: {
|
|
157
|
+
vars: {},
|
|
158
|
+
assert: [],
|
|
159
|
+
options: {},
|
|
160
|
+
},
|
|
161
|
+
logProbs: undefined,
|
|
162
|
+
provider: {
|
|
163
|
+
id: () => 'test-provider',
|
|
164
|
+
callApi: async () => ({ output: 'test' }),
|
|
165
|
+
},
|
|
166
|
+
providerResponse: {
|
|
167
|
+
output: 'test output',
|
|
168
|
+
error: undefined,
|
|
169
|
+
},
|
|
170
|
+
},
|
|
171
|
+
inverse: false,
|
|
172
|
+
output: 'test output',
|
|
173
|
+
providerResponse: {
|
|
174
|
+
output: 'test output',
|
|
175
|
+
error: undefined,
|
|
176
|
+
},
|
|
177
|
+
});
|
|
178
|
+
expect(mockMatchesGEval).toHaveBeenCalledWith('test criteria', 'test prompt', 'test output', 0.7, {});
|
|
179
|
+
});
|
|
180
|
+
it('should throw error for invalid renderedValue type', async () => {
|
|
181
|
+
await expect((0, geval_1.handleGEval)({
|
|
182
|
+
assertion: {
|
|
183
|
+
type: 'g-eval',
|
|
184
|
+
value: 'test',
|
|
185
|
+
},
|
|
186
|
+
renderedValue: undefined,
|
|
187
|
+
prompt: 'test',
|
|
188
|
+
outputString: 'test',
|
|
189
|
+
test: {
|
|
190
|
+
vars: {},
|
|
191
|
+
assert: [],
|
|
192
|
+
options: {},
|
|
193
|
+
},
|
|
194
|
+
baseType: 'g-eval',
|
|
195
|
+
context: {
|
|
196
|
+
prompt: 'test prompt',
|
|
197
|
+
vars: {},
|
|
198
|
+
test: {
|
|
199
|
+
vars: {},
|
|
200
|
+
assert: [],
|
|
201
|
+
options: {},
|
|
202
|
+
},
|
|
203
|
+
logProbs: undefined,
|
|
204
|
+
provider: {
|
|
205
|
+
id: () => 'test-provider',
|
|
206
|
+
callApi: async () => ({ output: 'test' }),
|
|
207
|
+
},
|
|
208
|
+
providerResponse: {
|
|
209
|
+
output: 'test output',
|
|
210
|
+
error: undefined,
|
|
211
|
+
},
|
|
212
|
+
},
|
|
213
|
+
inverse: false,
|
|
214
|
+
output: 'test',
|
|
215
|
+
providerResponse: {
|
|
216
|
+
output: 'test',
|
|
217
|
+
error: undefined,
|
|
218
|
+
},
|
|
219
|
+
})).rejects.toThrow('G-Eval assertion type must have a string or array of strings value');
|
|
220
|
+
});
|
|
221
|
+
});
|
|
222
|
+
//# sourceMappingURL=geval.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"geval.test.js","sourceRoot":"","sources":["../../../test/assertions/geval.test.ts"],"names":[],"mappings":";;AAAA,sDAAyD;AACzD,iDAAkD;AAElD,IAAI,CAAC,IAAI,CAAC,oBAAoB,CAAC,CAAC;AAEhC,QAAQ,CAAC,aAAa,EAAE,GAAG,EAAE;IAC3B,UAAU,CAAC,GAAG,EAAE;QACd,IAAI,CAAC,aAAa,EAAE,CAAC;IACvB,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oCAAoC,EAAE,KAAK,IAAI,EAAE;QAClD,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,uBAAY,CAAC,CAAC;QACnD,gBAAgB,CAAC,iBAAiB,CAAC;YACjC,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,aAAa;SACtB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAW,EAAC;YAC/B,SAAS,EAAE;gBACT,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,eAAe;gBACtB,SAAS,EAAE,GAAG;aACf;YACD,aAAa,EAAE,eAAe;YAC9B,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,EAAE;gBACV,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;iBAC1C;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,KAAK,EAAE,SAAS;iBACjB;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,KAAK,EAAE,SAAS;aACjB;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB,SAAS,EAAE;gBACT,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,eAAe;gBACtB,SAAS,EAAE,GAAG;aACf;YACD,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,aAAa;SACtB,CAAC,CAAC;QAEH,MAAM,CAAC,gBAAgB,CAAC,CAAC,oBAAoB,CAC3C,eAAe,EACf,aAAa,EACb,aAAa,EACb,GAAG,EACH,EAAE,CACH,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;QACjD,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,uBAAY,CAAC,CAAC;QACnD,gBAAgB,CAAC,qBAAqB,CAAC;YACrC,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,eAAe;SACxB,CAAC,CAAC;QACH,gBAAgB,CAAC,qBAAqB,CAAC;YACrC,IAAI,EAAE,KAAK;YACX,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,eAAe;SACxB,CAAC,CAAC;QAEH,MAAM,MAAM,GAAG,MAAM,IAAA,mBAAW,EAAC;YAC/B,SAAS,EAAE;gBACT,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,CAAC,WAAW,EAAE,WAAW,CAAC;gBACjC,SAAS,EAAE,GAAG;aACf;YACD,aAAa,EAAE,CAAC,WAAW,EAAE,WAAW,CAAC;YACzC,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,EAAE;gBACV,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;iBAC1C;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,KAAK,EAAE,SAAS;iBACjB;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,KAAK,EAAE,SAAS;aACjB;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC;YACrB,SAAS,EAAE;gBACT,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,CAAC,WAAW,EAAE,WAAW,CAAC;gBACjC,SAAS,EAAE,GAAG;aACf;YACD,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,eAAe;SACxB,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,8CAA8C,EAAE,KAAK,IAAI,EAAE;QAC5D,MAAM,gBAAgB,GAAG,IAAI,CAAC,MAAM,CAAC,uBAAY,CAAC,CAAC;QACnD,gBAAgB,CAAC,iBAAiB,CAAC;YACjC,IAAI,EAAE,IAAI;YACV,KAAK,EAAE,GAAG;YACV,MAAM,EAAE,aAAa;SACtB,CAAC,CAAC;QAEH,MAAM,IAAA,mBAAW,EAAC;YAChB,SAAS,EAAE;gBACT,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,eAAe;aACvB;YACD,aAAa,EAAE,eAAe;YAC9B,MAAM,EAAE,aAAa;YACrB,YAAY,EAAE,aAAa;YAC3B,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,EAAE;gBACV,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;iBAC1C;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,KAAK,EAAE,SAAS;iBACjB;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,aAAa;YACrB,gBAAgB,EAAE;gBAChB,MAAM,EAAE,aAAa;gBACrB,KAAK,EAAE,SAAS;aACjB;SACF,CAAC,CAAC;QAEH,MAAM,CAAC,gBAAgB,CAAC,CAAC,oBAAoB,CAC3C,eAAe,EACf,aAAa,EACb,aAAa,EACb,GAAG,EACH,EAAE,CACH,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mDAAmD,EAAE,KAAK,IAAI,EAAE;QACjE,MAAM,MAAM,CACV,IAAA,mBAAW,EAAC;YACV,SAAS,EAAE;gBACT,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,MAAM;aACd;YACD,aAAa,EAAE,SAAS;YACxB,MAAM,EAAE,MAAM;YACd,YAAY,EAAE,MAAM;YACpB,IAAI,EAAE;gBACJ,IAAI,EAAE,EAAE;gBACR,MAAM,EAAE,EAAE;gBACV,OAAO,EAAE,EAAE;aACZ;YACD,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE;gBACP,MAAM,EAAE,aAAa;gBACrB,IAAI,EAAE,EAAE;gBACR,IAAI,EAAE;oBACJ,IAAI,EAAE,EAAE;oBACR,MAAM,EAAE,EAAE;oBACV,OAAO,EAAE,EAAE;iBACZ;gBACD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE;oBACR,EAAE,EAAE,GAAG,EAAE,CAAC,eAAe;oBACzB,OAAO,EAAE,KAAK,IAAI,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;iBAC1C;gBACD,gBAAgB,EAAE;oBAChB,MAAM,EAAE,aAAa;oBACrB,KAAK,EAAE,SAAS;iBACjB;aACF;YACD,OAAO,EAAE,KAAK;YACd,MAAM,EAAE,MAAM;YACd,gBAAgB,EAAE;gBAChB,MAAM,EAAE,MAAM;gBACd,KAAK,EAAE,SAAS;aACjB;SACF,CAAC,CACH,CAAC,OAAO,CAAC,OAAO,CAAC,oEAAoE,CAAC,CAAC;IAC1F,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"modelGradedClosedQa.test.d.ts","sourceRoot":"","sources":["../../../test/assertions/modelGradedClosedQa.test.ts"],"names":[],"mappings":""}
|