promptfoo 0.91.3 → 0.92.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181)
  1. package/dist/drizzle/0006_harsh_caretaker.sql +42 -0
  2. package/dist/drizzle/0007_cloudy_wong.sql +1 -0
  3. package/dist/drizzle/meta/0006_snapshot.json +721 -0
  4. package/dist/drizzle/meta/0007_snapshot.json +723 -0
  5. package/dist/drizzle/meta/_journal.json +14 -0
  6. package/dist/package.json +10 -8
  7. package/dist/src/app/assets/{index-C6z1nbLN.js → index-BpjzEMiv.js} +243 -241
  8. package/dist/src/app/assets/{index.es-oqbvfIxR.js → index.es-ihzvEu35.js} +1 -1
  9. package/dist/src/app/assets/{sync-D2s75VlC.js → sync-BosjlpGJ.js} +1 -1
  10. package/dist/src/app/index.html +3 -3
  11. package/dist/src/assertions.js +2 -2
  12. package/dist/src/assertions.js.map +1 -1
  13. package/dist/src/commands/cache.d.ts.map +1 -1
  14. package/dist/src/commands/cache.js +0 -2
  15. package/dist/src/commands/cache.js.map +1 -1
  16. package/dist/src/commands/eval.d.ts.map +1 -1
  17. package/dist/src/commands/eval.js +19 -16
  18. package/dist/src/commands/eval.js.map +1 -1
  19. package/dist/src/commands/export.d.ts.map +1 -1
  20. package/dist/src/commands/export.js +8 -31
  21. package/dist/src/commands/export.js.map +1 -1
  22. package/dist/src/commands/import.d.ts.map +1 -1
  23. package/dist/src/commands/import.js +52 -13
  24. package/dist/src/commands/import.js.map +1 -1
  25. package/dist/src/commands/list.d.ts.map +1 -1
  26. package/dist/src/commands/list.js +35 -7
  27. package/dist/src/commands/list.js.map +1 -1
  28. package/dist/src/commands/share.d.ts +2 -2
  29. package/dist/src/commands/share.d.ts.map +1 -1
  30. package/dist/src/commands/share.js +12 -13
  31. package/dist/src/commands/share.js.map +1 -1
  32. package/dist/src/commands/show.d.ts.map +1 -1
  33. package/dist/src/commands/show.js +10 -6
  34. package/dist/src/commands/show.js.map +1 -1
  35. package/dist/src/constants.d.ts +1 -0
  36. package/dist/src/constants.d.ts.map +1 -1
  37. package/dist/src/constants.js +2 -1
  38. package/dist/src/constants.js.map +1 -1
  39. package/dist/src/database/index.js +1 -1
  40. package/dist/src/database/index.js.map +1 -1
  41. package/dist/src/database/tables.d.ts +609 -11
  42. package/dist/src/database/tables.d.ts.map +1 -1
  43. package/dist/src/database/tables.js +111 -52
  44. package/dist/src/database/tables.js.map +1 -1
  45. package/dist/src/database/types.d.ts +3 -3
  46. package/dist/src/database/types.d.ts.map +1 -1
  47. package/dist/src/evaluator.d.ts +3 -2
  48. package/dist/src/evaluator.d.ts.map +1 -1
  49. package/dist/src/evaluator.js +75 -104
  50. package/dist/src/evaluator.js.map +1 -1
  51. package/dist/src/evaluatorHelpers.d.ts.map +1 -1
  52. package/dist/src/evaluatorHelpers.js +2 -1
  53. package/dist/src/evaluatorHelpers.js.map +1 -1
  54. package/dist/src/index.d.ts +2 -1
  55. package/dist/src/index.d.ts.map +1 -1
  56. package/dist/src/index.js +18 -10
  57. package/dist/src/index.js.map +1 -1
  58. package/dist/src/models/eval.d.ts +95 -0
  59. package/dist/src/models/eval.d.ts.map +1 -0
  60. package/dist/src/models/eval.js +390 -0
  61. package/dist/src/models/eval.js.map +1 -0
  62. package/dist/src/models/evalResult.d.ts +50 -0
  63. package/dist/src/models/evalResult.d.ts.map +1 -0
  64. package/dist/src/models/evalResult.js +122 -0
  65. package/dist/src/models/evalResult.js.map +1 -0
  66. package/dist/src/models/provider.d.ts +9 -0
  67. package/dist/src/models/provider.d.ts.map +1 -0
  68. package/dist/src/models/provider.js +47 -0
  69. package/dist/src/models/provider.js.map +1 -0
  70. package/dist/src/prompts/index.d.ts.map +1 -1
  71. package/dist/src/prompts/index.js +2 -1
  72. package/dist/src/prompts/index.js.map +1 -1
  73. package/dist/src/prompts/utils.d.ts +1 -0
  74. package/dist/src/prompts/utils.d.ts.map +1 -1
  75. package/dist/src/prompts/utils.js +7 -0
  76. package/dist/src/prompts/utils.js.map +1 -1
  77. package/dist/src/providers/fal.d.ts +2 -2
  78. package/dist/src/providers/fal.d.ts.map +1 -1
  79. package/dist/src/providers/fal.js +2 -1
  80. package/dist/src/providers/fal.js.map +1 -1
  81. package/dist/src/providers/http.js +2 -2
  82. package/dist/src/providers/http.js.map +1 -1
  83. package/dist/src/providers/palm.d.ts +4 -3
  84. package/dist/src/providers/palm.d.ts.map +1 -1
  85. package/dist/src/providers/palm.js +13 -3
  86. package/dist/src/providers/palm.js.map +1 -1
  87. package/dist/src/providers.js +5 -5
  88. package/dist/src/providers.js.map +1 -1
  89. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240617.json +10 -0
  90. package/dist/src/redteam/eval/excessive-agency/llm_rubric-20240618.json +10 -0
  91. package/dist/src/redteam/eval/harmful/llm_rubric-20240723.json +10 -0
  92. package/dist/src/redteam/eval/harmful/llm_rubric-20240724.json +10 -0
  93. package/dist/src/server/server.d.ts +1 -0
  94. package/dist/src/server/server.d.ts.map +1 -1
  95. package/dist/src/server/server.js +70 -31
  96. package/dist/src/server/server.js.map +1 -1
  97. package/dist/src/share.d.ts +2 -2
  98. package/dist/src/share.d.ts.map +1 -1
  99. package/dist/src/share.js +93 -34
  100. package/dist/src/share.js.map +1 -1
  101. package/dist/src/table.d.ts +2 -2
  102. package/dist/src/table.d.ts.map +1 -1
  103. package/dist/src/table.js +3 -3
  104. package/dist/src/table.js.map +1 -1
  105. package/dist/src/types/index.d.ts +163 -11
  106. package/dist/src/types/index.d.ts.map +1 -1
  107. package/dist/src/types/index.js +21 -1
  108. package/dist/src/types/index.js.map +1 -1
  109. package/dist/src/util/config/load.d.ts.map +1 -1
  110. package/dist/src/util/config/load.js +2 -1
  111. package/dist/src/util/config/load.js.map +1 -1
  112. package/dist/src/util/config/manage.d.ts.map +1 -1
  113. package/dist/src/util/config/manage.js.map +1 -1
  114. package/dist/src/util/convertEvalResultsToTable.d.ts +16 -0
  115. package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -0
  116. package/dist/src/util/convertEvalResultsToTable.js +136 -0
  117. package/dist/src/util/convertEvalResultsToTable.js.map +1 -0
  118. package/dist/src/util/createHash.d.ts +1 -0
  119. package/dist/src/util/createHash.d.ts.map +1 -1
  120. package/dist/src/util/createHash.js +9 -0
  121. package/dist/src/util/createHash.js.map +1 -1
  122. package/dist/src/util/file.d.ts +8 -0
  123. package/dist/src/util/file.d.ts.map +1 -0
  124. package/dist/src/util/file.js +13 -0
  125. package/dist/src/util/file.js.map +1 -0
  126. package/dist/src/util/index.d.ts +9 -14
  127. package/dist/src/util/index.d.ts.map +1 -1
  128. package/dist/src/util/index.js +132 -268
  129. package/dist/src/util/index.js.map +1 -1
  130. package/dist/src/util/time.d.ts +2 -0
  131. package/dist/src/util/time.d.ts.map +1 -0
  132. package/dist/src/util/time.js +7 -0
  133. package/dist/src/util/time.js.map +1 -0
  134. package/dist/src/util/transform.js +2 -2
  135. package/dist/src/util/transform.js.map +1 -1
  136. package/dist/src/validators/providers.d.ts +6 -0
  137. package/dist/src/validators/providers.d.ts.map +1 -1
  138. package/dist/src/validators/providers.js +1 -0
  139. package/dist/src/validators/providers.js.map +1 -1
  140. package/dist/src/validators/redteam.d.ts +6 -0
  141. package/dist/src/validators/redteam.d.ts.map +1 -1
  142. package/dist/test/commands/eval/filterFailingTests.test.js +24 -2
  143. package/dist/test/commands/eval/filterFailingTests.test.js.map +1 -1
  144. package/dist/test/evaluator.test.js +152 -74
  145. package/dist/test/evaluator.test.js.map +1 -1
  146. package/dist/test/factories/data/eval/database_records.d.ts +142 -0
  147. package/dist/test/factories/data/eval/database_records.d.ts.map +1 -0
  148. package/dist/test/factories/data/eval/database_records.js +251 -0
  149. package/dist/test/factories/data/eval/database_records.js.map +1 -0
  150. package/dist/test/factories/evalFactory.d.ts +768 -0
  151. package/dist/test/factories/evalFactory.d.ts.map +1 -0
  152. package/dist/test/factories/evalFactory.js +121 -0
  153. package/dist/test/factories/evalFactory.js.map +1 -0
  154. package/dist/test/index.test.js +20 -35
  155. package/dist/test/index.test.js.map +1 -1
  156. package/dist/test/models/eval.test.d.ts +2 -0
  157. package/dist/test/models/eval.test.d.ts.map +1 -0
  158. package/dist/test/models/eval.test.js +34 -0
  159. package/dist/test/models/eval.test.js.map +1 -0
  160. package/dist/test/providers.test.js +3 -3
  161. package/dist/test/providers.test.js.map +1 -1
  162. package/dist/test/server/share.test.d.ts +2 -0
  163. package/dist/test/server/share.test.d.ts.map +1 -0
  164. package/dist/test/server/share.test.js +36 -0
  165. package/dist/test/server/share.test.js.map +1 -0
  166. package/dist/test/server/v3evalToShare.json +507 -0
  167. package/dist/test/server/v4evalToShare.json +421 -0
  168. package/dist/test/types.test.js +56 -3
  169. package/dist/test/types.test.js.map +1 -1
  170. package/dist/test/util.file.test.d.ts +2 -0
  171. package/dist/test/util.file.test.d.ts.map +1 -0
  172. package/dist/test/util.file.test.js +32 -0
  173. package/dist/test/util.file.test.js.map +1 -0
  174. package/dist/test/util.listPrevious.test.d.ts +2 -0
  175. package/dist/test/util.listPrevious.test.d.ts.map +1 -0
  176. package/dist/test/util.listPrevious.test.js +37 -0
  177. package/dist/test/util.listPrevious.test.js.map +1 -0
  178. package/dist/test/util.test.js +38 -311
  179. package/dist/test/util.test.js.map +1 -1
  180. package/dist/tsconfig.tsbuildinfo +1 -0
  181. package/package.json +10 -8
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAmJ4B,4BAAQ;AAnJpC,oEAAuC;AACvC,8DAAsC;AAkJ7B,qBAlJF,oBAAU,CAkJE;AAjJnB,+CAAiC;AAiJZ,sBAAK;AAhJ1B,2CAAqD;AACrD,uCAA+D;AAC/D,yDAAyD;AA8InB,oBA9I/B,mBAAS,CA8I+B;AA7I/C,2CAA+C;AAC/C,4DAAgE;AAChE,0DAAoE;AACpE,+CAA4C;AAC5C,+CAA4C;AAC5C,iDAA8E;AAC9E,qDAAkD;AAClD,4DAAoC;AACpC,2CAAwC;AASxC,iCAMgB;AAEhB,0CAAwB;AAExB,iCAAwC;AAA/B,sGAAA,aAAa,OAAA;AAEtB,KAAK,UAAU,QAAQ,CAAC,SAA4B,EAAE,UAA2B,EAAE;IACjF,MAAM,oBAAoB,GAAc;QACtC,GAAG,SAAS;QACZ,SAAS,EAAE,SAAS,CAAC,SAAuB;QAC5C,SAAS,EAAE,MAAM,IAAA,4BAAgB,EAAC,SAAS,CAAC,SAAS,EAAE;YACrD,GAAG,EAAE,SAAS,CAAC,GAAG;SACnB,CAAC;QACF,KAAK,EAAE,MAAM,IAAA,qBAAS,EAAC,SAAS,CAAC,KAAK,CAAC;QAEvC,eAAe,EAAE,MAAM,IAAA,kBAAW,EAAC,SAAS,CAAC,eAAe,IAAI,EAAE,CAAC;QAEnE,wCAAwC;QACxC,OAAO,EAAE,CACP,MAAM,OAAO,CAAC,GAAG,CACf,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;YAC1C,IAAI,OAAO,WAAW,KAAK,UAAU,EAAE,CAAC;gBACtC,OAAO;oBACL,GAAG,EAAE,WAAW,CAAC,QAAQ,EAAE;oBAC3B,KAAK,EAAE,WAAW,EAAE,IAAI,IAAI,WAAW,CAAC,QAAQ,EAAE;oBAClD,QAAQ,EAAE,WAA6B;iBACxC,CAAC;YACJ,CAAC;iBAAM,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;gBAC3C,OAAO,IAAA,qBAAW,EAAC,WAAW,CAAC,CAAC;YAClC,CAAC;iBAAM,CAAC;gBACN,OAAO;oBACL,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;oBAChC,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;iBACnC,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CACH,CACF,CAAC,IAAI,EAAE;KACT,CAAC;IAEF,2BAA2B;IAC3B,KAAK,MAAM,IAAI,IAAI,oBAAoB,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;QACpD,IAAI,IAAI,CAAC,OAAO,EAAE,QAAQ,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC1E,IAAI,CAAC,OAAO,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACvE,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;oBAChF,SAAS;gBACX,CAAC;gBAED,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;oBACvB,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;wBAC3C,MAAM,MAAM,GAAG,SAAS,CAAC,QAA2B,CAAC;wBACr
D,IAAA,wBAAS,EAAC,MAAM,CAAC,EAAE,EAAE,iCAAiC,CAAC,CAAC;wBACxD,SAAS,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,MAAM,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC7E,CAAC;yBAAM,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;wBAClD,SAAS,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;oBACjE,CAAC;yBAAM,CAAC;wBACN,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;oBAC3C,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;QACtE,KAAK,CAAC,YAAY,EAAE,CAAC;IACvB,CAAC;IAED,MAAM,uBAAuB,GAAG,IAAA,+BAAqB,EAAC,SAAS,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAE/F,gBAAgB;IAChB,MAAM,GAAG,GAAG,MAAM,IAAA,oBAAU,EAC1B;QACE,GAAG,oBAAoB;QACvB,iBAAiB,EAAE,uBAAuB;KAC3C,EACD;QACE,WAAW,EAAE,SAAS;QACtB,GAAG,OAAO;KACX,CACF,CAAC;IAEF,MAAM,aAAa,GAAG,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE,oBAAoB,CAAC,OAAO,EAAE,CAAC;IAC9E,IAAI,MAAM,GAAkB,IAAI,CAAC;IACjC,IAAI,SAAS,CAAC,kBAAkB,EAAE,CAAC;QACjC,MAAM,IAAA,6CAAsC,GAAE,CAAC;QAC/C,MAAM,GAAG,MAAM,IAAA,6BAAsB,EAAC,GAAG,EAAE,aAAa,CAAC,CAAC;IAC5D,CAAC;IAED,IAAI,SAAS,CAAC,UAAU,EAAE,CAAC;QACzB,IAAI,OAAO,SAAS,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;YAC7C,MAAM,IAAA,kBAAW,EAAC,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,CAAC,CAAC;QAC5E,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAA,2BAAoB,EAAC,SAAS,CAAC,UAAU,EAAE,MAAM,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,CAAC,CAAC;QACrF,CAAC;IACH,CAAC;IAED,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;IACvB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,OAAO,GAAG;IACd,UAAU,EAAE;QACV,eAAe,EAAf,0BAAe;QACf,oBAAoB,EAApB,8BAAoB;KACrB;IACD,OAAO,EAAE,iBAAO;IAChB,OAAO,EAAP,iBAAO;IACP,UAAU,EAAV,uBAAU;IACV,IAAI,EAAE;QACJ,MAAM,EAAE,wBAAiB;QACzB,MAAM,EAAE,wBAAiB;KAC1B;CACF,CAAC;AAE+C,0BAAO;AAExD,kBAAe;IACb,UAAU,EAAV,oBAAU;IACV,KAAK;IACL,QAAQ;IACR,SAAS,EAAT,mBAAS;IACT,OAAO;CACR,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAwJ4B,4BAAQ;AAxJpC,oEAAuC;AACvC,8DAAsC;AAuJ7B,qBAvJF,oBAAU,CAuJE;AAtJnB,+CAAiC;AAsJZ,sBAAK;AArJ1B,2CAAqD;AACrD,uCAA4C;AAC5C,yDAAiC;AACjC,uCAA+D;AAC/D,yDAAyD;AAiJnB,oBAjJ/B,mBAAS,CAiJ+B;AAhJ/C,2CAA+C;AAC/C,4DAAgE;AAChE,0DAAoE;AACpE,+CAA4C;AAC5C,+CAA4C;AAC5C,iDAA8E;AAC9E,qDAAkD;AAClD,4DAAoC;AACpC,2CAAwC;AASxC,iCAAwE;AACxE,kDAAoD;AAEpD,0CAAwB;AAExB,iCAAwC;AAA/B,sGAAA,aAAa,OAAA;AAEtB,KAAK,UAAU,QAAQ,CAAC,SAA4B,EAAE,UAA2B,EAAE;IACjF,IAAI,SAAS,CAAC,kBAAkB,EAAE,CAAC;QACjC,MAAM,IAAA,yBAAe,GAAE,CAAC;IAC1B,CAAC;IAED,MAAM,oBAAoB,GAAc;QACtC,GAAG,SAAS;QACZ,SAAS,EAAE,SAAS,CAAC,SAAuB;QAC5C,SAAS,EAAE,MAAM,IAAA,4BAAgB,EAAC,SAAS,CAAC,SAAS,EAAE;YACrD,GAAG,EAAE,SAAS,CAAC,GAAG;SACnB,CAAC;QACF,KAAK,EAAE,MAAM,IAAA,qBAAS,EAAC,SAAS,CAAC,KAAK,CAAC;QAEvC,eAAe,EAAE,MAAM,IAAA,kBAAW,EAAC,SAAS,CAAC,eAAe,IAAI,EAAE,CAAC;QAEnE,wCAAwC;QACxC,OAAO,EAAE,CACP,MAAM,OAAO,CAAC,GAAG,CACf,SAAS,CAAC,OAAO,CAAC,GAAG,CAAC,KAAK,EAAE,WAAW,EAAE,EAAE;YAC1C,IAAI,OAAO,WAAW,KAAK,UAAU,EAAE,CAAC;gBACtC,OAAO;oBACL,GAAG,EAAE,WAAW,CAAC,QAAQ,EAAE;oBAC3B,KAAK,EAAE,WAAW,EAAE,IAAI,IAAI,WAAW,CAAC,QAAQ,EAAE;oBAClD,QAAQ,EAAE,WAA6B;iBACxC,CAAC;YACJ,CAAC;iBAAM,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;gBAC3C,OAAO,IAAA,qBAAW,EAAC,WAAW,CAAC,CAAC;YAClC,CAAC;YACD,IAAI,CAAC;gBACH,OAAO,sBAAY,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;YACzC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,OAAO,CAAC,IAAI,CACV,8CAA8C,KAAK,kDAAkD,CACtG,CAAC;gBACF,OAAO;oBACL,GAAG,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;oBAChC,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,WAAW,CAAC;iBACnC,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CACH,CACF,CAAC,IAAI,EAAE;KACT,CAAC;IAEF,2BAA2B;IAC3B,KAAK,MAAM,IAAI,IAAI,oBAAoB,CAAC,KAAK,IAAI,EAAE,EAAE,CAAC;QACpD,IAAI,IAAI,CAAC,OAAO,EAAE,QAAQ,IAAI,OAAO,IAAI,CAAC,OAAO,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC1E,IAAI,CAAC,OAAO,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACvE,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,KAAK,MAAM,SAAS,IAAI,IAAI
,CAAC,MAAM,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,UAAU,EAAE,CAAC;oBAChF,SAAS;gBACX,CAAC;gBAED,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC;oBACvB,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;wBAC3C,MAAM,MAAM,GAAG,SAAS,CAAC,QAA2B,CAAC;wBACrD,IAAA,wBAAS,EAAC,MAAM,CAAC,EAAE,EAAE,iCAAiC,CAAC,CAAC;wBACxD,SAAS,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,MAAM,CAAC,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;oBAC7E,CAAC;yBAAM,IAAI,OAAO,SAAS,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;wBAClD,SAAS,CAAC,QAAQ,GAAG,MAAM,IAAA,2BAAe,EAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;oBACjE,CAAC;yBAAM,CAAC;wBACN,MAAM,IAAI,KAAK,CAAC,uBAAuB,CAAC,CAAC;oBAC3C,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,iBAAiB;IACjB,IAAI,OAAO,CAAC,KAAK,KAAK,KAAK,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;QACtE,KAAK,CAAC,YAAY,EAAE,CAAC;IACvB,CAAC;IAED,MAAM,uBAAuB,GAAG,IAAA,+BAAqB,EAAC,SAAS,EAAE,oBAAoB,CAAC,OAAO,CAAC,CAAC;IAC/F,MAAM,aAAa,GAAG,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE,oBAAoB,CAAC,OAAO,EAAE,CAAC;IAC9E,MAAM,UAAU,GAAG,SAAS,CAAC,kBAAkB;QAC7C,CAAC,CAAC,MAAM,cAAI,CAAC,MAAM,CAAC,aAAa,EAAE,oBAAoB,CAAC,OAAO,CAAC;QAChE,CAAC,CAAC,IAAI,cAAI,CAAC,aAAa,CAAC,CAAC;IAE5B,gBAAgB;IAChB,MAAM,GAAG,GAAG,MAAM,IAAA,oBAAU,EAC1B;QACE,GAAG,oBAAoB;QACvB,iBAAiB,EAAE,uBAAuB;KAC3C,EACD,UAAU,EACV;QACE,WAAW,EAAE,SAAS;QACtB,GAAG,OAAO;KACX,CACF,CAAC;IAEF,IAAI,SAAS,CAAC,UAAU,EAAE,CAAC;QACzB,IAAI,OAAO,SAAS,CAAC,UAAU,KAAK,QAAQ,EAAE,CAAC;YAC7C,MAAM,IAAA,kBAAW,EAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5D,CAAC;aAAM,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/C,MAAM,IAAA,2BAAoB,EAAC,SAAS,CAAC,UAAU,EAAE,UAAU,EAAE,IAAI,CAAC,CAAC;QACrE,CAAC;IACH,CAAC;IAED,MAAM,mBAAS,CAAC,IAAI,EAAE,CAAC;IACvB,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,OAAO,GAAG;IACd,UAAU,EAAE;QACV,eAAe,EAAf,0BAAe;QACf,oBAAoB,EAApB,8BAAoB;KACrB;IACD,OAAO,EAAE,iBAAO;IAChB,OAAO,EAAP,iBAAO;IACP,UAAU,EAAV,uBAAU;IACV,IAAI,EAAE;QACJ,MAAM,EAAE,wBAAiB;QACzB,MAAM,EAAE,wBAAiB;KAC1B;CACF,CAAC;AAE+C,0BAAO;AAExD,kBAAe;IACb,UAAU,EAAV,oBAAU;IACV,KAAK;IACL,QAAQ;IA
CR,SAAS,EAAT,mBAAS;IACT,OAAO;CACR,CAAC"}
@@ -0,0 +1,95 @@
1
+ import type { AtomicTestCase, CompletedPrompt, EvaluateResult, EvaluateSummaryV3, EvaluateSummaryV2, EvaluateTable, Prompt, ResultsFile, UnifiedConfig } from '../types';
2
+ import EvalResult from './evalResult';
3
+ import type Provider from './provider';
4
/**
 * Builds an eval id of the form `eval-<randomSequence(3)>-<timestamp>`, where the
 * timestamp is the first 19 characters of `createdAt.toISOString()`.
 * `createdAt` defaults to the current time.
 */
+ export declare function createEvalId(createdAt?: Date): string;
5
/** Query helpers that operate on several evals at once. */
+ export declare class EvalQueries {
6
/**
 * Resolves to the distinct variable names (keys of `test_case.$.vars`) found in the
 * stored results of the given evals, grouped by eval id.
 */
+ static getVarsFromEvals(evals: Eval[]): Promise<Record<string, string[]>>;
7
+ }
8
/**
 * Model for a single evaluation run.
 *
 * An eval is "version 3" when it carries legacy `oldResults` containing a `table`,
 * and "version 4" otherwise (see `version()`); several accessors branch on that.
 */
+ export default class Eval {
9
+ id: string;
10
/** Creation time as returned by `Date#getTime()`. */
+ createdAt: number;
11
+ author?: string;
12
+ description?: string;
13
+ config: Partial<UnifiedConfig>;
14
/** In-memory results; starts empty and is filled by `addResult` / `loadResults`. */
+ results: EvalResult[];
15
+ datasetId?: string;
16
+ prompts: CompletedPrompt[];
17
/** Legacy summary attached by `findById` when the stored results contain a `table`. */
+ oldResults?: EvaluateSummaryV2;
18
/** True once the eval is known to be backed by a database row. */
+ persisted: boolean;
19
/** Loads the eval with the greatest `createdAt`, or `undefined` when none exist. */
+ static latest(): Promise<Eval | undefined>;
20
/** Loads an eval by id, or `undefined` when no row matches. */
+ static findById(id: string): Promise<Eval | undefined>;
21
/** Loads up to `limit` evals, newest first. */
+ static getMany(limit?: number): Promise<Eval[]>;
22
/** Inserts a new eval plus its prompt, dataset and tag relations and returns it marked as persisted. */
+ static create(config: Partial<UnifiedConfig>, renderedPrompts: Prompt[], // The config doesn't contain the actual prompts, so we need to pass them in separately
23
+ opts?: {
24
+ id?: string;
25
+ createdAt?: Date;
26
+ author?: string;
27
+ results?: EvalResult[];
28
+ }): Promise<Eval>;
29
/** Creates an in-memory eval; nothing is written to the database here. */
+ constructor(config: Partial<UnifiedConfig>, opts?: {
30
+ id?: string;
31
+ createdAt?: Date;
32
+ author?: string;
33
+ description?: string;
34
+ prompts?: CompletedPrompt[];
35
+ datasetId?: string;
36
+ persisted?: boolean;
37
+ });
38
/** 3 when `oldResults` holds a `table`, otherwise 4. */
+ version(): 3 | 4;
39
/** True when `version()` is below 4. */
+ useOldResults(): boolean;
40
/** Replaces the legacy table; asserts that this is a version 3 eval with `oldResults`. */
+ setTable(table: EvaluateTable): void;
41
/** Writes config, prompts, description, author and a fresh `updatedAt` to the eval's row. */
+ save(): Promise<void>;
42
/** Variable names used by this eval's test cases. */
+ getVars(): Promise<string[]>;
43
/** Prompts of this eval: the legacy table head for version 3, `prompts` otherwise. */
+ getPrompts(): {
44
+ provider: string;
45
+ raw: string;
46
+ label: string;
47
+ function?: ((args_0: {
48
+ vars: Record<string, any>;
49
+ provider?: import("../types").ApiProvider | undefined;
50
+ }, ...args: unknown[]) => Promise<any>) | undefined;
51
+ id?: string | undefined;
52
+ config?: any;
53
+ display?: string | undefined;
54
+ metrics?: {
55
+ cost: number;
56
+ tokenUsage: {
57
+ cached?: number | undefined;
58
+ completion?: number | undefined;
59
+ prompt?: number | undefined;
60
+ total?: number | undefined;
61
+ };
62
+ score: number;
63
+ testPassCount: number;
64
+ testFailCount: number;
65
+ assertPassCount: number;
66
+ assertFailCount: number;
67
+ totalLatencyMs: number;
68
+ namedScores: Record<string, number>;
69
+ namedScoresCount: Record<string, number>;
70
+ redteam?: {
71
+ pluginPassCount: Record<string, number>;
72
+ pluginFailCount: Record<string, number>;
73
+ strategyPassCount: Record<string, number>;
74
+ strategyFailCount: Record<string, number>;
75
+ } | undefined;
76
+ } | undefined;
77
+ }[];
78
/** Legacy table for version 3 evals; otherwise a table converted from `toResultsFile()`. */
+ getTable(): Promise<EvaluateTable>;
79
/** Creates an `EvalResult` for this eval (persisting it when the eval is persisted) and appends it to `results`. */
+ addResult(result: EvaluateResult, test: AtomicTestCase): Promise<void>;
80
/** Replaces `prompts`, and updates the stored row when the eval is persisted. */
+ addPrompts(prompts: CompletedPrompt[]): Promise<void>;
81
/** Links the given providers to this eval in the database; does nothing when the eval is not persisted. */
+ addProviders(providers: Provider[]): Promise<void>;
82
/** Reloads `results` from the stored eval results for this eval id. */
+ loadResults(): Promise<void>;
83
/** Legacy results for version 3 evals; otherwise freshly loaded `results`. */
+ getResults(): Promise<EvaluateResult[] | EvalResult[]>;
84
+ toEvaluateSummary(): Promise<EvaluateSummaryV3 | EvaluateSummaryV2>;
85
+ toResultsFile(): Promise<ResultsFile>;
86
+ delete(): Promise<void>;
87
+ }
88
/**
 * Resolves to one summary record per eval, with the fields listed in the return type.
 * NOTE(review): the implementation is not part of this declaration; presumably `limit`
 * caps the number of records and `filterDescription` / `datasetId` narrow them — confirm
 * against eval.js.
 */
+ export declare function getSummaryofLatestEvals(limit?: number, filterDescription?: string, datasetId?: string): Promise<{
89
+ evalId: string;
90
+ createdAt: number;
91
+ description: string | null;
92
+ numTests: number;
93
+ datasetId: string | null;
94
+ }[]>;
95
+ //# sourceMappingURL=eval.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.d.ts","sourceRoot":"","sources":["../../../src/models/eval.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EACV,cAAc,EACd,eAAe,EACf,cAAc,EAEd,iBAAiB,EACjB,iBAAiB,EACjB,aAAa,EACb,MAAM,EACN,WAAW,EACX,aAAa,EACd,MAAM,UAAU,CAAC;AAIlB,OAAO,UAAU,MAAM,cAAc,CAAC;AACtC,OAAO,KAAK,QAAQ,MAAM,YAAY,CAAC;AAEvC,wBAAgB,YAAY,CAAC,SAAS,GAAE,IAAiB,UAExD;AAED,qBAAa,WAAW;WACT,gBAAgB,CAAC,KAAK,EAAE,IAAI,EAAE;CAe5C;AAED,MAAM,CAAC,OAAO,OAAO,IAAI;IACvB,EAAE,EAAE,MAAM,CAAC;IACX,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,CAAC;IAE/B,OAAO,EAAE,UAAU,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,eAAe,EAAE,CAAC;IAC3B,UAAU,CAAC,EAAE,iBAAiB,CAAC;IAC/B,SAAS,EAAE,OAAO,CAAC;WAEN,MAAM;WAiBN,QAAQ,CAAC,EAAE,EAAE,MAAM;WAuCnB,OAAO,CAAC,KAAK,GAAE,MAA4B;WAqB3C,MAAM,CACjB,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,eAAe,EAAE,MAAM,EAAE,EAAE,uFAAuF;IAClH,IAAI,CAAC,EAAE;QACL,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,SAAS,CAAC,EAAE,IAAI,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;QAEhB,OAAO,CAAC,EAAE,UAAU,EAAE,CAAC;KACxB,GACA,OAAO,CAAC,IAAI,CAAC;gBAgGd,MAAM,EAAE,OAAO,CAAC,aAAa,CAAC,EAC9B,IAAI,CAAC,EAAE;QACL,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,SAAS,CAAC,EAAE,IAAI,CAAC;QACjB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,WAAW,CAAC,EAAE,MAAM,CAAC;QACrB,OAAO,CAAC,EAAE,eAAe,EAAE,CAAC;QAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,OAAO,CAAC;KACrB;IAaH,OAAO;IAQP,aAAa;IAIb,QAAQ,CAAC,KAAK,EAAE,aAAa;IAMvB,IAAI;IAkBJ,OAAO;IAcb,UAAU;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;IAQJ,QAAQ,IAAI,OAAO,CAAC,aAAa,CAAC;IAOlC,SAAS,CAAC,MAAM,EAAE,cAAc,EAAE,IAAI,EAAE,cAAc;IAOtD,UAAU,CAAC,OAAO,EAAE,eAAe,EAAE;IAQrC,YAAY,CAAC,SAAS,EAAE,QAAQ,EAAE;IAkBlC,WAAW;IAIX,UAAU,IAAI,OAAO,CAAC,cAAc,EAAE,GAAG,UAAU,EAAE,CAAC;IAQtD,iBAAiB,IAAI,OAAO,CAAC,iBAAiB,GAAG,iBAAiB,CAAC;IA2CnE,aAAa,IAAI,OAAO,CAAC,WAAW,CAAC;IAcrC,MAAM;CAWb;AAED,wBAAsB,uBAAuB,CAC3C,KAAK,GAAE,MAA4B,EACnC,iBAAiB,CAAC,EAAE,MAAM,EAC1B,SAAS,CAAC,EAAE,MAAM;;;;;;KAuCnB"}
@@ -0,0 +1,390 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.EvalQueries = void 0;
7
+ exports.createEvalId = createEvalId;
8
+ exports.getSummaryofLatestEvals = getSummaryofLatestEvals;
9
+ const crypto_1 = require("crypto");
10
+ const drizzle_orm_1 = require("drizzle-orm");
11
+ const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
12
+ const constants_1 = require("../constants");
13
+ const database_1 = require("../database");
14
+ const tables_1 = require("../database/tables");
15
+ const logger_1 = __importDefault(require("../logger"));
16
+ const utils_1 = require("../prompts/utils");
17
+ const convertEvalResultsToTable_1 = require("../util/convertEvalResultsToTable");
18
+ const createHash_1 = require("../util/createHash");
19
+ const time_1 = require("../util/time");
20
+ const evalResult_1 = __importDefault(require("./evalResult"));
21
/**
 * Builds an eval identifier of the form `eval-<sequence>-<timestamp>`.
 *
 * @param {Date} [createdAt] - Creation time; defaults to the current time.
 * @returns {string} `eval-`, the value of `randomSequence(3)`, and the first 19
 *   characters of the ISO-8601 rendering of `createdAt` (i.e. down to the second).
 */
function createEvalId(createdAt = new Date()) {
  const sequence = (0, createHash_1.randomSequence)(3);
  // "YYYY-MM-DDTHH:mm:ss" is exactly 19 characters; fraction and zone are dropped.
  const timestamp = createdAt.toISOString().slice(0, 19);
  return `eval-${sequence}-${timestamp}`;
}
24
/**
 * Query helpers that operate on several evals at once.
 */
class EvalQueries {
  /**
   * Collects the distinct variable names used by each of the given evals.
   *
   * Reads the `$.vars` object of every `eval_results.test_case` row whose
   * `eval_id` belongs to one of `evals` and groups the distinct keys by eval id.
   *
   * @param {Array<{id: string}>} evals - Evals to inspect; only `id` is read.
   * @returns {Promise<Record<string, string[]>>} Variable names keyed by eval id.
   *   Evals without matching rows have no entry; an empty input yields `{}`.
   */
  static async getVarsFromEvals(evals) {
    // Nothing to look up: avoid sending a query with an empty `IN ()` list.
    if (evals.length === 0) {
      return {};
    }
    const db = (0, database_1.getDb)();
    // Bind every id as a query parameter instead of splicing it into raw SQL
    // text, so an id containing quotes cannot alter the statement.
    const idParams = drizzle_orm_1.sql.join(
      evals.map((e) => (0, drizzle_orm_1.sql)`${e.id}`),
      (0, drizzle_orm_1.sql)`, `,
    );
    const query = (0, drizzle_orm_1.sql)`SELECT DISTINCT j.key, eval_id from (SELECT eval_id, json_extract(eval_results.test_case, '$.vars') as vars
FROM eval_results where eval_id IN (${idParams})) t, json_each(t.vars) j;`;
    // @ts-ignore
    const results = await db.all(query);
    const vars = {};
    for (const row of results) {
      if (!vars[row.eval_id]) {
        vars[row.eval_id] = [];
      }
      vars[row.eval_id].push(row.key);
    }
    return vars;
  }
}
39
+ exports.EvalQueries = EvalQueries;
40
+ class Eval {
41
+ static async latest() {
42
+ const db = (0, database_1.getDb)();
43
+ const db_results = await db
44
+ .select({
45
+ id: tables_1.evalsTable.id,
46
+ })
47
+ .from(tables_1.evalsTable)
48
+ .orderBy((0, drizzle_orm_1.desc)(tables_1.evalsTable.createdAt))
49
+ .limit(1);
50
+ if (db_results.length === 0) {
51
+ return undefined;
52
+ }
53
+ return await Eval.findById(db_results[0].id);
54
+ }
55
+ static async findById(id) {
56
+ const db = (0, database_1.getDb)();
57
+ const { evals, datasetResults } = await db.transaction(async (tx) => {
58
+ const evals = await tx.select().from(tables_1.evalsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, id));
59
+ const datasetResults = await tx
60
+ .select({
61
+ datasetId: tables_1.evalsToDatasetsTable.datasetId,
62
+ })
63
+ .from(tables_1.evalsToDatasetsTable)
64
+ .where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasetsTable.evalId, id))
65
+ .limit(1);
66
+ return { evals, datasetResults };
67
+ });
68
+ if (evals.length === 0) {
69
+ return undefined;
70
+ }
71
+ const eval_ = evals[0];
72
+ const datasetId = datasetResults[0]?.datasetId;
73
+ const evalInstance = new Eval(eval_.config, {
74
+ id: eval_.id,
75
+ createdAt: new Date(eval_.createdAt),
76
+ author: eval_.author || undefined,
77
+ description: eval_.description || undefined,
78
+ prompts: eval_.prompts || [],
79
+ datasetId,
80
+ persisted: true,
81
+ });
82
+ if (eval_.results && 'table' in eval_.results) {
83
+ evalInstance.oldResults = eval_.results;
84
+ }
85
+ return evalInstance;
86
+ }
87
+ static async getMany(limit = constants_1.DEFAULT_QUERY_LIMIT) {
88
+ const db = (0, database_1.getDb)();
89
+ const evals = await db
90
+ .select()
91
+ .from(tables_1.evalsTable)
92
+ .limit(limit)
93
+ .orderBy((0, drizzle_orm_1.desc)(tables_1.evalsTable.createdAt))
94
+ .all();
95
+ return evals.map((e) => new Eval(e.config, {
96
+ id: e.id,
97
+ createdAt: new Date(e.createdAt),
98
+ author: e.author || undefined,
99
+ description: e.description || undefined,
100
+ prompts: e.prompts || [],
101
+ persisted: true,
102
+ }));
103
+ }
104
+ static async create(config, renderedPrompts, // The config doesn't contain the actual prompts, so we need to pass them in separately
105
+ opts) {
106
+ const createdAt = opts?.createdAt || new Date();
107
+ const evalId = opts?.id || createEvalId(createdAt);
108
+ const db = (0, database_1.getDb)();
109
+ await db.transaction((tx) => {
110
+ tx.insert(tables_1.evalsTable)
111
+ .values({
112
+ id: evalId,
113
+ createdAt: createdAt.getTime(),
114
+ author: opts?.author,
115
+ description: config.description,
116
+ config,
117
+ results: {},
118
+ })
119
+ .run();
120
+ if (opts?.results) {
121
+ const res = tx
122
+ .insert(tables_1.evalResultsTable)
123
+ .values(opts.results?.map((r) => ({ ...r, evalId, id: (0, crypto_1.randomUUID)() })))
124
+ .run();
125
+ logger_1.default.debug(`Inserted ${res.changes} eval results`);
126
+ }
127
+ for (const prompt of renderedPrompts) {
128
+ const label = prompt.label || prompt.display || prompt.raw;
129
+ const promptId = (0, utils_1.hashPrompt)(prompt);
130
+ tx.insert(tables_1.promptsTable)
131
+ .values({
132
+ id: promptId,
133
+ prompt: label,
134
+ })
135
+ .onConflictDoNothing()
136
+ .run();
137
+ tx.insert(tables_1.evalsToPromptsTable)
138
+ .values({
139
+ evalId,
140
+ promptId,
141
+ })
142
+ .onConflictDoNothing()
143
+ .run();
144
+ logger_1.default.debug(`Inserting prompt ${promptId}`);
145
+ }
146
+ // Record dataset relation
147
+ const datasetId = (0, createHash_1.sha256)(JSON.stringify(config.tests || []));
148
+ tx.insert(tables_1.datasetsTable)
149
+ .values({
150
+ id: datasetId,
151
+ tests: config.tests,
152
+ })
153
+ .onConflictDoNothing()
154
+ .run();
155
+ tx.insert(tables_1.evalsToDatasetsTable)
156
+ .values({
157
+ evalId,
158
+ datasetId,
159
+ })
160
+ .onConflictDoNothing()
161
+ .run();
162
+ logger_1.default.debug(`Inserting dataset ${datasetId}`);
163
+ // Record tags
164
+ if (config.tags) {
165
+ for (const [tagKey, tagValue] of Object.entries(config.tags)) {
166
+ const tagId = (0, createHash_1.sha256)(`${tagKey}:${tagValue}`);
167
+ tx.insert(tables_1.tagsTable)
168
+ .values({
169
+ id: tagId,
170
+ name: tagKey,
171
+ value: tagValue,
172
+ })
173
+ .onConflictDoNothing()
174
+ .run();
175
+ tx.insert(tables_1.evalsToTagsTable)
176
+ .values({
177
+ evalId,
178
+ tagId,
179
+ })
180
+ .onConflictDoNothing()
181
+ .run();
182
+ logger_1.default.debug(`Inserting tag ${tagId}`);
183
+ }
184
+ }
185
+ });
186
+ return new Eval(config, { id: evalId, author: opts?.author, createdAt, persisted: true });
187
+ }
188
+ constructor(config, opts) {
189
+ const createdAt = opts?.createdAt || new Date();
190
+ this.createdAt = createdAt.getTime();
191
+ this.id = opts?.id || createEvalId(createdAt);
192
+ this.author = opts?.author;
193
+ this.config = config;
194
+ this.results = [];
195
+ this.prompts = opts?.prompts || [];
196
+ this.datasetId = opts?.datasetId;
197
+ this.persisted = opts?.persisted || false;
198
+ }
199
+ version() {
200
+ /**
201
+ * Version 3 is the denormalized version of where the table and results are stored on the eval object.
202
+ * Version 4 is the normalized version where the results are stored in another databse table and the table for vizualization is generated by the app.
203
+ */
204
+ return this.oldResults && 'table' in this.oldResults ? 3 : 4;
205
+ }
206
+ useOldResults() {
207
+ return this.version() < 4;
208
+ }
209
+ setTable(table) {
210
+ (0, tiny_invariant_1.default)(this.version() < 4, 'Eval is not version 3');
211
+ (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
212
+ this.oldResults.table = table;
213
+ }
214
+ async save() {
215
+ const db = (0, database_1.getDb)();
216
+ const updateObj = {
217
+ config: this.config,
218
+ prompts: this.prompts,
219
+ description: this.config.description,
220
+ author: this.author,
221
+ updatedAt: (0, time_1.getCurrentTimestamp)(),
222
+ };
223
+ if (this.useOldResults()) {
224
+ (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
225
+ updateObj.results = this.oldResults;
226
+ }
227
+ await db.update(tables_1.evalsTable).set(updateObj).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, this.id)).run();
228
+ this.persisted = true;
229
+ }
230
+ async getVars() {
231
+ if (this.useOldResults()) {
232
+ (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
233
+ return this.oldResults.table?.head.vars || [];
234
+ }
235
+ const db = (0, database_1.getDb)();
236
+ const query = (0, drizzle_orm_1.sql) `SELECT DISTINCT j.key from (SELECT json_extract(test_case_results.test_case, '$.vars') as vars
237
+ FROM test_case_results where test_case_results.eval_id = ${this.id}) t, json_each(t.vars) j;`;
238
+ // @ts-ignore
239
+ const results = await db.all(query);
240
+ return results.map((r) => r.key) || [];
241
+ }
242
+ getPrompts() {
243
+ if (this.useOldResults()) {
244
+ (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
245
+ return this.oldResults.table?.head.prompts || [];
246
+ }
247
+ return this.prompts;
248
+ }
249
+ async getTable() {
250
+ if (this.useOldResults()) {
251
+ return this.oldResults?.table || { head: { prompts: [], vars: [] }, body: [] };
252
+ }
253
+ return (0, convertEvalResultsToTable_1.convertResultsToTable)(await this.toResultsFile());
254
+ }
255
+ async addResult(result, test) {
256
+ const newResult = await evalResult_1.default.createFromEvaluateResult(this.id, result, test, {
257
+ persist: this.persisted,
258
+ });
259
+ this.results.push(newResult);
260
+ }
261
+ async addPrompts(prompts) {
262
+ this.prompts = prompts;
263
+ if (this.persisted) {
264
+ const db = (0, database_1.getDb)();
265
+ await db.update(tables_1.evalsTable).set({ prompts }).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, this.id)).run();
266
+ }
267
+ }
268
+ async addProviders(providers) {
269
+ if (this.persisted) {
270
+ const db = (0, database_1.getDb)();
271
+ await db.transaction(async (tx) => {
272
+ for (const provider of providers) {
273
+ const id = provider.id;
274
+ tx.insert(tables_1.evalsToProvidersTable)
275
+ .values({
276
+ evalId: this.id,
277
+ providerId: id,
278
+ })
279
+ .onConflictDoNothing()
280
+ .run();
281
+ }
282
+ });
283
+ }
284
+ }
285
+ async loadResults() {
286
+ this.results = await evalResult_1.default.findManyByEvalId(this.id);
287
+ }
288
+ async getResults() {
289
+ if (this.useOldResults()) {
290
+ (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
291
+ return this.oldResults.results;
292
+ }
293
+ await this.loadResults();
294
+ return this.results;
295
+ }
296
+ async toEvaluateSummary() {
297
+ if (this.useOldResults()) {
298
+ (0, tiny_invariant_1.default)(this.oldResults, 'Old results not found');
299
+ return {
300
+ version: 2,
301
+ timestamp: new Date(this.createdAt).toISOString(),
302
+ results: this.oldResults.results,
303
+ table: this.oldResults.table,
304
+ stats: this.oldResults.stats,
305
+ };
306
+ }
307
+ if (this.results.length === 0) {
308
+ await this.loadResults();
309
+ }
310
+ const stats = {
311
+ successes: 0,
312
+ failures: 0,
313
+ tokenUsage: {
314
+ cached: 0,
315
+ completion: 0,
316
+ prompt: 0,
317
+ total: 0,
318
+ },
319
+ };
320
+ for (const prompt of this.prompts) {
321
+ stats.successes += prompt.metrics?.testPassCount || 0;
322
+ stats.failures += prompt.metrics?.testFailCount || 0;
323
+ stats.tokenUsage.prompt += prompt.metrics?.tokenUsage.prompt || 0;
324
+ stats.tokenUsage.cached += prompt.metrics?.tokenUsage.cached || 0;
325
+ stats.tokenUsage.completion += prompt.metrics?.tokenUsage.completion || 0;
326
+ stats.tokenUsage.total += prompt.metrics?.tokenUsage.total || 0;
327
+ }
328
+ return {
329
+ version: 3,
330
+ timestamp: new Date(this.createdAt).toISOString(),
331
+ prompts: this.prompts,
332
+ results: this.results.map((r) => r.toEvaluateResult()),
333
+ stats,
334
+ };
335
+ }
336
+ async toResultsFile() {
337
+ const results = {
338
+ version: this.version(),
339
+ createdAt: new Date(this.createdAt).toISOString(),
340
+ results: await this.toEvaluateSummary(),
341
+ config: this.config,
342
+ author: this.author || null,
343
+ prompts: this.getPrompts(),
344
+ datasetId: this.datasetId || null,
345
+ };
346
+ return results;
347
+ }
348
+ async delete() {
349
+ const db = (0, database_1.getDb)();
350
+ await db.transaction(() => {
351
+ db.delete(tables_1.evalsToDatasetsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToDatasetsTable.evalId, this.id)).run();
352
+ db.delete(tables_1.evalsToPromptsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToPromptsTable.evalId, this.id)).run();
353
+ db.delete(tables_1.evalsToTagsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToTagsTable.evalId, this.id)).run();
354
+ db.delete(tables_1.evalsToProvidersTable).where((0, drizzle_orm_1.eq)(tables_1.evalsToProvidersTable.evalId, this.id)).run();
355
+ db.delete(tables_1.evalResultsTable).where((0, drizzle_orm_1.eq)(tables_1.evalResultsTable.evalId, this.id)).run();
356
+ db.delete(tables_1.evalsTable).where((0, drizzle_orm_1.eq)(tables_1.evalsTable.id, this.id)).run();
357
+ });
358
+ }
359
+ }
360
+ exports.default = Eval;
361
+ async function getSummaryofLatestEvals(limit = constants_1.DEFAULT_QUERY_LIMIT, filterDescription, datasetId) {
362
+ const db = (0, database_1.getDb)();
363
+ const startTime = performance.now();
364
+ const query = db
365
+ .select({
366
+ evalId: tables_1.evalsTable.id,
367
+ createdAt: tables_1.evalsTable.createdAt,
368
+ description: tables_1.evalsTable.description,
369
+ numTests: (0, drizzle_orm_1.sql) `COUNT(DISTINCT ${tables_1.evalResultsTable.testIdx})`.as('numTests'),
370
+ datasetId: tables_1.evalsToDatasetsTable.datasetId,
371
+ })
372
+ .from(tables_1.evalsTable)
373
+ .leftJoin(tables_1.evalsToDatasetsTable, (0, drizzle_orm_1.eq)(tables_1.evalsTable.id, tables_1.evalsToDatasetsTable.evalId))
374
+ .leftJoin(tables_1.evalResultsTable, (0, drizzle_orm_1.eq)(tables_1.evalsTable.id, tables_1.evalResultsTable.evalId))
375
+ .where((0, drizzle_orm_1.and)(datasetId ? (0, drizzle_orm_1.eq)(tables_1.evalsToDatasetsTable.datasetId, datasetId) : undefined, filterDescription ? (0, drizzle_orm_1.like)(tables_1.evalsTable.description, `%${filterDescription}%`) : undefined, (0, drizzle_orm_1.eq)(tables_1.evalsTable.results, {})))
376
+ .groupBy(tables_1.evalsTable.id);
377
+ const results = query.orderBy((0, drizzle_orm_1.desc)(tables_1.evalsTable.createdAt)).limit(limit).all();
378
+ const mappedResults = results.map((result) => ({
379
+ evalId: result.evalId,
380
+ createdAt: result.createdAt,
381
+ description: result.description,
382
+ numTests: result.numTests || 0,
383
+ datasetId: result.datasetId,
384
+ }));
385
+ const endTime = performance.now();
386
+ const executionTime = endTime - startTime;
387
+ logger_1.default.debug(`listPreviousResults execution time: ${executionTime.toFixed(2)}ms`);
388
+ return mappedResults;
389
+ }
390
+ //# sourceMappingURL=eval.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"eval.js","sourceRoot":"","sources":["../../../src/models/eval.ts"],"names":[],"mappings":";;;;;;AAoCA,oCAEC;AAkaD,0DA0CC;AAlfD,mCAAoC;AACpC,6CAAuD;AACvD,oEAAuC;AACvC,4CAAmD;AACnD,0CAAoC;AACpC,+CAU4B;AAC5B,uDAA+B;AAC/B,4CAA8C;AAa9C,iFAA0E;AAC1E,mDAA4D;AAC5D,uCAAmD;AACnD,8DAAsC;AAGtC,SAAgB,YAAY,CAAC,YAAkB,IAAI,IAAI,EAAE;IACvD,OAAO,QAAQ,IAAA,2BAAc,EAAC,CAAC,CAAC,IAAI,SAAS,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AAC7E,CAAC;AAED,MAAa,WAAW;IACtB,MAAM,CAAC,KAAK,CAAC,gBAAgB,CAAC,KAAa;QACzC,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,iBAAG,CAAC,GAAG,CACnB;sCACgC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,4BAA4B,CACpG,CAAC;QACF,aAAa;QACb,MAAM,OAAO,GAAuC,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QACxE,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,GAA6B,EAAE,CAAC,EAAE,EAAE;YAC/D,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;YACtC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAC3B,OAAO,GAAG,CAAC;QACb,CAAC,EAAE,EAAE,CAAC,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AAhBD,kCAgBC;AAED,MAAqB,IAAI;IAavB,MAAM,CAAC,KAAK,CAAC,MAAM;QACjB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,UAAU,GAAG,MAAM,EAAE;aACxB,MAAM,CAAC;YACN,EAAE,EAAE,mBAAU,CAAC,EAAE;SAClB,CAAC;aACD,IAAI,CAAC,mBAAU,CAAC;aAChB,OAAO,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,SAAS,CAAC,CAAC;aACnC,KAAK,CAAC,CAAC,CAAC,CAAC;QAEZ,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,SAAS,CAAC;QACnB,CAAC;QAED,OAAO,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAU;QAC9B,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QAEnB,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;YAClE,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,mBAAU,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC;YAC9E,MAAM,cAAc,GAAG,MAAM,EAAE;iBAC5B,MAAM,CAAC;gBACN,SAAS,EAAE,6BAAoB,CAAC,SAAS;aAC1C,CA
AC;iBACD,IAAI,CAAC,6BAAoB,CAAC;iBAC1B,KAAK,CAAC,IAAA,gBAAE,EAAC,6BAAoB,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;iBAC1C,KAAK,CAAC,CAAC,CAAC,CAAC;YAEZ,OAAO,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEvB,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC;QAE/C,MAAM,YAAY,GAAG,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE;YAC1C,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,SAAS,EAAE,IAAI,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC;YACpC,MAAM,EAAE,KAAK,CAAC,MAAM,IAAI,SAAS;YACjC,WAAW,EAAE,KAAK,CAAC,WAAW,IAAI,SAAS;YAC3C,OAAO,EAAE,KAAK,CAAC,OAAO,IAAI,EAAE;YAC5B,SAAS;YACT,SAAS,EAAE,IAAI;SAChB,CAAC,CAAC;QACH,IAAI,KAAK,CAAC,OAAO,IAAI,OAAO,IAAI,KAAK,CAAC,OAAO,EAAE,CAAC;YAC9C,YAAY,CAAC,UAAU,GAAG,KAAK,CAAC,OAA4B,CAAC;QAC/D,CAAC;QAED,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,QAAgB,+BAAmB;QACtD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,MAAM,EAAE;aACnB,MAAM,EAAE;aACR,IAAI,CAAC,mBAAU,CAAC;aAChB,KAAK,CAAC,KAAK,CAAC;aACZ,OAAO,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,SAAS,CAAC,CAAC;aACnC,GAAG,EAAE,CAAC;QACT,OAAO,KAAK,CAAC,GAAG,CACd,CAAC,CAAC,EAAE,EAAE,CACJ,IAAI,IAAI,CAAC,CAAC,CAAC,MAAM,EAAE;YACjB,EAAE,EAAE,CAAC,CAAC,EAAE;YACR,SAAS,EAAE,IAAI,IAAI,CAAC,CAAC,CAAC,SAAS,CAAC;YAChC,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,SAAS;YAC7B,WAAW,EAAE,CAAC,CAAC,WAAW,IAAI,SAAS;YACvC,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE;YACxB,SAAS,EAAE,IAAI;SAChB,CAAC,CACL,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,MAAM,CACjB,MAA8B,EAC9B,eAAyB,EAAE,uFAAuF;IAClH,IAMC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;QAChD,MAAM,MAAM,GAAG,IAAI,EAAE,EAAE,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,EAAE,CAAC,WAAW,CAAC,CAAC,EAAE,EAAE,EAAE;YAC1B,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC;iBAClB,MAAM,CAAC;gBACN,EAAE,EAAE,MAAM;gBACV,SAAS,EAAE,SAAS,CAAC,OAAO,EAAE;gBAC9B,MAAM,EAAE,IAAI,EAAE,MAAM;gBACpB,WAAW,EAAE,MAAM,CAAC,WAAW;gBAC/B,MAAM;gBACN,OAAO,EAAE,EAAE;aACZ,CAAC;iBACD,GAAG,EAAE,CAAC;YACT,IAAI,IAAI,EAAE
,OAAO,EAAE,CAAC;gBAClB,MAAM,GAAG,GAAG,EAAE;qBACX,MAAM,CAAC,yBAAgB,CAAC;qBACxB,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,MAAM,EAAE,EAAE,EAAE,IAAA,mBAAU,GAAE,EAAE,CAAC,CAAC,CAAC;qBACtE,GAAG,EAAE,CAAC;gBACT,gBAAM,CAAC,KAAK,CAAC,YAAY,GAAG,CAAC,OAAO,eAAe,CAAC,CAAC;YACvD,CAAC;YAED,KAAK,MAAM,MAAM,IAAI,eAAe,EAAE,CAAC;gBACrC,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,GAAG,CAAC;gBAC3D,MAAM,QAAQ,GAAG,IAAA,kBAAU,EAAC,MAAM,CAAC,CAAC;gBAEpC,EAAE,CAAC,MAAM,CAAC,qBAAY,CAAC;qBACpB,MAAM,CAAC;oBACN,EAAE,EAAE,QAAQ;oBACZ,MAAM,EAAE,KAAK;iBACd,CAAC;qBACD,mBAAmB,EAAE;qBACrB,GAAG,EAAE,CAAC;gBAET,EAAE,CAAC,MAAM,CAAC,4BAAmB,CAAC;qBAC3B,MAAM,CAAC;oBACN,MAAM;oBACN,QAAQ;iBACT,CAAC;qBACD,mBAAmB,EAAE;qBACrB,GAAG,EAAE,CAAC;gBAET,gBAAM,CAAC,KAAK,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;YAC/C,CAAC;YAED,0BAA0B;YAC1B,MAAM,SAAS,GAAG,IAAA,mBAAM,EAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,CAAC;YAC7D,EAAE,CAAC,MAAM,CAAC,sBAAa,CAAC;iBACrB,MAAM,CAAC;gBACN,EAAE,EAAE,SAAS;gBACb,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;iBACD,mBAAmB,EAAE;iBACrB,GAAG,EAAE,CAAC;YAET,EAAE,CAAC,MAAM,CAAC,6BAAoB,CAAC;iBAC5B,MAAM,CAAC;gBACN,MAAM;gBACN,SAAS;aACV,CAAC;iBACD,mBAAmB,EAAE;iBACrB,GAAG,EAAE,CAAC;YAET,gBAAM,CAAC,KAAK,CAAC,qBAAqB,SAAS,EAAE,CAAC,CAAC;YAE/C,cAAc;YACd,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;gBAChB,KAAK,MAAM,CAAC,MAAM,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC7D,MAAM,KAAK,GAAG,IAAA,mBAAM,EAAC,GAAG,MAAM,IAAI,QAAQ,EAAE,CAAC,CAAC;oBAE9C,EAAE,CAAC,MAAM,CAAC,kBAAS,CAAC;yBACjB,MAAM,CAAC;wBACN,EAAE,EAAE,KAAK;wBACT,IAAI,EAAE,MAAM;wBACZ,KAAK,EAAE,QAAQ;qBAChB,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;oBAET,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC;yBACxB,MAAM,CAAC;wBACN,MAAM;wBACN,KAAK;qBACN,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;oBAET,gBAAM,CAAC,KAAK,CAAC,iBAAiB,KAAK,EAAE,CAAC,CAAC;gBACzC,CAAC;YACH,CAAC;QACH,CAAC,CAAC,CAAC;QACH,OAAO,IAAI,IAAI,CAAC,MAAM,EAAE,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,EAAE,
IAAI,EAAE,CAAC,CAAC;IAC5F,CAAC;IAED,YACE,MAA8B,EAC9B,IAQC;QAED,MAAM,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,IAAI,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,OAAO,EAAE,CAAC;QACrC,IAAI,CAAC,EAAE,GAAG,IAAI,EAAE,EAAE,IAAI,YAAY,CAAC,SAAS,CAAC,CAAC;QAC9C,IAAI,CAAC,MAAM,GAAG,IAAI,EAAE,MAAM,CAAC;QAC3B,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QACrB,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC;QAClB,IAAI,CAAC,OAAO,GAAG,IAAI,EAAE,OAAO,IAAI,EAAE,CAAC;QACnC,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,SAAS,CAAC;QACjC,IAAI,CAAC,SAAS,GAAG,IAAI,EAAE,SAAS,IAAI,KAAK,CAAC;IAC5C,CAAC;IAED,OAAO;QACL;;;WAGG;QACH,OAAO,IAAI,CAAC,UAAU,IAAI,OAAO,IAAI,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;IAC5B,CAAC;IAED,QAAQ,CAAC,KAAoB;QAC3B,IAAA,wBAAS,EAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,EAAE,uBAAuB,CAAC,CAAC;QACvD,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;QACpD,IAAI,CAAC,UAAU,CAAC,KAAK,GAAG,KAAK,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,SAAS,GAAwB;YACrC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,WAAW;YACpC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,SAAS,EAAE,IAAA,0BAAmB,GAAE;SACjC,CAAC;QAEF,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,SAAS,CAAC,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC;QACtC,CAAC;QACD,MAAM,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACnF,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;IACxB,CAAC;IAED,KAAK,CAAC,OAAO;QACX,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAChD,CAAC;QACD,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,KAAK,GAAG,IAAA,iBAAG,EAAA;+DAC0C,IAAI,CAAC,EAAE,2BAA2B,CAAC;QAC9F,aAAa;QACb,MAAM,OAAO,GAAsB,MAAM,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;QAEvD,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,
CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;IACzC,CAAC;IAED,UAAU;QACR,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC;QACnD,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,QAAQ;QACZ,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC,UAAU,EAAE,KAAK,IAAI,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,CAAC;QACjF,CAAC;QACD,OAAO,IAAA,iDAAqB,EAAC,MAAM,IAAI,CAAC,aAAa,EAAE,CAAC,CAAC;IAC3D,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,MAAsB,EAAE,IAAoB;QAC1D,MAAM,SAAS,GAAG,MAAM,oBAAU,CAAC,wBAAwB,CAAC,IAAI,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE;YACjF,OAAO,EAAE,IAAI,CAAC,SAAS;SACxB,CAAC,CAAC;QACH,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,OAA0B;QACzC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC,CAAC,GAAG,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QACvF,CAAC;IACH,CAAC;IAED,KAAK,CAAC,YAAY,CAAC,SAAqB;QACtC,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;YACnB,MAAM,EAAE,CAAC,WAAW,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE;gBAChC,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,MAAM,EAAE,GAAG,QAAQ,CAAC,EAAE,CAAC;oBACvB,EAAE,CAAC,MAAM,CAAC,8BAAqB,CAAC;yBAC7B,MAAM,CAAC;wBACN,MAAM,EAAE,IAAI,CAAC,EAAE;wBACf,UAAU,EAAE,EAAE;qBACf,CAAC;yBACD,mBAAmB,EAAE;yBACrB,GAAG,EAAE,CAAC;gBACX,CAAC;YACH,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,WAAW;QACf,IAAI,CAAC,OAAO,GAAG,MAAM,oBAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC5D,CAAC;IAED,KAAK,CAAC,UAAU;QACd,IAAI,IAAI,CAAC,aAAa,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QACjC,CAAC;QACD,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC,OAAO,CAAC;IACtB,CAAC;IACD,KAAK,CAAC,iBAAiB;QACrB,IAAI,IAAI,CAAC,aAAa
,EAAE,EAAE,CAAC;YACzB,IAAA,wBAAS,EAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;YACpD,OAAO;gBACL,OAAO,EAAE,CAAC;gBACV,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;gBACjD,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,OAAO;gBAChC,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;gBAC5B,KAAK,EAAE,IAAI,CAAC,UAAU,CAAC,KAAK;aAC7B,CAAC;QACJ,CAAC;QACD,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9B,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;QAC3B,CAAC;QACD,MAAM,KAAK,GAAkB;YAC3B,SAAS,EAAE,CAAC;YACZ,QAAQ,EAAE,CAAC;YACX,UAAU,EAAE;gBACV,MAAM,EAAE,CAAC;gBACT,UAAU,EAAE,CAAC;gBACb,MAAM,EAAE,CAAC;gBACT,KAAK,EAAE,CAAC;aACT;SACF,CAAC;QAEF,KAAK,MAAM,MAAM,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YAClC,KAAK,CAAC,SAAS,IAAI,MAAM,CAAC,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;YACtD,KAAK,CAAC,QAAQ,IAAI,MAAM,CAAC,OAAO,EAAE,aAAa,IAAI,CAAC,CAAC;YACrD,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;YAClE,KAAK,CAAC,UAAU,CAAC,MAAM,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,MAAM,IAAI,CAAC,CAAC;YAClE,KAAK,CAAC,UAAU,CAAC,UAAU,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,UAAU,IAAI,CAAC,CAAC;YAC1E,KAAK,CAAC,UAAU,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,EAAE,UAAU,CAAC,KAAK,IAAI,CAAC,CAAC;QAClE,CAAC;QAED,OAAO;YACL,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YACjD,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,gBAAgB,EAAE,CAAC;YACtD,KAAK;SACN,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,aAAa;QACjB,MAAM,OAAO,GAAgB;YAC3B,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE;YACvB,SAAS,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YACjD,OAAO,EAAE,MAAM,IAAI,CAAC,iBAAiB,EAAE;YACvC,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI;YAC3B,OAAO,EAAE,IAAI,CAAC,UAAU,EAAE;YAC1B,SAAS,EAAE,IAAI,CAAC,SAAS,IAAI,IAAI;SAClC,CAAC;QAEF,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,KAAK,CAAC,MAAM;QACV,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;QACnB,MAAM,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;YACxB,EAAE,CAAC,MAAM,CAAC,6BAAoB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,6BAAoB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE
,CAAC;YACtF,EAAE,CAAC,MAAM,CAAC,4BAAmB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,4BAAmB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YACpF,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9E,EAAE,CAAC,MAAM,CAAC,8BAAqB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,8BAAqB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YACxF,EAAE,CAAC,MAAM,CAAC,yBAAgB,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,yBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;YAC9E,EAAE,CAAC,MAAM,CAAC,mBAAU,CAAC,CAAC,KAAK,CAAC,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC;QAChE,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AA5YD,uBA4YC;AAEM,KAAK,UAAU,uBAAuB,CAC3C,QAAgB,+BAAmB,EACnC,iBAA0B,EAC1B,SAAkB;IAElB,MAAM,EAAE,GAAG,IAAA,gBAAK,GAAE,CAAC;IACnB,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IACpC,MAAM,KAAK,GAAG,EAAE;SACb,MAAM,CAAC;QACN,MAAM,EAAE,mBAAU,CAAC,EAAE;QACrB,SAAS,EAAE,mBAAU,CAAC,SAAS;QAC/B,WAAW,EAAE,mBAAU,CAAC,WAAW;QACnC,QAAQ,EAAE,IAAA,iBAAG,EAAA,kBAAkB,yBAAgB,CAAC,OAAO,GAAG,CAAC,EAAE,CAAC,UAAU,CAAC;QACzE,SAAS,EAAE,6BAAoB,CAAC,SAAS;KAC1C,CAAC;SACD,IAAI,CAAC,mBAAU,CAAC;SAChB,QAAQ,CAAC,6BAAoB,EAAE,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,6BAAoB,CAAC,MAAM,CAAC,CAAC;SAC9E,QAAQ,CAAC,yBAAgB,EAAE,IAAA,gBAAE,EAAC,mBAAU,CAAC,EAAE,EAAE,yBAAgB,CAAC,MAAM,CAAC,CAAC;SACtE,KAAK,CACJ,IAAA,iBAAG,EACD,SAAS,CAAC,CAAC,CAAC,IAAA,gBAAE,EAAC,6BAAoB,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,EACrE,iBAAiB,CAAC,CAAC,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,WAAW,EAAE,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,EACtF,IAAA,gBAAE,EAAC,mBAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAC3B,CACF;SACA,OAAO,CAAC,mBAAU,CAAC,EAAE,CAAC,CAAC;IAE1B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAA,kBAAI,EAAC,mBAAU,CAAC,SAAS,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,CAAC;IAE7E,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;QAC7C,MAAM,EAAE,MAAM,CAAC,MAAM;QACrB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,QAAQ,EA
AG,MAAM,CAAC,QAAmB,IAAI,CAAC;QAC1C,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC,CAAC,CAAC;IAEJ,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAClC,MAAM,aAAa,GAAG,OAAO,GAAG,SAAS,CAAC;IAC1C,gBAAM,CAAC,KAAK,CAAC,uCAAuC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAElF,OAAO,aAAa,CAAC;AACvB,CAAC"}
@@ -0,0 +1,50 @@
1
+ import type { AtomicTestCase, GradingResult, Prompt, ProviderOptions, ProviderResponse } from '../types';
2
+ import { type EvaluateResult } from '../types';
3
+ export default class EvalResult {
4
+ static createFromEvaluateResult(evalId: string, result: EvaluateResult, testCase: AtomicTestCase, opts?: {
5
+ persist: boolean;
6
+ }): Promise<EvalResult>;
7
+ static createManyFromEvaluateResult(results: EvaluateResult[], evalId: string): Promise<EvalResult[]>;
8
+ static findById(id: string): Promise<EvalResult | null>;
9
+ static findManyByEvalId(evalId: string): Promise<EvalResult[]>;
10
+ id: string;
11
+ evalId: string;
12
+ description?: string | null;
13
+ promptIdx: number;
14
+ testIdx: number;
15
+ testCase: AtomicTestCase;
16
+ prompt: Prompt;
17
+ promptId: string;
18
+ error?: string | null;
19
+ success: boolean;
20
+ score: number;
21
+ response: ProviderResponse | null;
22
+ gradingResult: GradingResult | null;
23
+ namedScores: Record<string, number>;
24
+ provider: ProviderOptions;
25
+ latencyMs: number;
26
+ cost: number;
27
+ persisted: boolean;
28
+ constructor(opts: {
29
+ id: string;
30
+ evalId: string;
31
+ promptIdx: number;
32
+ testIdx: number;
33
+ testCase: AtomicTestCase;
34
+ prompt: Prompt;
35
+ promptId?: string | null;
36
+ error?: string | null;
37
+ success: boolean;
38
+ score: number;
39
+ response: ProviderResponse | null;
40
+ gradingResult: GradingResult | null;
41
+ namedScores?: Record<string, number> | null;
42
+ provider: ProviderOptions;
43
+ latencyMs?: number | null;
44
+ cost?: number | null;
45
+ persisted?: boolean;
46
+ });
47
+ save(): Promise<void>;
48
+ toEvaluateResult(): EvaluateResult;
49
+ }
50
+ //# sourceMappingURL=evalResult.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"evalResult.d.ts","sourceRoot":"","sources":["../../../src/models/evalResult.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EACV,cAAc,EACd,aAAa,EACb,MAAM,EACN,eAAe,EACf,gBAAgB,EACjB,MAAM,UAAU,CAAC;AAClB,OAAO,EAAE,KAAK,cAAc,EAAE,MAAM,UAAU,CAAC;AAG/C,MAAM,CAAC,OAAO,OAAO,UAAU;WAChB,wBAAwB,CACnC,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,cAAc,EACtB,QAAQ,EAAE,cAAc,EACxB,IAAI,CAAC,EAAE;QAAE,OAAO,EAAE,OAAO,CAAA;KAAE;WAgChB,4BAA4B,CAAC,OAAO,EAAE,cAAc,EAAE,EAAE,MAAM,EAAE,MAAM;WAetE,QAAQ,CAAC,EAAE,EAAE,MAAM;WAMnB,gBAAgB,CAAC,MAAM,EAAE,MAAM;IAS5C,EAAE,EAAE,MAAM,CAAC;IACX,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IAC5B,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,cAAc,CAAC;IACzB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACtB,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,QAAQ,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAClC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;IACpC,WAAW,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACpC,QAAQ,EAAE,eAAe,CAAC;IAC1B,SAAS,EAAE,MAAM,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,OAAO,CAAC;gBAEP,IAAI,EAAE;QAChB,EAAE,EAAE,MAAM,CAAC;QACX,MAAM,EAAE,MAAM,CAAC;QACf,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,MAAM,CAAC;QAChB,QAAQ,EAAE,cAAc,CAAC;QACzB,MAAM,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACzB,KAAK,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACtB,OAAO,EAAE,OAAO,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,QAAQ,EAAE,gBAAgB,GAAG,IAAI,CAAC;QAClC,aAAa,EAAE,aAAa,GAAG,IAAI,CAAC;QACpC,WAAW,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,IAAI,CAAC;QAC5C,QAAQ,EAAE,eAAe,CAAC;QAC1B,SAAS,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QAC1B,IAAI,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;QACrB,SAAS,CAAC,EAAE,OAAO,CAAC;KACrB;IAqBK,IAAI;IAeV,gBAAgB,IAAI,cAAc;CAqBnC"}