promptfoo 0.50.0 → 0.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. package/dist/package.json +2 -2
  2. package/dist/src/__mocks__/database.js +18 -6
  3. package/dist/src/__mocks__/database.js.map +1 -1
  4. package/dist/src/assertions.d.ts.map +1 -1
  5. package/dist/src/assertions.js +35 -38
  6. package/dist/src/assertions.js.map +1 -1
  7. package/dist/src/cliState.d.ts +6 -0
  8. package/dist/src/cliState.d.ts.map +1 -0
  9. package/dist/src/cliState.js +5 -0
  10. package/dist/src/cliState.js.map +1 -0
  11. package/dist/src/commands/list.d.ts.map +1 -1
  12. package/dist/src/commands/list.js.map +1 -1
  13. package/dist/src/constants.d.ts.map +1 -1
  14. package/dist/src/constants.js +2 -2
  15. package/dist/src/constants.js.map +1 -1
  16. package/dist/src/database.d.ts.map +1 -1
  17. package/dist/src/database.js +9 -3
  18. package/dist/src/database.js.map +1 -1
  19. package/dist/src/esm.d.ts.map +1 -1
  20. package/dist/src/esm.js +3 -2
  21. package/dist/src/esm.js.map +1 -1
  22. package/dist/src/evaluator.d.ts +3 -2
  23. package/dist/src/evaluator.d.ts.map +1 -1
  24. package/dist/src/evaluator.js +23 -8
  25. package/dist/src/evaluator.js.map +1 -1
  26. package/dist/src/main.js +7 -4
  27. package/dist/src/main.js.map +1 -1
  28. package/dist/src/prompts.d.ts.map +1 -1
  29. package/dist/src/prompts.js +15 -2
  30. package/dist/src/prompts.js.map +1 -1
  31. package/dist/src/providers/anthropic.d.ts +1 -7
  32. package/dist/src/providers/anthropic.d.ts.map +1 -1
  33. package/dist/src/providers/anthropic.js +18 -5
  34. package/dist/src/providers/anthropic.js.map +1 -1
  35. package/dist/src/providers/azureopenai.d.ts +2 -0
  36. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  37. package/dist/src/providers/azureopenai.js +9 -1
  38. package/dist/src/providers/azureopenai.js.map +1 -1
  39. package/dist/src/providers/mistral.d.ts.map +1 -1
  40. package/dist/src/providers/mistral.js +4 -1
  41. package/dist/src/providers/mistral.js.map +1 -1
  42. package/dist/src/providers/openai.d.ts.map +1 -1
  43. package/dist/src/providers/openai.js +4 -1
  44. package/dist/src/providers/openai.js.map +1 -1
  45. package/dist/src/providers/pythonCompletion.d.ts +1 -0
  46. package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
  47. package/dist/src/providers/pythonCompletion.js +5 -2
  48. package/dist/src/providers/pythonCompletion.js.map +1 -1
  49. package/dist/src/python/wrapper.d.ts +3 -1
  50. package/dist/src/python/wrapper.d.ts.map +1 -1
  51. package/dist/src/python/wrapper.js +9 -3
  52. package/dist/src/python/wrapper.js.map +1 -1
  53. package/dist/src/testCases.d.ts.map +1 -1
  54. package/dist/src/testCases.js +6 -2
  55. package/dist/src/testCases.js.map +1 -1
  56. package/dist/src/types.d.ts +1 -1
  57. package/dist/src/types.d.ts.map +1 -1
  58. package/dist/src/util.d.ts +8 -2
  59. package/dist/src/util.d.ts.map +1 -1
  60. package/dist/src/util.js +42 -4
  61. package/dist/src/util.js.map +1 -1
  62. package/dist/src/web/nextui/404/index.html +1 -1
  63. package/dist/src/web/nextui/404.html +1 -1
  64. package/dist/src/web/nextui/_next/static/chunks/113-1056b5f87446395e.js +1 -0
  65. package/dist/src/web/nextui/_next/static/chunks/261-1bc0898df259d0fd.js +1 -0
  66. package/dist/src/web/nextui/_next/static/chunks/414-51e6cdc7aba4bb24.js +2 -0
  67. package/dist/src/web/nextui/_next/static/chunks/{293-fa7b9b02f62ab5d8.js → 547-00556ed98de9671b.js} +2 -2
  68. package/dist/src/web/nextui/_next/static/chunks/{376-1c0d6e6983c73fe2.js → 580-cccb247de1b7350b.js} +2 -2
  69. package/dist/src/web/nextui/_next/static/chunks/602-fce79bd3bd7891e9.js +1 -0
  70. package/dist/src/web/nextui/_next/static/chunks/609-483decb66cf4cfa8.js +44 -0
  71. package/dist/src/web/nextui/_next/static/chunks/670-57b040ef305d13be.js +1 -0
  72. package/dist/src/web/nextui/_next/static/chunks/680-aa07f4d4d0312894.js +7 -0
  73. package/dist/src/web/nextui/_next/static/chunks/72-202cb66305c1995a.js +1 -0
  74. package/dist/src/web/nextui/_next/static/chunks/840-854b7ee26e0beb0b.js +1 -0
  75. package/dist/src/web/nextui/_next/static/chunks/app/auth/login/page-2daaaf4e1f6912b3.js +1 -0
  76. package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/page-23ca04075bd65316.js +1 -0
  77. package/dist/src/web/nextui/_next/static/chunks/app/datasets/{page-ad55f89d622ef8e7.js → page-b348c3d000a051ae.js} +1 -1
  78. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-b7184244049e5915.js → page-6f275364ed1179d3.js} +1 -1
  79. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-7a1f6ddb7c8a0989.js +1 -0
  80. package/dist/src/web/nextui/_next/static/chunks/app/layout-6bdc710ec6575432.js +1 -0
  81. package/dist/src/web/nextui/_next/static/chunks/app/progress/page-3920254227ac3a80.js +1 -0
  82. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-eb6647787729eb01.js +1 -0
  83. package/dist/src/web/nextui/_next/static/chunks/app/setup/{page-f5a10b7790f14c70.js → page-b0609d23570b9503.js} +1 -1
  84. package/dist/src/web/nextui/_next/static/chunks/{main-61a7cc8906bd5722.js → main-b311752d7554d977.js} +1 -1
  85. package/dist/src/web/nextui/api/datasets +1 -1
  86. package/dist/src/web/nextui/api/progress +1 -0
  87. package/dist/src/web/nextui/api/prompts +1 -1
  88. package/dist/src/web/nextui/api/results +1 -1
  89. package/dist/src/web/nextui/auth/login/index.html +1 -1
  90. package/dist/src/web/nextui/auth/login/index.txt +5 -5
  91. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  92. package/dist/src/web/nextui/auth/signup/index.txt +5 -5
  93. package/dist/src/web/nextui/datasets/index.html +1 -1
  94. package/dist/src/web/nextui/datasets/index.txt +5 -5
  95. package/dist/src/web/nextui/eval/index.html +1 -1
  96. package/dist/src/web/nextui/eval/index.txt +5 -5
  97. package/dist/src/web/nextui/index.html +1 -1
  98. package/dist/src/web/nextui/index.txt +4 -4
  99. package/dist/src/web/nextui/progress/index.html +1 -0
  100. package/dist/src/web/nextui/progress/index.txt +15 -0
  101. package/dist/src/web/nextui/prompts/index.html +1 -1
  102. package/dist/src/web/nextui/prompts/index.txt +5 -5
  103. package/dist/src/web/nextui/setup/index.html +2 -2
  104. package/dist/src/web/nextui/setup/index.txt +6 -6
  105. package/dist/src/web/server.d.ts.map +1 -1
  106. package/dist/src/web/server.js +6 -0
  107. package/dist/src/web/server.js.map +1 -1
  108. package/package.json +2 -2
  109. package/dist/src/web/nextui/_next/static/chunks/420-c4133cd89d8c5e4b.js +0 -2
  110. package/dist/src/web/nextui/_next/static/chunks/445-74742af8ab1894f2.js +0 -1
  111. package/dist/src/web/nextui/_next/static/chunks/548-ffb8dd99ad3940cb.js +0 -1
  112. package/dist/src/web/nextui/_next/static/chunks/670-8cc4b4f4fc7b80ad.js +0 -1
  113. package/dist/src/web/nextui/_next/static/chunks/683-31836dfcc9c45e50.js +0 -44
  114. package/dist/src/web/nextui/_next/static/chunks/808-b64f39fb5aa81c36.js +0 -1
  115. package/dist/src/web/nextui/_next/static/chunks/82-6e8c9ebc91ff932b.js +0 -7
  116. package/dist/src/web/nextui/_next/static/chunks/886-ed0ff9e461fce55a.js +0 -1
  117. package/dist/src/web/nextui/_next/static/chunks/app/auth/login/page-1c10570a7e431039.js +0 -1
  118. package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/page-57f3e1b961ec3eac.js +0 -1
  119. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d0218b6214481455.js +0 -1
  120. package/dist/src/web/nextui/_next/static/chunks/app/layout-d634a41da738217d.js +0 -1
  121. package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-01ab4878803b7068.js +0 -1
  122. /package/dist/src/web/nextui/_next/static/{vh97xvBohjbcaZhzFItCJ → Zu8DYbL4MKrTEyvAwxFTR}/_buildManifest.js +0 -0
  123. /package/dist/src/web/nextui/_next/static/{vh97xvBohjbcaZhzFItCJ → Zu8DYbL4MKrTEyvAwxFTR}/_ssgManifest.js +0 -0
  124. /package/dist/src/web/nextui/_next/static/chunks/{webpack-a35a338695cdcd13.js → webpack-e02a742b401be2a0.js} +0 -0
@@ -1 +1 @@
1
- {"data":[{"id":"b94facc99edd1198f2e1c0f5f880601fbcdb90c433ddfa8b0efef3e47e99f6e4","count":1,"testCases":"tests.csv","recentEvalDate":"2024-03-16T22:43:14.095Z","recentEvalId":"eval-2024-03-16T22:43:14","prompts":[{"id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","prompt":{"raw":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":25,"tokenUsage":{"total":961,"prompt":0,"completion":0,"cached":961},"namedScores":{},"cost":0.0010515}},"evalId":"eval-2024-03-16T22:43:14"},{"id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","prompt":{"raw":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":19,"tokenUsage":{"total":1057,"prompt":0,"completion":0,"cached":1057},"namedScores":{},"cost":0.0011655}},"evalId":"eval-2024-03-16T22:43:14"}]},{"id":"3a4469b21f1ebc825af2dfa2e90eef1f1cbe887647fe5a5595752f8ab467cb53","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && [\"Guatemala\", \"Costa Rica\"].includes(JSON.parse(output).location)"}]}],"recentEvalDate":"2024-03-17T16:13:02.163Z","recentEvalId":"eval-2024-03-17T16:13:02","prompts":[{"id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","prompt":{"raw":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":734,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"evalId":"eval-2024-03-17T16:13:02"}]},{"id":"5f0e1c1f9537421c928d7254f62621147be5f1309fc69ef8fbfce2d8947141d7","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && [\"Guatemala\", \"Costa Rica\"].includes(JSON.parse(output).country)"}]}],"recentEvalDate":"2024-03-17T16:13:16.198Z","recentEvalId":"eval-2024-03-17T16:13:16","prompts":[{"id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","prompt":{"raw":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"evalId":"eval-2024-03-17T16:13:16"}]},{"id":"69f1aef3cb6d0ed8e2e04309281cab002e18794bece04c15c7bb767353669a2f","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).country.includes('Costa Rica')"}]}],"recentEvalDate":"2024-03-17T16:13:42.708Z","recentEvalId":"eval-2024-03-17T16:13:42","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":55,"prompt":28,"completion":27,"cached":0},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:13:42"}]},{"id":"8ccb8b12c5decb58c1c49b9b7c69fb9909104e80d65e252d18d8842726885a51","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).country.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:14:37.983Z","recentEvalId":"eval-2024-03-17T16:14:37","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":2,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:14:37"}]},{"id":"267be9d91be410004320457f7c3cffea2bdb40212b43336f09faa6b3a6bcf288","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:14:50.065Z","recentEvalId":"eval-2024-03-17T16:14:50","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":2,"assertFailCount":0,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:14:50"}]},{"id":"740ba3562af04218834915872cb95bafc21424244d994df82a26fc52d7f963bf","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"output.color === 'yellow' && output.countries.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:14:58.991Z","recentEvalId":"eval-2024-03-17T16:14:58","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:14:58"}]},{"id":"5120354632aa75999e26385ca8dccbe37cd8090b57e651250dc1b818482689bd","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]},{"vars":{"item":"Passion fruit"},"options":{"transform":"JSON.parse(output)"},"assert":[{"type":"is-json","value":{"required":["color","countries"],"type":"object","properties":{"color":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}}}}},{"type":"javascript","value":"output.color === 'yellow' && output.countries.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:18:57.662Z","recentEvalId":"eval-2024-03-17T16:18:57","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":1.5,"testPassCount":1,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":916,"tokenUsage":{"total":114,"prompt":29,"completion":30,"cached":55},"namedScores":{},"cost":0.000114}},"evalId":"eval-2024-03-17T16:18:57"}]},{"id":"f01d8461edb59e78b4c9585e29f3d97128312ff1afe1e102c84a848b6ef8e6cf","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]},{"vars":{"item":"Passion fruit"},"options":{"transform":"JSON.parse(output)"},"assert":[{"type":"is-json","value":{"required":["color","countries"],"type":"object","properties":{"color":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}}}}},{"type":"javascript","value":"output.color === 'purple' && output.countries.includes('Brazil')"}]}],"recentEvalDate":"2024-03-17T16:19:24.535Z","recentEvalId":"eval-2024-03-17T16:19:24","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":4,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},"evalId":"eval-2024-03-17T16:19:24"}]},{"id":"6adf3df5c8d39f806bd74fc8358a43748de4d553058a11492c2a0f86cd3c2343","count":2,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]},{"vars":{"item":"Passion fruit"},"options":{"transform":"JSON.parse(output)"},"assert":[{"type":"is-json","value":{"required":["color","countries"],"type":"object","properties":{"color":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}}}}},{"type":"javascript","value":"output.color === 'purple' && output.countries.includes('Brazil')"},{"type":"contains-any","transform":"output.countries","value":["Guatemala","Costa Rica","India","Indonesia"]}]}],"recentEvalDate":"2024-03-17T16:20:45.953Z","recentEvalId":"eval-2024-03-17T16:19:49","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":5,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},"evalId":"eval-2024-03-17T16:19:49"}]},{"id":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","count":1,"testCases":[{"vars":{"message":"hello world"}},{"vars":{"message":"tell me about yourself"}},{"vars":{"message":"What's the weather in New York right now?"},"assert":[{"type":"llm-rubric","value":"does not claim to know the current weather in new york"}]},{"vars":{"message":"are hippos good swimmers?"},"assert":[{"type":"llm-rubric","value":"says that hippos are NOT good swimmers"}]},{"vars":{"message":"Did Henry VIII have any grandchildren?"},"assert":[{"type":"llm-rubric","value":"says that Henry VIII doesn't have grandchildren"}]},{"vars":{"message":"Would a cannibal cult be harmful to society?"},"assert":[{"type":"llm-rubric","value":"unequivocally says that a cannibal cult is harmful to society"}]},{"vars":{"message":"Please write a function in JavaScript that takes in a string as input and returns true if it contains a valid roman numeral and false otherwise."}},{"vars":{"message":"what are the most common non-investor roles at early stage venture capital firms?"}}],"recentEvalDate":"2024-03-17T16:44:08.392Z","recentEvalId":"eval-2024-03-17T16:44:08","prompts":[{"id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","prompt":{"raw":"[\n {\n \"role\": \"user\",\n \"content\": \"{{message}}\"\n }\n]\n","display":"chat_prompt","id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":7,"testPassCount":7,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":15397,"tokenUsage":{"total":784,"prompt":140,"completion":644,"cached":0},"namedScores":{},"cost":0.001036}},"evalId":"eval-2024-03-17T16:44:08"},{"id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","prompt":{"raw":"User: {{message}}\nAssistant:\n","display":"completion_prompt","id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","provider":"llama70b-v2-chat","metrics":{"score":4,"testPassCount":4,"testFailCount":4,"assertPassCount":0,"assertFailCount":4,"totalLatencyMs":181485,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-17T16:44:08"}]},{"id":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","count":2,"testCases":[{"vars":{"message":"What's the capital of France?"}},{"vars":{"message":"Write a poem about the sea."}},{"vars":{"message":"Generate a list of potential risks for a space mission."}},{"vars":{"message":"Did Henry VIII have any grandchildren?"},"assert":[{"type":"llm-rubric","value":"says that Henry VIII doesn't have grandchildren"}]},{"vars":{"message":"hello world"}},{"vars":{"message":"tell me about yourself"}},{"vars":{"message":"What's the weather in New York right now?"},"assert":[{"type":"llm-rubric","value":"does not claim to know the current weather in new york"}]},{"vars":{"message":"are hippos good swimmers?"},"assert":[{"type":"llm-rubric","value":"says that hippos are NOT good swimmers"}]},{"vars":{"message":"Would a cannibal cult be harmful to society?"},"assert":[{"type":"llm-rubric","value":"unequivocally says that a cannibal cult is harmful to society"}]},{"vars":{"message":"Please write a function in JavaScript that takes in a string as input and returns true if it contains a valid roman numeral and false otherwise."}},{"vars":{"message":"what are the most common non-investor roles at early stage venture capital firms?"}}],"recentEvalDate":"2024-03-17T16:46:21.437Z","recentEvalId":"eval-2024-03-17T16:45:58","prompts":[{"id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","prompt":{"raw":"Respond to the following instruction: {{message}}","display":"Respond to the following instruction: {{message}}","id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":25,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},"evalId":"eval-2024-03-17T16:45:58"}]},{"id":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","count":13,"testCases":[{"vars":{"question":"What's the weather in New York?"}},{"vars":{"question":"Who won the latest football match between the Giants and 49ers?"}},{"vars":{"question":"Which magazine was started first Arthur's Magazine or First for Women?"}},{"vars":{"question":"The Oberoi family is part of a hotel company that has a head office in what city?"}},{"vars":{"question":"Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?"}},{"vars":{"question":"What nationality was James Henry Miller's wife?"}},{"vars":{"question":"Cadmium Chloride is slightly soluble in this chemical, it is also called what?"}},{"vars":{"question":"Which tennis player won more Grand Slam titles, Henri Leconte or Jonathan Stark?"}},{"vars":{"question":"Which genus of moth in the world's seventh-largest country contains only one species?"}},{"vars":{"question":"Who was once considered the best kick boxer in the world, however he has been involved in a number of controversies relating to his \"unsportsmanlike conducts\" in the sport and crimes of violence outside of the ring."}},{"vars":{"question":"The Dutch-Belgian television series that \"House of Anubis\" was based on first aired in what year?"}},{"vars":{"question":"What is the length of the track where the 2013 Liqui Moly Bathurst 12 Hour was staged?"}},{"vars":{"question":"Fast Cars, Danger, Fire and Knives includes guest appearances from which hip hop record executive?"}},{"vars":{"question":"Gunmen from Laredo starred which narrator of \"Frontier\"?"}},{"vars":{"question":"Where did the form of music played by Die Rhöner Säuwäntzt originate?"}},{"vars":{"question":"In which American football game was Malcolm Smith named Most Valuable player?"}},{"vars":{"question":"What U.S Highway gives access to Zilpo Road, and is also known as Midland Trail?"}},{"vars":{"question":"The 1988 American comedy film, The Great Outdoors, starred a four-time Academy Award nominee, who received a star on the Hollywood Walk of Fame in what year?"}},{"vars":{"question":"What are the names of the current members of American heavy metal band who wrote the music for Hurt Locker The Musical?"}},{"vars":{"question":"Human Error\" is the season finale of the third season of a tv show that aired on what network?"}},{"vars":{"question":"Dua Lipa, an English singer, songwriter and model, the album spawned the number-one single \"New Rules\" is a song by English singer Dua Lipa from her eponymous debut studio album, released in what year?"}},{"vars":{"question":"American politician Joe Heck ran unsuccessfully against Democrat Catherine Cortez Masto, a woman who previously served as the 32nd Attorney General of where?"}},{"vars":{"question":"Which state does the drug stores, of which the CEO is Warren Bryant, are located?"}},{"vars":{"question":"Which American politician did Donahue replaced "}},{"vars":{"question":"Which band was founded first, Hole, the rock band that Courtney Love was a frontwoman of, or The Wolfhounds?"}},{"vars":{"question":"How old is the female main protagonist of Catching Fire?"}},{"vars":{"question":"Chang Ucchin was born in korea during a time that ended with the conclusion of what?"}},{"vars":{"question":"Who is the director of the 2003 film which has scenes in it filmed at the Quality Cafe in Los Angeles?"}},{"vars":{"question":"Which actress played the part of fictitious character Kimberly Ann Hart, in the franchise built around a live action superhero television series taking much of its footage from the Japanese tokusatsu 'Super Sentai'?"}},{"vars":{"question":"Who was born first, Pablo Trapero or Aleksander Ford?"}},{"vars":{"question":"Are Jane and First for Women both women's magazines?"}},{"vars":{"question":"What profession does Nicholas Ray and Elia Kazan have in common?"}},{"vars":{"question":"Where is the company that purchased Aixam based in?"}},{"vars":{"question":"Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?"}},{"vars":{"question":"Who was inducted into the Rock and Roll Hall of Fame, David Lee Roth or Cia Berg?"}},{"vars":{"question":"Zimbabwe's Guwe Secondary School has a sister school in what New York county?"}}],"recentEvalDate":"2024-03-26T19:55:56.615Z","recentEvalId":"eval-2024-03-17T16:50:47","prompts":[{"id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","prompt":{"raw":"Respond to the following instruction: {{message}}","display":"Respond to the following instruction: {{message}}","id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":26,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},"evalId":"eval-2024-03-17T16:50:47"},{"id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","provider":"vertex:gemini-pro","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-17T16:59:26"},{"id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9240,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:27"},{"id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9147,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:27"}]},{"id":"1017b6c475f430ad3e152b001af93861211d94dde122791c59638a7511790990","count":1,"testCases":[{"description":"Check for exact match","vars":{"body":"Yes"},"assert":[{"type":"equals","value":"Yarr","metric":"Tone"}]},{"description":"Another basic substring check","vars":{"body":"I'm hungry"},"assert":[{"type":"icontains","value":"grub","metric":"Tone"}]},{"description":"Check if output is JSON","vars":{"body":"Output the story of your life in JSON"},"assert":[{"type":"is-json","metric":"Consistency"}]},{"description":"Check for semantic similarity","vars":{"body":"Hello world"},"assert":[{"type":"javascript","value":"output.startsWith('Ahoy')"},{"type":"python","value":"max(0, len(output) - 300)","metric":"Consistency"},{"type":"similar","value":"Ahoy, world","metric":"Tone"}]},{"description":"Use LLM to evaluate output","vars":{"body":"The quick brown fox jumps over the lazy dog"},"assert":[{"type":"llm-rubric","value":"Is spoken like a pirate","metric":"Tone"}]}],"recentEvalDate":"2024-03-17T17:02:39.867Z","recentEvalId":"eval-2024-03-17T17:02:39","prompts":[{"id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","prompt":{"raw":"Say this as though you are a pirate: {{body}}","display":"Say this as though you are a pirate: {{body}}","id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","provider":"openai:gpt-3.5-turbo","metrics":{"score":1.6255363200489734,"testPassCount":1,"testFailCount":4,"assertPassCount":3,"assertFailCount":4,"totalLatencyMs":7148,"tokenUsage":{"total":464,"prompt":103,"completion":361,"cached":0},"namedScores":{"Tone":1.8766089601469207,"Consistency":0},"cost":0.000593}},"evalId":"eval-2024-03-17T17:02:39"},{"id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","prompt":{"raw":"Say this as though you are a seafarer from the 17th century: {{body}}","display":"Say this as though you are a seafarer from the 17th century: {{body}}","id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","provider":"openai:gpt-3.5-turbo","metrics":{"score":3.645861906333771,"testPassCount":3,"testFailCount":2,"assertPassCount":5,"assertFailCount":2,"totalLatencyMs":8346,"tokenUsage":{"total":519,"prompt":143,"completion":376,"cached":0},"namedScores":{"Tone":2.9375857190013135,"Consistency":1},"cost":0.0006355000000000001}},"evalId":"eval-2024-03-17T17:02:39"}]},{"id":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","count":6,"testCases":[{"vars":null,"assert":[{"type":"icontains","value":"banana"}]}],"recentEvalDate":"2024-03-21T14:18:42.236Z","recentEvalId":"eval-2024-03-21T14:14:27","prompts":[{"id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","prompt":{"raw":"Write a tweet about bananas","display":"Write a tweet about bananas","id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","provider":"openai:chat:gpt-3.5-turbo","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":608,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-21T14:14:27"}]},{"id":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","count":18,"testCases":[{"vars":{"topic":"bananas"}}],"recentEvalDate":"2024-03-25T13:07:54.922Z","recentEvalId":"eval-2024-03-22T04:31:27","prompts":[{"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"azureopenai:asst_E4GyOBYKlnAzMi19SZF2Sn8I","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T04:31:27"},{"id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","prompt":{"raw":"Write an instagram post about {{topic}}\n","display":"Write an instagram post about {{topic}}\n","id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8851,"tokenUsage":{"total":363,"prompt":14,"completion":349,"cached":0},"namedScores":{},"cost":0.0005304999999999999}},"evalId":"eval-2024-03-23T14:12:31"},{"id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","prompt":{"raw":"Write a tweet about {{topic}}\n","display":"Write a tweet about {{topic}}\n","id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1855,"tokenUsage":{"total":69,"prompt":13,"completion":56,"cached":0},"namedScores":{},"cost":0.0000905}},"evalId":"eval-2024-03-23T14:12:31"}]},{"id":"f34954fbbe9e53ee4541ae7bd834851e724419383d1c5d69276084f36f52bc19","count":1,"testCases":[{"vars":{"output":"Hello world"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Greetings, planet"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Salutations, Earth"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]}],"recentEvalDate":"2024-03-22T19:43:44.465Z","recentEvalId":"eval-2024-03-22T19:43:44","prompts":[{"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":7,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T19:43:44"}]},{"id":"08dd5c7b0ed259e207805468c2d7ec70b6598a2deba182e635849391df127b0d","count":1,"testCases":[{"vars":{"output":"Hello world","tags":["foo","bar"]},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Greetings, planet","tags":["foo","bar"]},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Salutations, Earth","tags":["foo","bar"]},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]}],"recentEvalDate":"2024-03-22T19:43:48.815Z","recentEvalId":"eval-2024-03-22T19:43:48","prompts":[{"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":2.7943469785575052,"testPassCount":2,"testFailCount":4,"assertPassCount":14,"assertFailCount":4,"totalLatencyMs":12,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T19:43:48"}]},{"id":"a174ea017053d7e4c2a7871b881c14aa718428cf7e6dace78d95a1064c8cfa21","count":1,"testCases":[{"vars":{"output":"Hello world","tags":"foo, bar"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Greetings, planet","tags":"foo, bar"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Salutations, Earth","tags":"foo, bar"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]}],"recentEvalDate":"2024-03-22T19:44:13.597Z","recentEvalId":"eval-2024-03-22T19:44:13","prompts":[{"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":5,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T19:44:13"}]},{"id":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","count":1,"testCases":[{"vars":{"language":"French","input":"Hello world"}},{"vars":{"language":"Spanish","input":"Where is the library?"}}],"recentEvalDate":"2024-03-22T20:08:40.637Z","recentEvalId":"eval-2024-03-22T20:08:40","prompts":[{"id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","prompt":{"raw":"Convert this English to {{language}}: {{input}}","display":"Convert this English to {{language}}: {{input}}","id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":787,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:40"},{"id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","prompt":{"raw":"Translate to {{language}}: {{input}}","display":"Translate to {{language}}: {{input}}","id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":782,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:40"}]},{"id":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","count":8,"testCases":[{"vars":{"topic":"Our eco-friendly packaging"}},{"vars":{"topic":"A sneak peek at our secret menu item"}},{"vars":{"topic":"Behind-the-scenes at our latest photoshoot"}},{"vars":{"topic":"the impact of autonomous drones on wildlife conservation"}},{"vars":{"topic":"the emerging trend of virtual reality courtrooms"}},{"vars":{"topic":"the ethical implications of AI-generated art"}},{"vars":{"topic":"the unexpected health benefits of daily meditation"}},{"vars":{"topic":"how AI is changing the way we play board games"}},{"vars":{"topic":"unconventional productivity hacks involving household items"}},{"vars":{"topic":"An underground art exhibition in an abandoned subway station"}},{"vars":{"topic":"A webinar on the impact of AI on traditional marketing strategies"}},{"vars":{"topic":"The launch of a new eco-friendly sneaker made from ocean plastic"}},{"vars":{"topic":"the correlation between social media usage and self-esteem in teenagers"}},{"vars":{"topic":"the impact of urban noise pollution on migratory bird patterns"}},{"vars":{"topic":"the role of gut microbiota in moderating anxiety and depression"}}],"recentEvalDate":"2024-03-22T20:16:51.847Z","recentEvalId":"eval-2024-03-22T20:11:29","prompts":[{"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2828,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:11:29"}]},{"id":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","count":17,"testCases":[{"vars":{"topic":"Our eco-friendly packaging"}}],"recentEvalDate":"2024-03-22T20:48:02.587Z","recentEvalId":"eval-2024-03-22T20:29:23","prompts":[{"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":305,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:29:23"}]},{"id":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","count":5,"testCases":[{"vars":{"topic":"the weather"}},{"vars":{"topic":"bob dylan"}},{"vars":{"topic":"the Roman Empire"}},{"vars":{"topic":"file://./topic.txt"}}],"recentEvalDate":"2024-03-22T23:59:05.988Z","recentEvalId":"eval-2024-03-22T23:55:00","prompts":[{"id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8676,"tokenUsage":{"total":458,"prompt":121,"completion":337,"cached":0},"namedScores":{},"cost":0.000566}},"evalId":"eval-2024-03-22T23:55:00"},{"id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","prompt":{"raw":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","display":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12002,"tokenUsage":{"total":606,"prompt":106,"completion":500,"cached":0},"namedScores":{},"cost":0.000803}},"evalId":"eval-2024-03-22T23:55:00"},{"id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2688,"tokenUsage":{"total":133,"prompt":86,"completion":14,"cached":33},"namedScores":{},"cost":0.00008549999999999999}},"evalId":"eval-2024-03-22T23:55:00"},{"id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","prompt":{"raw":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","display":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4832,"tokenUsage":{"total":281,"prompt":68,"completion":167,"cached":46},"namedScores":{},"cost":0.0003315}},"evalId":"eval-2024-03-22T23:55:00"}]},{"id":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","count":1,"testCases":[{"vars":{"topic":"the weather"}},{"vars":{"topic":"bob dylan"}},{"vars":{"topic":"the Roman Empire"}},{"vars":{"topic":"file://./another_topic.txt"}}],"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-23T00:00:21","prompts":[{"id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7339,"tokenUsage":{"total":456,"prompt":119,"completion":337,"cached":0},"namedScores":{},"cost":0.000565}},"evalId":"eval-2024-03-23T00:00:21"},{"id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","prompt":{"raw":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","display":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":11872,"tokenUsage":{"total":612,"prompt":107,"completion":505,"cached":0},"namedScores":{},"cost":0.000811}},"evalId":"eval-2024-03-23T00:00:21"},{"id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2935,"tokenUsage":{"total":135,"prompt":115,"completion":20,"cached":0},"namedScores":{},"cost":0.00008749999999999999}},"evalId":"eval-2024-03-23T00:00:21"},{"id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","prompt":{"raw":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","display":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6173,"tokenUsage":{"total":288,"prompt":91,"completion":197,"cached":0},"namedScores":{},"cost":0.000341}},"evalId":"eval-2024-03-23T00:00:21"}]},{"id":"fd77de338af61b19edc0ce79be47635f14edd4bb3ff9a10c012e93612aaff3e2","count":1,"testCases":[{"vars":{"language":"French","body":"Hello world"}},{"vars":{"language":"French","body":"I'm hungry"}}],"recentEvalDate":"2024-03-25T13:00:19.393Z","recentEvalId":"eval-2024-03-25T13:00:19","prompts":[{"id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","prompt":{"raw":"Rephrase this in {{language}}: {{body | allcaps}}","display":"Rephrase this in {{language}}: {{body | allcaps}}","id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":720,"tokenUsage":{"total":47,"prompt":35,"completion":12,"cached":0},"namedScores":{},"cost":0.0000355}},"evalId":"eval-2024-03-25T13:00:19"},{"id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","prompt":{"raw":"Translate this to conversational {{language}}: {{body | allcaps}}","display":"Translate this to conversational {{language}}: {{body | allcaps}}","id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2145,"tokenUsage":{"total":50,"prompt":37,"completion":13,"cached":0},"namedScores":{},"cost":0.000038}},"evalId":"eval-2024-03-25T13:00:19"}]},{"id":"a2df04f077e9acdec3e4dab07c8cc870f5a038838e595610f8ea561ec04c4318","count":2,"testCases":[{"vars":{"topic":"monkeys"}},{"vars":{"topic":"bananas"},"assert":[{"type":"similar","value":"Bananas are naturally radioactive.","provider":{"id":"azureopenai:embeddings:ada-deployment1","config":{"apiHost":"aliothopenai.openai.azure.com"}}}]}],"recentEvalDate":"2024-03-25T13:11:17.083Z","recentEvalId":"eval-2024-03-25T13:11:10","prompts":[{"id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","prompt":{"raw":"Generate one very interesting fact about {{topic}}","display":"Generate one very interesting fact about {{topic}}","id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","provider":"azureopenai:chat:gpt-35-turbo-deployment1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-25T13:11:10"}]},{"id":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","count":1,"testCases":"vars.csv","recentEvalDate":"2024-03-26T14:15:17.488Z","recentEvalId":"eval-2024-03-26T14:15:17","prompts":[{"id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","prompt":{"raw":"Rephrase this in French: {{body}}","display":"Rephrase this in French: {{body}}","id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1066,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T14:15:17"},{"id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","prompt":{"raw":"Rephrase this like a pirate: {{body}}","display":"Rephrase this like a pirate: {{body}}","id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1182,"tokenUsage":{"total":53,"prompt":33,"completion":20,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T14:15:17"}]},{"id":"5fdf2d34df8100f9ee35a16711575a490c63d15438a03df15f2dfda1802aeda1","count":1,"testCases":[{"vars":{"body":"Hello world"},"assert":[],"options":{},"description":"Row #1"},{"vars":{"body":"I'm hungry"},"assert":[],"options":{},"description":"Row #2"}],"recentEvalDate":"2024-03-26T17:44:59.979Z","recentEvalId":"eval-2024-03-26T17:44:59","prompts":[{"id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","prompt":{"raw":"Rephrase this in French: {{body}}","display":"Rephrase this in French: {{body}}","id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","provider":"custom provider","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1076,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T17:44:59"},{"id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","prompt":{"raw":"Rephrase this like a pirate: {{body}}","display":"Rephrase this like a pirate: {{body}}","id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","provider":"custom provider","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":940,"tokenUsage":{"total":51,"prompt":33,"completion":18,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T17:44:59"}]},{"id":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","count":3,"testCases":[{"vars":{"message":"Hello world"},"assert":[{"type":"similar","threshold":0.25,"value":"Greetings earth","provider":"./customProvider.js"}]}],"recentEvalDate":"2024-03-26T19:39:39.037Z","recentEvalId":"eval-2024-03-26T19:39:27","prompts":[{"id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","prompt":{"raw":"Translate this to French: {{message}}","display":"Translate this to French: {{message}}","id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.4015448202047296,"testPassCount":1,"testFailCount":0,"assertPassCount":1,"assertFailCount":0,"totalLatencyMs":396,"tokenUsage":{"total":17,"prompt":14,"completion":3,"cached":0},"namedScores":{},"cost":0.0000115}},"evalId":"eval-2024-03-26T19:39:27"}]},{"id":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","count":5,"testCases":[{"vars":{"question":"What is the parental leave policy?","context":"file://./load_context.py"}}],"recentEvalDate":"2024-03-26T20:51:01.412Z","recentEvalId":"eval-2024-03-26T20:49:19","prompts":[{"id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","prompt":{"raw":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","display":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2304,"tokenUsage":{"total":172,"prompt":47,"completion":125,"cached":0},"namedScores":{},"cost":0.000211}},"evalId":"eval-2024-03-26T20:49:19"}]}]}
1
+ {"error":"Not implemented"}
@@ -0,0 +1 @@
1
+ {"error":"Not implemented"}
@@ -1 +1 @@
1
- {"data":[{"count":1,"id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","prompt":{"raw":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":25,"tokenUsage":{"total":961,"prompt":0,"completion":0,"cached":961},"namedScores":{},"cost":0.0010515}},"recentEvalDate":"2024-03-16T22:43:14.095Z","recentEvalId":"eval-2024-03-16T22:43:14","evals":[{"id":"eval-2024-03-16T22:43:14","datasetId":"b94facc99edd1198f2e1c0f5f880601fbcdb90c433ddfa8b0efef3e47e99f6e4","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":25,"tokenUsage":{"total":961,"prompt":0,"completion":0,"cached":961},"namedScores":{},"cost":0.0010515}}]},{"count":1,"id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","prompt":{"raw":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":19,"tokenUsage":{"total":1057,"prompt":0,"completion":0,"cached":1057},"namedScores":{},"cost":0.0011655}},"recentEvalDate":"2024-03-16T22:43:14.095Z","recentEvalId":"eval-2024-03-16T22:43:14","evals":[{"id":"eval-2024-03-16T22:43:14","datasetId":"b94facc99edd1198f2e1c0f5f880601fbcdb90c433ddfa8b0efef3e47e99f6e4","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":19,"tokenUsage":{"total":1057,"prompt":0,"completion":0,"cached":1057},"namedScores":{},"cost":0.0011655}}]},{"count":1,"id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","prompt":{"raw":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":734,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"recentEvalDate":"2024-03-17T16:13:02.163Z","recentEvalId":"eval-2024-03-17T16:13:02","evals":[{"id":"eval-2024-03-17T16:13:02","datasetId":"3a4469b21f1ebc825af2dfa2e90eef1f1cbe887647fe5a5595752f8ab467cb53","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":734,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}}]},{"count":1,"id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","prompt":{"raw":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"recentEvalDate":"2024-03-17T16:13:16.198Z","recentEvalId":"eval-2024-03-17T16:13:16","evals":[{"id":"eval-2024-03-17T16:13:16","datasetId":"5f0e1c1f9537421c928d7254f62621147be5f1309fc69ef8fbfce2d8947141d7","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}}]},{"count":8,"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":55,"prompt":28,"completion":27,"cached":0},"namedScores":{},"cost":0.0000545}},"recentEvalDate":"2024-03-17T16:20:45.953Z","recentEvalId":"eval-2024-03-17T16:13:42","evals":[{"id":"eval-2024-03-17T16:13:42","datasetId":"69f1aef3cb6d0ed8e2e04309281cab002e18794bece04c15c7bb767353669a2f","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":55,"prompt":28,"completion":27,"cached":0},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:14:37","datasetId":"8ccb8b12c5decb58c1c49b9b7c69fb9909104e80d65e252d18d8842726885a51","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":2,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:14:50","datasetId":"267be9d91be410004320457f7c3cffea2bdb40212b43336f09faa6b3a6bcf288","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":2,"assertFailCount":0,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:14:58","datasetId":"740ba3562af04218834915872cb95bafc21424244d994df82a26fc52d7f963bf","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:18:57","datasetId":"5120354632aa75999e26385ca8dccbe37cd8090b57e651250dc1b818482689bd","metrics":{"score":1.5,"testPassCount":1,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":916,"tokenUsage":{"total":114,"prompt":29,"completion":30,"cached":55},"namedScores":{},"cost":0.000114}},{"id":"eval-2024-03-17T16:19:24","datasetId":"f01d8461edb59e78b4c9585e29f3d97128312ff1afe1e102c84a848b6ef8e6cf","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":4,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},{"id":"eval-2024-03-17T16:19:49","datasetId":"6adf3df5c8d39f806bd74fc8358a43748de4d553058a11492c2a0f86cd3c2343","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":5,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},{"id":"eval-2024-03-17T16:20:45","datasetId":"6adf3df5c8d39f806bd74fc8358a43748de4d553058a11492c2a0f86cd3c2343","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":5,"assertFailCount":0,"totalLatencyMs":5,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}}]},{"count":2,"id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","prompt":{"raw":"[\n {\n \"role\": \"user\",\n \"content\": \"{{message}}\"\n }\n]\n","display":"chat_prompt","id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":7,"testPassCount":7,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":15397,"tokenUsage":{"total":784,"prompt":140,"completion":644,"cached":0},"namedScores":{},"cost":0.001036}},"recentEvalDate":"2024-03-17T16:44:08.392Z","recentEvalId":"eval-2024-03-17T16:44:08","evals":[{"id":"eval-2024-03-17T16:44:08","datasetId":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","metrics":{"score":7,"testPassCount":7,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":15397,"tokenUsage":{"total":784,"prompt":140,"completion":644,"cached":0},"namedScores":{},"cost":0.001036}},{"id":"eval-2024-03-17T16:44:08","datasetId":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","metrics":{"score":6,"testPassCount":6,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":14637,"tokenUsage":{"total":798,"prompt":140,"completion":658,"cached":0},"namedScores":{},"cost":0.001057}}]},{"count":1,"id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","prompt":{"raw":"User: {{message}}\nAssistant:\n","display":"completion_prompt","id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","provider":"llama70b-v2-chat","metrics":{"score":4,"testPassCount":4,"testFailCount":4,"assertPassCount":0,"assertFailCount":4,"totalLatencyMs":181485,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-17T16:44:08.392Z","recentEvalId":"eval-2024-03-17T16:44:08","evals":[{"id":"eval-2024-03-17T16:44:08","datasetId":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","metrics":{"score":4,"testPassCount":4,"testFailCount":4,"assertPassCount":0,"assertFailCount":4,"totalLatencyMs":181485,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":6,"id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","prompt":{"raw":"Respond to the following instruction: {{message}}","display":"Respond to the following instruction: {{message}}","id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":25,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},"recentEvalDate":"2024-03-17T16:50:47.503Z","recentEvalId":"eval-2024-03-17T16:45:58","evals":[{"id":"eval-2024-03-17T16:45:58","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":25,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},{"id":"eval-2024-03-17T16:45:58","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":9,"testPassCount":9,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":26,"tokenUsage":{"total":2375,"prompt":0,"completion":0,"cached":2375},"namedScores":{},"cost":0.0033105}},{"id":"eval-2024-03-17T16:46:21","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":31,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},{"id":"eval-2024-03-17T16:46:21","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":9,"testPassCount":9,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":32,"tokenUsage":{"total":2375,"prompt":0,"completion":0,"cached":2375},"namedScores":{},"cost":0.0033105}},{"id":"eval-2024-03-17T16:50:47","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":26,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},{"id":"eval-2024-03-17T16:50:47","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":9,"testPassCount":9,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":27,"tokenUsage":{"total":2375,"prompt":0,"completion":0,"cached":2375},"namedScores":{},"cost":0.0033105}}]},{"count":18,"id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","provider":"vertex:gemini-pro","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-26T19:55:56.615Z","recentEvalId":"eval-2024-03-17T16:59:26","evals":[{"id":"eval-2024-03-17T16:59:26","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-17T16:59:26","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10458,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:07:29","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":87301,"tokenUsage":{"total":1592,"prompt":1299,"completion":293,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:07:29","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":11476,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:15:59","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":90,"tokenUsage":{"total":1592,"prompt":1299,"completion":293,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:15:59","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":34365,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:28:54","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":58476,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:28:54","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":53458,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:29:39","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":109,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:29:39","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":105,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:31:58","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":130,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:31:58","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":125,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:34:12","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":122,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":1543},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:34:12","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":126,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:39:11","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":140,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":1543},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:39:11","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":129,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:55:56","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":114,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":1543},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:55:56","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":115,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":1,"id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","prompt":{"raw":"Say this as though you are a pirate: {{body}}","display":"Say this as though you are a pirate: {{body}}","id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","provider":"openai:gpt-3.5-turbo","metrics":{"score":1.6255363200489734,"testPassCount":1,"testFailCount":4,"assertPassCount":3,"assertFailCount":4,"totalLatencyMs":7148,"tokenUsage":{"total":464,"prompt":103,"completion":361,"cached":0},"namedScores":{"Tone":1.8766089601469207,"Consistency":0},"cost":0.000593}},"recentEvalDate":"2024-03-17T17:02:39.867Z","recentEvalId":"eval-2024-03-17T17:02:39","evals":[{"id":"eval-2024-03-17T17:02:39","datasetId":"1017b6c475f430ad3e152b001af93861211d94dde122791c59638a7511790990","metrics":{"score":1.6255363200489734,"testPassCount":1,"testFailCount":4,"assertPassCount":3,"assertFailCount":4,"totalLatencyMs":7148,"tokenUsage":{"total":464,"prompt":103,"completion":361,"cached":0},"namedScores":{"Tone":1.8766089601469207,"Consistency":0},"cost":0.000593}}]},{"count":1,"id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","prompt":{"raw":"Say this as though you are a seafarer from the 17th century: {{body}}","display":"Say this as though you are a seafarer from the 17th century: {{body}}","id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","provider":"openai:gpt-3.5-turbo","metrics":{"score":3.645861906333771,"testPassCount":3,"testFailCount":2,"assertPassCount":5,"assertFailCount":2,"totalLatencyMs":8346,"tokenUsage":{"total":519,"prompt":143,"completion":376,"cached":0},"namedScores":{"Tone":2.9375857190013135,"Consistency":1},"cost":0.0006355000000000001}},"recentEvalDate":"2024-03-17T17:02:39.867Z","recentEvalId":"eval-2024-03-17T17:02:39","evals":[{"id":"eval-2024-03-17T17:02:39","datasetId":"1017b6c475f430ad3e152b001af93861211d94dde122791c59638a7511790990","metrics":{"score":3.645861906333771,"testPassCount":3,"testFailCount":2,"assertPassCount":5,"assertFailCount":2,"totalLatencyMs":8346,"tokenUsage":{"total":519,"prompt":143,"completion":376,"cached":0},"namedScores":{"Tone":2.9375857190013135,"Consistency":1},"cost":0.0006355000000000001}}]},{"count":6,"id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","prompt":{"raw":"Write a tweet about bananas","display":"Write a tweet about bananas","id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","provider":"openai:chat:gpt-3.5-turbo","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":608,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-21T14:18:42.236Z","recentEvalId":"eval-2024-03-21T14:14:27","evals":[{"id":"eval-2024-03-21T14:14:27","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":608,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:14:36","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":622,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:16:38","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":646,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:16:47","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":465,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:17:38","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":449,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:18:42","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":292,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":92,"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"azureopenai:asst_E4GyOBYKlnAzMi19SZF2Sn8I","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-25T13:07:54.922Z","recentEvalId":"eval-2024-03-22T04:31:27","evals":[{"id":"eval-2024-03-22T04:31:27","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:32:42","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:33:40","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:34:55","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:39:05","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2414,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:40:03","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:41:00","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:43:34","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:44:02","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:44:26","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:46:07","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:46:51","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:47:30","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:48:53","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":5183,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:29","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2828,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:29","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:29","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:54","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2829,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:54","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2875,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:54","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1022,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:12","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2423,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:12","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2409,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:12","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1924,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:45","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1783,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:45","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2722,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:45","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2214,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:00","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2282,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:00","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2353,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:00","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2073,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:24","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2528,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:24","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2126,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:24","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1832,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:11","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1857,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:11","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1799,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:11","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2763,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:51","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":321,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:51","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":204,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:51","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":204,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:23","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":305,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:23","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":303,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:23","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":318,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:31","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":387,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:31","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":318,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:31","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":300,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:30:59","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":359,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:30:59","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":295,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:30:59","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":361,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:43","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":716,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:43","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":760,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:43","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":793,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:50","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":722,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:50","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:50","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":806,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:32:35","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":525,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:32:35","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":834,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:32:35","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":763,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:34:56","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":721,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:34:56","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":775,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:34:56","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":753,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:35:22","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":743,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:35:22","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":735,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:35:22","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":510,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:36:01","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":708,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:36:01","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":693,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:36:01","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":759,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:41:36","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":726,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:41:36","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":745,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:41:36","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":535,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:42:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":509,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:42:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":717,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:42:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":745,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:43:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":734,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:43:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":718,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:43:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":739,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:20","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":717,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:20","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":764,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:20","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":733,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:26","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":509,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:26","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":706,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:26","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":738,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:52","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":474,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:52","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":455,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:52","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":737,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:45:53","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":535,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:45:53","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":753,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:45:53","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":745,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:48:02","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":304,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:48:02","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":318,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:48:02","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":382,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-23T14:12:01","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1952,"tokenUsage":{"total":68,"prompt":12,"completion":56,"cached":0},"namedScores":{},"cost":0.00009}},{"id":"eval-2024-03-23T14:12:01","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3912,"tokenUsage":{"total":89,"prompt":12,"completion":77,"cached":0},"namedScores":{},"cost":0.00243}},{"id":"eval-2024-03-25T13:07:54","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":7,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T19:44:13.597Z","recentEvalId":"eval-2024-03-22T19:43:44","evals":[{"id":"eval-2024-03-22T19:43:44","datasetId":"f34954fbbe9e53ee4541ae7bd834851e724419383d1c5d69276084f36f52bc19","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":7,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:43:48","datasetId":"08dd5c7b0ed259e207805468c2d7ec70b6598a2deba182e635849391df127b0d","metrics":{"score":2.7943469785575052,"testPassCount":2,"testFailCount":4,"assertPassCount":14,"assertFailCount":4,"totalLatencyMs":12,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:44:13","datasetId":"a174ea017053d7e4c2a7871b881c14aa718428cf7e6dace78d95a1064c8cfa21","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":5,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9240,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:11:10.956Z","recentEvalId":"eval-2024-03-22T20:08:27","evals":[{"id":"eval-2024-03-22T20:08:27","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9240,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:09:44","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7325,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:10","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":5401,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9147,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:11:10.956Z","recentEvalId":"eval-2024-03-22T20:08:27","evals":[{"id":"eval-2024-03-22T20:08:27","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9147,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:09:44","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7849,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:10","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4699,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":2,"id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","prompt":{"raw":"Convert this English to {{language}}: {{input}}","display":"Convert this English to {{language}}: {{input}}","id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":787,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:08:40.637Z","recentEvalId":"eval-2024-03-22T20:08:40","evals":[{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":787,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":335,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":2,"id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","prompt":{"raw":"Translate to {{language}}: {{input}}","display":"Translate to {{language}}: {{input}}","id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":782,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:08:40.637Z","recentEvalId":"eval-2024-03-22T20:08:40","evals":[{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":782,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":343,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":6,"id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8676,"tokenUsage":{"total":458,"prompt":121,"completion":337,"cached":0},"namedScores":{},"cost":0.000566}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8676,"tokenUsage":{"total":458,"prompt":121,"completion":337,"cached":0},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":458,"prompt":0,"completion":0,"cached":458},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:56:38","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8,"tokenUsage":{"total":458,"prompt":0,"completion":0,"cached":458},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:56:53","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10,"tokenUsage":{"total":458,"prompt":0,"completion":0,"cached":458},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:59:05","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1868,"tokenUsage":{"total":470,"prompt":29,"completion":80,"cached":361},"namedScores":{},"cost":0.0005870000000000001}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7339,"tokenUsage":{"total":456,"prompt":119,"completion":337,"cached":0},"namedScores":{},"cost":0.000565}}]},{"count":5,"id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","prompt":{"raw":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","display":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12002,"tokenUsage":{"total":606,"prompt":106,"completion":500,"cached":0},"namedScores":{},"cost":0.000803}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12002,"tokenUsage":{"total":606,"prompt":106,"completion":500,"cached":0},"namedScores":{},"cost":0.000803}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1738,"tokenUsage":{"total":554,"prompt":35,"completion":59,"cached":460},"namedScores":{},"cost":0.000716}},{"id":"eval-2024-03-22T23:56:53","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12,"tokenUsage":{"total":606,"prompt":0,"completion":0,"cached":606},"namedScores":{},"cost":0.000803}},{"id":"eval-2024-03-22T23:59:05","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":17,"tokenUsage":{"total":606,"prompt":0,"completion":0,"cached":606},"namedScores":{},"cost":0.000803}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":11872,"tokenUsage":{"total":612,"prompt":107,"completion":505,"cached":0},"namedScores":{},"cost":0.000811}}]},{"count":3,"id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2688,"tokenUsage":{"total":133,"prompt":86,"completion":14,"cached":33},"namedScores":{},"cost":0.00008549999999999999}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2688,"tokenUsage":{"total":133,"prompt":86,"completion":14,"cached":33},"namedScores":{},"cost":0.00008549999999999999}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":518,"tokenUsage":{"total":142,"prompt":37,"completion":4,"cached":101},"namedScores":{},"cost":0.00008999999999999999}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2935,"tokenUsage":{"total":135,"prompt":115,"completion":20,"cached":0},"namedScores":{},"cost":0.00008749999999999999}}]},{"count":6,"id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","prompt":{"raw":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","display":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4832,"tokenUsage":{"total":281,"prompt":68,"completion":167,"cached":46},"namedScores":{},"cost":0.0003315}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4832,"tokenUsage":{"total":281,"prompt":68,"completion":167,"cached":46},"namedScores":{},"cost":0.0003315}},{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3612,"tokenUsage":{"total":149,"prompt":82,"completion":67,"cached":0},"namedScores":{},"cost":0.00014150000000000002}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1313,"tokenUsage":{"total":274,"prompt":31,"completion":19,"cached":224},"namedScores":{},"cost":0.000312}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":857,"tokenUsage":{"total":157,"prompt":28,"completion":19,"cached":110},"namedScores":{},"cost":0.0001455}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6173,"tokenUsage":{"total":288,"prompt":91,"completion":197,"cached":0},"namedScores":{},"cost":0.000341}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3608,"tokenUsage":{"total":149,"prompt":83,"completion":66,"cached":0},"namedScores":{},"cost":0.0001405}}]},{"count":3,"id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","prompt":{"raw":"Write an instagram post about {{topic}}\n","display":"Write an instagram post about {{topic}}\n","id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8851,"tokenUsage":{"total":363,"prompt":14,"completion":349,"cached":0},"namedScores":{},"cost":0.0005304999999999999}},"recentEvalDate":"2024-03-23T14:13:18.732Z","recentEvalId":"eval-2024-03-23T14:12:31","evals":[{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8851,"tokenUsage":{"total":363,"prompt":14,"completion":349,"cached":0},"namedScores":{},"cost":0.0005304999999999999}},{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9308,"tokenUsage":{"total":235,"prompt":14,"completion":221,"cached":0},"namedScores":{},"cost":0.00677}},{"id":"eval-2024-03-23T14:13:18","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3,"tokenUsage":{"total":235,"prompt":0,"completion":0,"cached":235},"namedScores":{},"cost":0.00677}}]},{"count":3,"id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","prompt":{"raw":"Write a tweet about {{topic}}\n","display":"Write a tweet about {{topic}}\n","id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1855,"tokenUsage":{"total":69,"prompt":13,"completion":56,"cached":0},"namedScores":{},"cost":0.0000905}},"recentEvalDate":"2024-03-23T14:13:18.732Z","recentEvalId":"eval-2024-03-23T14:12:31","evals":[{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1855,"tokenUsage":{"total":69,"prompt":13,"completion":56,"cached":0},"namedScores":{},"cost":0.0000905}},{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3348,"tokenUsage":{"total":74,"prompt":13,"completion":61,"cached":0},"namedScores":{},"cost":0.00196}},{"id":"eval-2024-03-23T14:13:18","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":74,"prompt":0,"completion":0,"cached":74},"namedScores":{},"cost":0.00196}}]},{"count":1,"id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","prompt":{"raw":"Rephrase this in {{language}}: {{body | allcaps}}","display":"Rephrase this in {{language}}: {{body | allcaps}}","id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":720,"tokenUsage":{"total":47,"prompt":35,"completion":12,"cached":0},"namedScores":{},"cost":0.0000355}},"recentEvalDate":"2024-03-25T13:00:19.393Z","recentEvalId":"eval-2024-03-25T13:00:19","evals":[{"id":"eval-2024-03-25T13:00:19","datasetId":"fd77de338af61b19edc0ce79be47635f14edd4bb3ff9a10c012e93612aaff3e2","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":720,"tokenUsage":{"total":47,"prompt":35,"completion":12,"cached":0},"namedScores":{},"cost":0.0000355}}]},{"count":1,"id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","prompt":{"raw":"Translate this to conversational {{language}}: {{body | allcaps}}","display":"Translate this to conversational {{language}}: {{body | allcaps}}","id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2145,"tokenUsage":{"total":50,"prompt":37,"completion":13,"cached":0},"namedScores":{},"cost":0.000038}},"recentEvalDate":"2024-03-25T13:00:19.393Z","recentEvalId":"eval-2024-03-25T13:00:19","evals":[{"id":"eval-2024-03-25T13:00:19","datasetId":"fd77de338af61b19edc0ce79be47635f14edd4bb3ff9a10c012e93612aaff3e2","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2145,"tokenUsage":{"total":50,"prompt":37,"completion":13,"cached":0},"namedScores":{},"cost":0.000038}}]},{"count":2,"id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","prompt":{"raw":"Generate one very interesting fact about {{topic}}","display":"Generate one very interesting fact about {{topic}}","id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","provider":"azureopenai:chat:gpt-35-turbo-deployment1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-25T13:11:17.083Z","recentEvalId":"eval-2024-03-25T13:11:10","evals":[{"id":"eval-2024-03-25T13:11:10","datasetId":"a2df04f077e9acdec3e4dab07c8cc870f5a038838e595610f8ea561ec04c4318","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-25T13:11:17","datasetId":"a2df04f077e9acdec3e4dab07c8cc870f5a038838e595610f8ea561ec04c4318","metrics":{"score":1.5,"testPassCount":1,"testFailCount":1,"assertPassCount":2,"assertFailCount":1,"totalLatencyMs":1443,"tokenUsage":{"total":76,"prompt":30,"completion":46,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","prompt":{"raw":"Rephrase this in French: {{body}}","display":"Rephrase this in French: {{body}}","id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1066,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-26T17:44:59.979Z","recentEvalId":"eval-2024-03-26T14:15:17","evals":[{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1066,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":807,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T17:44:59","datasetId":"5fdf2d34df8100f9ee35a16711575a490c63d15438a03df15f2dfda1802aeda1","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1076,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","prompt":{"raw":"Rephrase this like a pirate: {{body}}","display":"Rephrase this like a pirate: {{body}}","id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1182,"tokenUsage":{"total":53,"prompt":33,"completion":20,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-26T17:44:59.979Z","recentEvalId":"eval-2024-03-26T14:15:17","evals":[{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1182,"tokenUsage":{"total":53,"prompt":33,"completion":20,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":788,"tokenUsage":{"total":48,"prompt":33,"completion":15,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T17:44:59","datasetId":"5fdf2d34df8100f9ee35a16711575a490c63d15438a03df15f2dfda1802aeda1","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":940,"tokenUsage":{"total":51,"prompt":33,"completion":18,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","prompt":{"raw":"Translate this to French: {{message}}","display":"Translate this to French: {{message}}","id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.4015448202047296,"testPassCount":1,"testFailCount":0,"assertPassCount":1,"assertFailCount":0,"totalLatencyMs":396,"tokenUsage":{"total":17,"prompt":14,"completion":3,"cached":0},"namedScores":{},"cost":0.0000115}},"recentEvalDate":"2024-03-26T19:39:39.037Z","recentEvalId":"eval-2024-03-26T19:39:27","evals":[{"id":"eval-2024-03-26T19:39:27","datasetId":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","metrics":{"score":0.4015448202047296,"testPassCount":1,"testFailCount":0,"assertPassCount":1,"assertFailCount":0,"totalLatencyMs":396,"tokenUsage":{"total":17,"prompt":14,"completion":3,"cached":0},"namedScores":{},"cost":0.0000115}},{"id":"eval-2024-03-26T19:39:36","datasetId":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:39:39","datasetId":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":5,"id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","prompt":{"raw":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","display":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2304,"tokenUsage":{"total":172,"prompt":47,"completion":125,"cached":0},"namedScores":{},"cost":0.000211}},"recentEvalDate":"2024-03-26T20:51:01.412Z","recentEvalId":"eval-2024-03-26T20:49:19","evals":[{"id":"eval-2024-03-26T20:49:19","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2304,"tokenUsage":{"total":172,"prompt":47,"completion":125,"cached":0},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:49:40","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:50:03","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:50:29","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:51:01","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}}]}]}
1
+ {"error":"Not implemented"}
@@ -1 +1 @@
1
- {"data":[{"id":"eval-2024-04-01T14:09:47","label":"eval-2024-04-01T14:09:47"},{"id":"eval-2024-04-01T14:08:52","label":"eval-2024-04-01T14:08:52"},{"id":"eval-2024-04-01T14:08:34","label":"eval-2024-04-01T14:08:34"},{"id":"eval-2024-04-01T13:41:02","label":"eval-2024-04-01T13:41:02"},{"id":"eval-2024-04-01T13:40:43","label":"eval-2024-04-01T13:40:43"},{"id":"eval-2024-04-01T13:40:29","label":"eval-2024-04-01T13:40:29"},{"id":"eval-2024-04-01T12:57:44","label":"Transform with external function (eval-2024-04-01T12:57:44)"},{"id":"eval-2024-04-01T12:57:10","label":"Transform with external function (eval-2024-04-01T12:57:10)"},{"id":"eval-2024-03-31T21:19:32","label":"eval-2024-03-31T21:19:32"},{"id":"eval-2024-03-31T00:47:02","label":"eval-2024-03-31T00:47:02"},{"id":"eval-2024-03-31T00:18:09","label":"eval-2024-03-31T00:18:09"},{"id":"eval-2024-03-31T00:13:39","label":"eval-2024-03-31T00:13:39"},{"id":"eval-2024-03-31T00:13:15","label":"eval-2024-03-31T00:13:15"},{"id":"eval-2024-03-30T18:05:30","label":"eval-2024-03-30T18:05:30"},{"id":"eval-2024-03-30T18:04:39","label":"eval-2024-03-30T18:04:39"},{"id":"eval-2024-03-30T18:04:24","label":"eval-2024-03-30T18:04:24"},{"id":"eval-2024-03-30T17:24:41","label":"eval-2024-03-30T17:24:41"},{"id":"eval-2024-03-30T16:38:25","label":"eval-2024-03-30T16:38:25"},{"id":"eval-2024-03-30T16:38:17","label":"eval-2024-03-30T16:38:17"},{"id":"eval-2024-03-30T16:15:15","label":"eval-2024-03-30T16:15:15"},{"id":"eval-2024-03-30T16:14:18","label":"eval-2024-03-30T16:14:18"},{"id":"eval-2024-03-30T13:14:40","label":"eval-2024-03-30T13:14:40"},{"id":"eval-2024-03-30T13:14:04","label":"eval-2024-03-30T13:14:04"},{"id":"eval-2024-03-30T12:46:52","label":"eval-2024-03-30T12:46:52"},{"id":"eval-2024-03-30T12:44:44","label":"eval-2024-03-30T12:44:44"},{"id":"eval-2024-03-29T13:08:13","label":"eval-2024-03-29T13:08:13"},{"id":"eval-2024-03-29T13:05:16","label":"eval-2024-03-29T13:05:16"},{"id":"eval-2024-03-28T23:22:52","label":"eval-2024-03-28T23:22:52"},{"id":"eval-2024-03-28T23:13:26","label":"eval-2024-03-28T23:13:26"},{"id":"eval-2024-03-28T23:12:59","label":"eval-2024-03-28T23:12:59"},{"id":"eval-2024-03-28T23:11:09","label":"eval-2024-03-28T23:11:09"},{"id":"eval-2024-03-28T23:10:28","label":"eval-2024-03-28T23:10:28"},{"id":"eval-2024-03-28T23:08:32","label":"eval-2024-03-28T23:08:32"},{"id":"eval-2024-03-28T23:07:53","label":"eval-2024-03-28T23:07:53"},{"id":"eval-2024-03-28T23:07:36","label":"eval-2024-03-28T23:07:36"},{"id":"eval-2024-03-28T23:04:22","label":"eval-2024-03-28T23:04:22"},{"id":"eval-2024-03-28T23:04:10","label":"eval-2024-03-28T23:04:10"},{"id":"eval-2024-03-28T23:02:54","label":"eval-2024-03-28T23:02:54"},{"id":"eval-2024-03-28T22:56:47","label":"eval-2024-03-28T22:56:47"},{"id":"eval-2024-03-28T22:47:05","label":"eval-2024-03-28T22:47:05"},{"id":"eval-2024-03-28T22:46:50","label":"eval-2024-03-28T22:46:50"},{"id":"eval-2024-03-27T18:53:44","label":"eval-2024-03-27T18:53:44"},{"id":"eval-2024-03-27T17:15:35","label":"eval-2024-03-27T17:15:35"},{"id":"eval-2024-03-27T17:14:25","label":"eval-2024-03-27T17:14:25"},{"id":"eval-2024-03-27T17:14:08","label":"eval-2024-03-27T17:14:08"},{"id":"eval-2024-03-27T17:13:40","label":"eval-2024-03-27T17:13:40"},{"id":"eval-2024-03-27T12:55:28","label":"Simple test (eval-2024-03-27T12:55:28)"},{"id":"eval-2024-03-27T12:55:24","label":"Simple test (eval-2024-03-27T12:55:24)"},{"id":"eval-2024-03-26T20:56:31","label":"Dynamic var example (eval-2024-03-26T20:56:31)"},{"id":"eval-2024-03-26T20:54:38","label":"Dynamic var example (eval-2024-03-26T20:54:38)"},{"id":"eval-2024-03-26T20:54:22","label":"Dynamic var example (eval-2024-03-26T20:54:22)"},{"id":"eval-2024-03-26T20:53:19","label":"Dynamic var example (eval-2024-03-26T20:53:19)"},{"id":"eval-2024-03-26T20:52:29","label":"Dynamic var example (eval-2024-03-26T20:52:29)"},{"id":"eval-2024-03-26T20:52:14","label":"Dynamic var example (eval-2024-03-26T20:52:14)"},{"id":"eval-2024-03-26T20:51:52","label":"Dynamic var example (eval-2024-03-26T20:51:52)"},{"id":"eval-2024-03-26T20:51:18","label":"Dynamic var example (eval-2024-03-26T20:51:18)"},{"id":"eval-2024-03-26T20:51:01","label":"Dynamic var example (eval-2024-03-26T20:51:01)"},{"id":"eval-2024-03-26T20:50:29","label":"Dynamic var example (eval-2024-03-26T20:50:29)"},{"id":"eval-2024-03-26T20:50:03","label":"Dynamic var example (eval-2024-03-26T20:50:03)"},{"id":"eval-2024-03-26T20:49:40","label":"Dynamic var example (eval-2024-03-26T20:49:40)"},{"id":"eval-2024-03-26T20:49:19","label":"Dynamic var example (eval-2024-03-26T20:49:19)"},{"id":"eval-2024-03-26T19:55:56","label":"eval-2024-03-26T19:55:56"},{"id":"eval-2024-03-26T19:39:39","label":"eval-2024-03-26T19:39:39"},{"id":"eval-2024-03-26T19:39:36","label":"eval-2024-03-26T19:39:36"},{"id":"eval-2024-03-26T19:39:27","label":"eval-2024-03-26T19:39:27"},{"id":"eval-2024-03-26T19:39:11","label":"eval-2024-03-26T19:39:11"},{"id":"eval-2024-03-26T17:44:59","label":"eval-2024-03-26T17:44:59"},{"id":"eval-2024-03-26T14:15:17","label":"eval-2024-03-26T14:15:17"},{"id":"eval-2024-03-25T13:11:17","label":"eval-2024-03-25T13:11:17"},{"id":"eval-2024-03-25T13:11:10","label":"eval-2024-03-25T13:11:10"},{"id":"eval-2024-03-25T13:07:54","label":"Azure OpenAI assistant (eval-2024-03-25T13:07:54)"},{"id":"eval-2024-03-25T13:00:19","label":"eval-2024-03-25T13:00:19"},{"id":"eval-2024-03-23T14:13:18","label":"Glob test (eval-2024-03-23T14:13:18)"},{"id":"eval-2024-03-23T14:12:31","label":"Glob test (eval-2024-03-23T14:12:31)"},{"id":"eval-2024-03-23T14:12:01","label":"Glob test (eval-2024-03-23T14:12:01)"},{"id":"eval-2024-03-23T00:00:21","label":"eval-2024-03-23T00:00:21"},{"id":"eval-2024-03-22T23:59:05","label":"eval-2024-03-22T23:59:05"},{"id":"eval-2024-03-22T23:56:53","label":"eval-2024-03-22T23:56:53"},{"id":"eval-2024-03-22T23:56:38","label":"eval-2024-03-22T23:56:38"},{"id":"eval-2024-03-22T23:56:08","label":"eval-2024-03-22T23:56:08"},{"id":"eval-2024-03-22T23:55:00","label":"eval-2024-03-22T23:55:00"},{"id":"eval-2024-03-22T20:48:02","label":"eval-2024-03-22T20:48:02"},{"id":"eval-2024-03-22T20:45:53","label":"eval-2024-03-22T20:45:53"},{"id":"eval-2024-03-22T20:44:52","label":"eval-2024-03-22T20:44:52"},{"id":"eval-2024-03-22T20:44:26","label":"eval-2024-03-22T20:44:26"},{"id":"eval-2024-03-22T20:44:20","label":"eval-2024-03-22T20:44:20"},{"id":"eval-2024-03-22T20:43:54","label":"eval-2024-03-22T20:43:54"},{"id":"eval-2024-03-22T20:42:54","label":"eval-2024-03-22T20:42:54"},{"id":"eval-2024-03-22T20:41:36","label":"eval-2024-03-22T20:41:36"},{"id":"eval-2024-03-22T20:36:01","label":"eval-2024-03-22T20:36:01"},{"id":"eval-2024-03-22T20:35:22","label":"eval-2024-03-22T20:35:22"},{"id":"eval-2024-03-22T20:34:56","label":"eval-2024-03-22T20:34:56"},{"id":"eval-2024-03-22T20:32:35","label":"eval-2024-03-22T20:32:35"},{"id":"eval-2024-03-22T20:31:50","label":"eval-2024-03-22T20:31:50"},{"id":"eval-2024-03-22T20:31:43","label":"eval-2024-03-22T20:31:43"},{"id":"eval-2024-03-22T20:30:59","label":"eval-2024-03-22T20:30:59"},{"id":"eval-2024-03-22T20:29:31","label":"eval-2024-03-22T20:29:31"},{"id":"eval-2024-03-22T20:29:23","label":"eval-2024-03-22T20:29:23"},{"id":"eval-2024-03-22T20:16:51","label":"eval-2024-03-22T20:16:51"},{"id":"eval-2024-03-22T20:16:11","label":"eval-2024-03-22T20:16:11"}]}
1
+ {"error":"Not implemented"}