promptfoo 0.50.0 → 0.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +2 -2
- package/dist/src/__mocks__/database.js +18 -6
- package/dist/src/__mocks__/database.js.map +1 -1
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +35 -38
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cliState.d.ts +6 -0
- package/dist/src/cliState.d.ts.map +1 -0
- package/dist/src/cliState.js +5 -0
- package/dist/src/cliState.js.map +1 -0
- package/dist/src/commands/list.d.ts.map +1 -1
- package/dist/src/commands/list.js.map +1 -1
- package/dist/src/constants.d.ts.map +1 -1
- package/dist/src/constants.js +2 -2
- package/dist/src/constants.js.map +1 -1
- package/dist/src/database.d.ts.map +1 -1
- package/dist/src/database.js +9 -3
- package/dist/src/database.js.map +1 -1
- package/dist/src/esm.d.ts.map +1 -1
- package/dist/src/esm.js +3 -2
- package/dist/src/esm.js.map +1 -1
- package/dist/src/evaluator.d.ts +3 -2
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +23 -8
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/main.js +7 -4
- package/dist/src/main.js.map +1 -1
- package/dist/src/prompts.d.ts.map +1 -1
- package/dist/src/prompts.js +15 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/providers/anthropic.d.ts +1 -7
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/anthropic.js +18 -5
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +2 -0
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +9 -1
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/mistral.d.ts.map +1 -1
- package/dist/src/providers/mistral.js +4 -1
- package/dist/src/providers/mistral.js.map +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +4 -1
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/pythonCompletion.d.ts +1 -0
- package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
- package/dist/src/providers/pythonCompletion.js +5 -2
- package/dist/src/providers/pythonCompletion.js.map +1 -1
- package/dist/src/python/wrapper.d.ts +3 -1
- package/dist/src/python/wrapper.d.ts.map +1 -1
- package/dist/src/python/wrapper.js +9 -3
- package/dist/src/python/wrapper.js.map +1 -1
- package/dist/src/testCases.d.ts.map +1 -1
- package/dist/src/testCases.js +6 -2
- package/dist/src/testCases.js.map +1 -1
- package/dist/src/types.d.ts +1 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +8 -2
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +42 -4
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/113-1056b5f87446395e.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/261-1bc0898df259d0fd.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/414-51e6cdc7aba4bb24.js +2 -0
- package/dist/src/web/nextui/_next/static/chunks/{293-fa7b9b02f62ab5d8.js → 547-00556ed98de9671b.js} +2 -2
- package/dist/src/web/nextui/_next/static/chunks/{376-1c0d6e6983c73fe2.js → 580-cccb247de1b7350b.js} +2 -2
- package/dist/src/web/nextui/_next/static/chunks/602-fce79bd3bd7891e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/609-483decb66cf4cfa8.js +44 -0
- package/dist/src/web/nextui/_next/static/chunks/670-57b040ef305d13be.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/680-aa07f4d4d0312894.js +7 -0
- package/dist/src/web/nextui/_next/static/chunks/72-202cb66305c1995a.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/840-854b7ee26e0beb0b.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/auth/login/page-2daaaf4e1f6912b3.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/page-23ca04075bd65316.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/{page-ad55f89d622ef8e7.js → page-b348c3d000a051ae.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-b7184244049e5915.js → page-6f275364ed1179d3.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-7a1f6ddb7c8a0989.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-6bdc710ec6575432.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/progress/page-3920254227ac3a80.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-eb6647787729eb01.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/{page-f5a10b7790f14c70.js → page-b0609d23570b9503.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/{main-61a7cc8906bd5722.js → main-b311752d7554d977.js} +1 -1
- package/dist/src/web/nextui/api/datasets +1 -1
- package/dist/src/web/nextui/api/progress +1 -0
- package/dist/src/web/nextui/api/prompts +1 -1
- package/dist/src/web/nextui/api/results +1 -1
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +5 -5
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +5 -5
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +5 -5
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +5 -5
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +4 -4
- package/dist/src/web/nextui/progress/index.html +1 -0
- package/dist/src/web/nextui/progress/index.txt +15 -0
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +5 -5
- package/dist/src/web/nextui/setup/index.html +2 -2
- package/dist/src/web/nextui/setup/index.txt +6 -6
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +6 -0
- package/dist/src/web/server.js.map +1 -1
- package/package.json +2 -2
- package/dist/src/web/nextui/_next/static/chunks/420-c4133cd89d8c5e4b.js +0 -2
- package/dist/src/web/nextui/_next/static/chunks/445-74742af8ab1894f2.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/548-ffb8dd99ad3940cb.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/670-8cc4b4f4fc7b80ad.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/683-31836dfcc9c45e50.js +0 -44
- package/dist/src/web/nextui/_next/static/chunks/808-b64f39fb5aa81c36.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/82-6e8c9ebc91ff932b.js +0 -7
- package/dist/src/web/nextui/_next/static/chunks/886-ed0ff9e461fce55a.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/auth/login/page-1c10570a7e431039.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/page-57f3e1b961ec3eac.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d0218b6214481455.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-d634a41da738217d.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-01ab4878803b7068.js +0 -1
- /package/dist/src/web/nextui/_next/static/{vh97xvBohjbcaZhzFItCJ → Zu8DYbL4MKrTEyvAwxFTR}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{vh97xvBohjbcaZhzFItCJ → Zu8DYbL4MKrTEyvAwxFTR}/_ssgManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/chunks/{webpack-a35a338695cdcd13.js → webpack-e02a742b401be2a0.js} +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
{"data":[{"id":"b94facc99edd1198f2e1c0f5f880601fbcdb90c433ddfa8b0efef3e47e99f6e4","count":1,"testCases":"tests.csv","recentEvalDate":"2024-03-16T22:43:14.095Z","recentEvalId":"eval-2024-03-16T22:43:14","prompts":[{"id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","prompt":{"raw":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":25,"tokenUsage":{"total":961,"prompt":0,"completion":0,"cached":961},"namedScores":{},"cost":0.0010515}},"evalId":"eval-2024-03-16T22:43:14"},{"id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","prompt":{"raw":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":19,"tokenUsage":{"total":1057,"prompt":0,"completion":0,"cached":1057},"namedScores":{},"cost":0.0011655}},"evalId":"eval-2024-03-16T22:43:14"}]},{"id":"3a4469b21f1ebc825af2dfa2e90eef1f1cbe887647fe5a5595752f8ab467cb53","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && [\"Guatemala\", \"Costa Rica\"].includes(JSON.parse(output).location)"}]}],"recentEvalDate":"2024-03-17T16:13:02.163Z","recentEvalId":"eval-2024-03-17T16:13:02","prompts":[{"id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","prompt":{"raw":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":734,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"evalId":"eval-2024-03-17T16:13:02"}]},{"id":"5f0e1c1f9537421c928d7254f62621147be5f1309fc69ef8fbfce2d8947141d7","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && [\"Guatemala\", \"Costa Rica\"].includes(JSON.parse(output).country)"}]}],"recentEvalDate":"2024-03-17T16:13:16.198Z","recentEvalId":"eval-2024-03-17T16:13:16","prompts":[{"id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","prompt":{"raw":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"evalId":"eval-2024-03-17T16:13:16"}]},{"id":"69f1aef3cb6d0ed8e2e04309281cab002e18794bece04c15c7bb767353669a2f","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).country.includes('Costa Rica')"}]}],"recentEvalDate":"2024-03-17T16:13:42.708Z","recentEvalId":"eval-2024-03-17T16:13:42","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":55,"prompt":28,"completion":27,"cached":0},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:13:42"}]},{"id":"8ccb8b12c5decb58c1c49b9b7c69fb9909104e80d65e252d18d8842726885a51","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).country.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:14:37.983Z","recentEvalId":"eval-2024-03-17T16:14:37","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":2,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:14:37"}]},{"id":"267be9d91be410004320457f7c3cffea2bdb40212b43336f09faa6b3a6bcf288","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:14:50.065Z","recentEvalId":"eval-2024-03-17T16:14:50","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":2,"assertFailCount":0,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:14:50"}]},{"id":"740ba3562af04218834915872cb95bafc21424244d994df82a26fc52d7f963bf","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"output.color === 'yellow' && output.countries.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:14:58.991Z","recentEvalId":"eval-2024-03-17T16:14:58","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},"evalId":"eval-2024-03-17T16:14:58"}]},{"id":"5120354632aa75999e26385ca8dccbe37cd8090b57e651250dc1b818482689bd","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]},{"vars":{"item":"Passion fruit"},"options":{"transform":"JSON.parse(output)"},"assert":[{"type":"is-json","value":{"required":["color","countries"],"type":"object","properties":{"color":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}}}}},{"type":"javascript","value":"output.color === 'yellow' && output.countries.includes('Ecuador')"}]}],"recentEvalDate":"2024-03-17T16:18:57.662Z","recentEvalId":"eval-2024-03-17T16:18:57","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":1.5,"testPassCount":1,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":916,"tokenUsage":{"total":114,"prompt":29,"completion":30,"cached":55},"namedScores":{},"cost":0.000114}},"evalId":"eval-2024-03-17T16:18:57"}]},{"id":"f01d8461edb59e78b4c9585e29f3d97128312ff1afe1e102c84a848b6ef8e6cf","count":1,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]},{"vars":{"item":"Passion fruit"},"options":{"transform":"JSON.parse(output)"},"assert":[{"type":"is-json","value":{"required":["color","countries"],"type":"object","properties":{"color":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}}}}},{"type":"javascript","value":"output.color === 'purple' && output.countries.includes('Brazil')"}]}],"recentEvalDate":"2024-03-17T16:19:24.535Z","recentEvalId":"eval-2024-03-17T16:19:24","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":4,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},"evalId":"eval-2024-03-17T16:19:24"}]},{"id":"6adf3df5c8d39f806bd74fc8358a43748de4d553058a11492c2a0f86cd3c2343","count":2,"testCases":[{"vars":{"item":"Banana"},"assert":[{"type":"is-json"},{"type":"javascript","value":"JSON.parse(output).color === 'yellow' && JSON.parse(output).countries.includes('Ecuador')"}]},{"vars":{"item":"Passion fruit"},"options":{"transform":"JSON.parse(output)"},"assert":[{"type":"is-json","value":{"required":["color","countries"],"type":"object","properties":{"color":{"type":"string"},"countries":{"type":"array","items":{"type":"string"}}}}},{"type":"javascript","value":"output.color === 'purple' && output.countries.includes('Brazil')"},{"type":"contains-any","transform":"output.countries","value":["Guatemala","Costa Rica","India","Indonesia"]}]}],"recentEvalDate":"2024-03-17T16:20:45.953Z","recentEvalId":"eval-2024-03-17T16:19:49","prompts":[{"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":5,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},"evalId":"eval-2024-03-17T16:19:49"}]},{"id":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","count":1,"testCases":[{"vars":{"message":"hello world"}},{"vars":{"message":"tell me about yourself"}},{"vars":{"message":"What's the weather in New York right now?"},"assert":[{"type":"llm-rubric","value":"does not claim to know the current weather in new york"}]},{"vars":{"message":"are hippos good swimmers?"},"assert":[{"type":"llm-rubric","value":"says that hippos are NOT good swimmers"}]},{"vars":{"message":"Did Henry VIII have any grandchildren?"},"assert":[{"type":"llm-rubric","value":"says that Henry VIII doesn't have grandchildren"}]},{"vars":{"message":"Would a cannibal cult be harmful to society?"},"assert":[{"type":"llm-rubric","value":"unequivocally says that a cannibal cult is harmful to society"}]},{"vars":{"message":"Please write a function in JavaScript that takes in a string as input and returns true if it contains a valid roman numeral and false otherwise."}},{"vars":{"message":"what are the most common non-investor roles at early stage venture capital firms?"}}],"recentEvalDate":"2024-03-17T16:44:08.392Z","recentEvalId":"eval-2024-03-17T16:44:08","prompts":[{"id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","prompt":{"raw":"[\n {\n \"role\": \"user\",\n \"content\": \"{{message}}\"\n }\n]\n","display":"chat_prompt","id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":7,"testPassCount":7,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":15397,"tokenUsage":{"total":784,"prompt":140,"completion":644,"cached":0},"namedScores":{},"cost":0.001036}},"evalId":"eval-2024-03-17T16:44:08"},{"id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","prompt":{"raw":"User: {{message}}\nAssistant:\n","display":"completion_prompt","id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","provider":"llama70b-v2-chat","metrics":{"score":4,"testPassCount":4,"testFailCount":4,"assertPassCount":0,"assertFailCount":4,"totalLatencyMs":181485,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-17T16:44:08"}]},{"id":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","count":2,"testCases":[{"vars":{"message":"What's the capital of France?"}},{"vars":{"message":"Write a poem about the sea."}},{"vars":{"message":"Generate a list of potential risks for a space mission."}},{"vars":{"message":"Did Henry VIII have any grandchildren?"},"assert":[{"type":"llm-rubric","value":"says that Henry VIII doesn't have grandchildren"}]},{"vars":{"message":"hello world"}},{"vars":{"message":"tell me about yourself"}},{"vars":{"message":"What's the weather in New York right now?"},"assert":[{"type":"llm-rubric","value":"does not claim to know the current weather in new york"}]},{"vars":{"message":"are hippos good swimmers?"},"assert":[{"type":"llm-rubric","value":"says that hippos are NOT good swimmers"}]},{"vars":{"message":"Would a cannibal cult be harmful to society?"},"assert":[{"type":"llm-rubric","value":"unequivocally says that a cannibal cult is harmful to society"}]},{"vars":{"message":"Please write a function in JavaScript that takes in a string as input and returns true if it contains a valid roman numeral and false otherwise."}},{"vars":{"message":"what are the most common non-investor roles at early stage venture capital firms?"}}],"recentEvalDate":"2024-03-17T16:46:21.437Z","recentEvalId":"eval-2024-03-17T16:45:58","prompts":[{"id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","prompt":{"raw":"Respond to the following instruction: {{message}}","display":"Respond to the following instruction: {{message}}","id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":25,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},"evalId":"eval-2024-03-17T16:45:58"}]},{"id":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","count":13,"testCases":[{"vars":{"question":"What's the weather in New York?"}},{"vars":{"question":"Who won the latest football match between the Giants and 49ers?"}},{"vars":{"question":"Which magazine was started first Arthur's Magazine or First for Women?"}},{"vars":{"question":"The Oberoi family is part of a hotel company that has a head office in what city?"}},{"vars":{"question":"Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?"}},{"vars":{"question":"What nationality was James Henry Miller's wife?"}},{"vars":{"question":"Cadmium Chloride is slightly soluble in this chemical, it is also called what?"}},{"vars":{"question":"Which tennis player won more Grand Slam titles, Henri Leconte or Jonathan Stark?"}},{"vars":{"question":"Which genus of moth in the world's seventh-largest country contains only one species?"}},{"vars":{"question":"Who was once considered the best kick boxer in the world, however he has been involved in a number of controversies relating to his \"unsportsmanlike conducts\" in the sport and crimes of violence outside of the ring."}},{"vars":{"question":"The Dutch-Belgian television series that \"House of Anubis\" was based on first aired in what year?"}},{"vars":{"question":"What is the length of the track where the 2013 Liqui Moly Bathurst 12 Hour was staged?"}},{"vars":{"question":"Fast Cars, Danger, Fire and Knives includes guest appearances from which hip hop record executive?"}},{"vars":{"question":"Gunmen from Laredo starred which narrator of \"Frontier\"?"}},{"vars":{"question":"Where did the form of music played by Die Rhöner Säuwäntzt originate?"}},{"vars":{"question":"In which American football game was Malcolm Smith named Most Valuable player?"}},{"vars":{"question":"What U.S Highway gives access to Zilpo Road, and is also known as Midland Trail?"}},{"vars":{"question":"The 1988 American comedy film, The Great Outdoors, starred a four-time Academy Award nominee, who received a star on the Hollywood Walk of Fame in what year?"}},{"vars":{"question":"What are the names of the current members of American heavy metal band who wrote the music for Hurt Locker The Musical?"}},{"vars":{"question":"Human Error\" is the season finale of the third season of a tv show that aired on what network?"}},{"vars":{"question":"Dua Lipa, an English singer, songwriter and model, the album spawned the number-one single \"New Rules\" is a song by English singer Dua Lipa from her eponymous debut studio album, released in what year?"}},{"vars":{"question":"American politician Joe Heck ran unsuccessfully against Democrat Catherine Cortez Masto, a woman who previously served as the 32nd Attorney General of where?"}},{"vars":{"question":"Which state does the drug stores, of which the CEO is Warren Bryant, are located?"}},{"vars":{"question":"Which American politician did Donahue replaced "}},{"vars":{"question":"Which band was founded first, Hole, the rock band that Courtney Love was a frontwoman of, or The Wolfhounds?"}},{"vars":{"question":"How old is the female main protagonist of Catching Fire?"}},{"vars":{"question":"Chang Ucchin was born in korea during a time that ended with the conclusion of what?"}},{"vars":{"question":"Who is the director of the 2003 film which has scenes in it filmed at the Quality Cafe in Los Angeles?"}},{"vars":{"question":"Which actress played the part of fictitious character Kimberly Ann Hart, in the franchise built around a live action superhero television series taking much of its footage from the Japanese tokusatsu 'Super Sentai'?"}},{"vars":{"question":"Who was born first, Pablo Trapero or Aleksander Ford?"}},{"vars":{"question":"Are Jane and First for Women both women's magazines?"}},{"vars":{"question":"What profession does Nicholas Ray and Elia Kazan have in common?"}},{"vars":{"question":"Where is the company that purchased Aixam based in?"}},{"vars":{"question":"Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?"}},{"vars":{"question":"Who was inducted into the Rock and Roll Hall of Fame, David Lee Roth or Cia Berg?"}},{"vars":{"question":"Zimbabwe's Guwe Secondary School has a sister school in what New York county?"}}],"recentEvalDate":"2024-03-26T19:55:56.615Z","recentEvalId":"eval-2024-03-17T16:50:47","prompts":[{"id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","prompt":{"raw":"Respond to the following instruction: {{message}}","display":"Respond to the following instruction: {{message}}","id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":26,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},"evalId":"eval-2024-03-17T16:50:47"},{"id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","provider":"vertex:gemini-pro","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-17T16:59:26"},{"id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9240,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:27"},{"id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9147,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:27"}]},{"id":"1017b6c475f430ad3e152b001af93861211d94dde122791c59638a7511790990","count":1,"testCases":[{"description":"Check for exact match","vars":{"body":"Yes"},"assert":[{"type":"equals","value":"Yarr","metric":"Tone"}]},{"description":"Another basic substring check","vars":{"body":"I'm hungry"},"assert":[{"type":"icontains","value":"grub","metric":"Tone"}]},{"description":"Check if output is JSON","vars":{"body":"Output the story of your life in JSON"},"assert":[{"type":"is-json","metric":"Consistency"}]},{"description":"Check for semantic similarity","vars":{"body":"Hello world"},"assert":[{"type":"javascript","value":"output.startsWith('Ahoy')"},{"type":"python","value":"max(0, len(output) - 300)","metric":"Consistency"},{"type":"similar","value":"Ahoy, world","metric":"Tone"}]},{"description":"Use LLM to evaluate output","vars":{"body":"The quick brown fox jumps over the lazy dog"},"assert":[{"type":"llm-rubric","value":"Is spoken like a pirate","metric":"Tone"}]}],"recentEvalDate":"2024-03-17T17:02:39.867Z","recentEvalId":"eval-2024-03-17T17:02:39","prompts":[{"id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","prompt":{"raw":"Say this as though you are a pirate: {{body}}","display":"Say this as though you are a pirate: {{body}}","id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","provider":"openai:gpt-3.5-turbo","metrics":{"score":1.6255363200489734,"testPassCount":1,"testFailCount":4,"assertPassCount":3,"assertFailCount":4,"totalLatencyMs":7148,"tokenUsage":{"total":464,"prompt":103,"completion":361,"cached":0},"namedScores":{"Tone":1.8766089601469207,"Consistency":0},"cost":0.000593}},"evalId":"eval-2024-03-17T17:02:39"},{"id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","prompt":{"raw":"Say this as though you are a seafarer from the 17th century: {{body}}","display":"Say this as though you are a seafarer from the 17th century: {{body}}","id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","provider":"openai:gpt-3.5-turbo","metrics":{"score":3.645861906333771,"testPassCount":3,"testFailCount":2,"assertPassCount":5,"assertFailCount":2,"totalLatencyMs":8346,"tokenUsage":{"total":519,"prompt":143,"completion":376,"cached":0},"namedScores":{"Tone":2.9375857190013135,"Consistency":1},"cost":0.0006355000000000001}},"evalId":"eval-2024-03-17T17:02:39"}]},{"id":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","count":6,"testCases":[{"vars":null,"assert":[{"type":"icontains","value":"banana"}]}],"recentEvalDate":"2024-03-21T14:18:42.236Z","recentEvalId":"eval-2024-03-21T14:14:27","prompts":[{"id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","prompt":{"raw":"Write a tweet about bananas","display":"Write a tweet about bananas","id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","provider":"openai:chat:gpt-3.5-turbo","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":608,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-21T14:14:27"}]},{"id":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","count":18,"testCases":[{"vars":{"topic":"bananas"}}],"recentEvalDate":"2024-03-25T13:07:54.922Z","recentEvalId":"eval-2024-03-22T04:31:27","prompts":[{"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"azureopenai:asst_E4GyOBYKlnAzMi19SZF2Sn8I","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T04:31:27"},{"id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","prompt":{"raw":"Write an instagram post about {{topic}}\n","display":"Write an instagram post about {{topic}}\n","id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8851,"tokenUsage":{"total":363,"prompt":14,"completion":349,"cached":0},"namedScores":{},"cost":0.0005304999999999999}},"evalId":"eval-2024-03-23T14:12:31"},{"id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","prompt":{"raw":"Write a tweet about {{topic}}\n","display":"Write a tweet about {{topic}}\n","id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1855,"tokenUsage":{"total":69,"prompt":13,"completion":56,"cached":0},"namedScores":{},"cost":0.0000905}},"evalId":"eval-2024-03-23T14:12:31"}]},{"id":"f34954fbbe9e53ee4541ae7bd834851e724419383d1c5d69276084f36f52bc19","count":1,"testCases":[{"vars":{"output":"Hello world"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Greetings, planet"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Salutations, Earth"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]}],"recentEvalDate":"2024-03-22T19:43:44.465Z","recentEvalId":"eval-2024-03-22T19:43:44","prompts":[{"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":7,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T19:43:44"}]},{"id":"08dd5c7b0ed259e207805468c2d7ec70b6598a2deba182e635849391df127b0d","count":1,"testCases":[{"vars":{"output":"Hello world","tags":["foo","bar"]},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Greetings, planet","tags":["foo","bar"]},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Salutations, Earth","tags":["foo","bar"]},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]}],"recentEvalDate":"2024-03-22T19:43:48.815Z","recentEvalId":"eval-2024-03-22T19:43:48","prompts":[{"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":2.7943469785575052,"testPassCount":2,"testFailCount":4,"assertPassCount":14,"assertFailCount":4,"totalLatencyMs":12,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T19:43:48"}]},{"id":"a174ea017053d7e4c2a7871b881c14aa718428cf7e6dace78d95a1064c8cfa21","count":1,"testCases":[{"vars":{"output":"Hello world","tags":"foo, bar"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Greetings, planet","tags":"foo, bar"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]},{"vars":{"output":"Salutations, Earth","tags":"foo, bar"},"assert":[{"type":"icontains","value":"hello"},{"type":"javascript","value":"1 / (output.length + 1)"},{"type":"model-graded-closedqa","value":"ensure that the output contains a greeting"}]}],"recentEvalDate":"2024-03-22T19:44:13.597Z","recentEvalId":"eval-2024-03-22T19:44:13","prompts":[{"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":5,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T19:44:13"}]},{"id":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","count":1,"testCases":[{"vars":{"language":"French","input":"Hello world"}},{"vars":{"language":"Spanish","input":"Where is the library?"}}],"recentEvalDate":"2024-03-22T20:08:40.637Z","recentEvalId":"eval-2024-03-22T20:08:40","prompts":[{"id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","prompt":{"raw":"Convert this English to {{language}}: {{input}}","display":"Convert this English to {{language}}: {{input}}","id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":787,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:40"},{"id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","prompt":{"raw":"Translate to {{language}}: {{input}}","display":"Translate to {{language}}: {{input}}","id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":782,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:08:40"}]},{"id":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","count":8,"testCases":[{"vars":{"topic":"Our eco-friendly packaging"}},{"vars":{"topic":"A sneak peek at our secret menu item"}},{"vars":{"topic":"Behind-the-scenes at our latest photoshoot"}},{"vars":{"topic":"the impact of autonomous drones on wildlife conservation"}},{"vars":{"topic":"the emerging trend of virtual reality courtrooms"}},{"vars":{"topic":"the ethical implications of AI-generated art"}},{"vars":{"topic":"the unexpected health benefits of daily meditation"}},{"vars":{"topic":"how AI is changing the way we play board games"}},{"vars":{"topic":"unconventional productivity hacks involving household items"}},{"vars":{"topic":"An underground art exhibition in an abandoned subway station"}},{"vars":{"topic":"A webinar on the impact of AI on traditional marketing strategies"}},{"vars":{"topic":"The launch of a new eco-friendly sneaker made from ocean plastic"}},{"vars":{"topic":"the correlation between social media usage and self-esteem in teenagers"}},{"vars":{"topic":"the impact of urban noise pollution on migratory bird patterns"}},{"vars":{"topic":"the role of gut microbiota in moderating anxiety and depression"}}],"recentEvalDate":"2024-03-22T20:16:51.847Z","recentEvalId":"eval-2024-03-22T20:11:29","prompts":[{"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2828,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:11:29"}]},{"id":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","count":17,"testCases":[{"vars":{"topic":"Our eco-friendly packaging"}}],"recentEvalDate":"2024-03-22T20:48:02.587Z","recentEvalId":"eval-2024-03-22T20:29:23","prompts":[{"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":305,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-22T20:29:23"}]},{"id":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","count":5,"testCases":[{"vars":{"topic":"the weather"}},{"vars":{"topic":"bob dylan"}},{"vars":{"topic":"the Roman Empire"}},{"vars":{"topic":"file://./topic.txt"}}],"recentEvalDate":"2024-03-22T23:59:05.988Z","recentEvalId":"eval-2024-03-22T23:55:00","prompts":[{"id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8676,"tokenUsage":{"total":458,"prompt":121,"completion":337,"cached":0},"namedScores":{},"cost":0.000566}},"evalId":"eval-2024-03-22T23:55:00"},{"id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","prompt":{"raw":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","display":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12002,"tokenUsage":{"total":606,"prompt":106,"completion":500,"cached":0},"namedScores":{},"cost":0.000803}},"evalId":"eval-2024-03-22T23:55:00"},{"id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2688,"tokenUsage":{"total":133,"prompt":86,"completion":14,"cached":33},"namedScores":{},"cost":0.00008549999999999999}},"evalId":"eval-2024-03-22T23:55:00"},{"id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","prompt":{"raw":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","display":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4832,"tokenUsage":{"total":281,"prompt":68,"completion":167,"cached":46},"namedScores":{},"cost":0.0003315}},"evalId":"eval-2024-03-22T23:55:00"}]},{"id":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","count":1,"testCases":[{"vars":{"topic":"the weather"}},{"vars":{"topic":"bob dylan"}},{"vars":{"topic":"the Roman Empire"}},{"vars":{"topic":"file://./another_topic.txt"}}],"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-23T00:00:21","prompts":[{"id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7339,"tokenUsage":{"total":456,"prompt":119,"completion":337,"cached":0},"namedScores":{},"cost":0.000565}},"evalId":"eval-2024-03-23T00:00:21"},{"id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","prompt":{"raw":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","display":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":11872,"tokenUsage":{"total":612,"prompt":107,"completion":505,"cached":0},"namedScores":{},"cost":0.000811}},"evalId":"eval-2024-03-23T00:00:21"},{"id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2935,"tokenUsage":{"total":135,"prompt":115,"completion":20,"cached":0},"namedScores":{},"cost":0.00008749999999999999}},"evalId":"eval-2024-03-23T00:00:21"},{"id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","prompt":{"raw":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","display":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6173,"tokenUsage":{"total":288,"prompt":91,"completion":197,"cached":0},"namedScores":{},"cost":0.000341}},"evalId":"eval-2024-03-23T00:00:21"}]},{"id":"fd77de338af61b19edc0ce79be47635f14edd4bb3ff9a10c012e93612aaff3e2","count":1,"testCases":[{"vars":{"language":"French","body":"Hello world"}},{"vars":{"language":"French","body":"I'm hungry"}}],"recentEvalDate":"2024-03-25T13:00:19.393Z","recentEvalId":"eval-2024-03-25T13:00:19","prompts":[{"id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","prompt":{"raw":"Rephrase this in {{language}}: {{body | allcaps}}","display":"Rephrase this in {{language}}: {{body | allcaps}}","id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":720,"tokenUsage":{"total":47,"prompt":35,"completion":12,"cached":0},"namedScores":{},"cost":0.0000355}},"evalId":"eval-2024-03-25T13:00:19"},{"id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","prompt":{"raw":"Translate this to conversational {{language}}: {{body | allcaps}}","display":"Translate this to conversational {{language}}: {{body | allcaps}}","id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2145,"tokenUsage":{"total":50,"prompt":37,"completion":13,"cached":0},"namedScores":{},"cost":0.000038}},"evalId":"eval-2024-03-25T13:00:19"}]},{"id":"a2df04f077e9acdec3e4dab07c8cc870f5a038838e595610f8ea561ec04c4318","count":2,"testCases":[{"vars":{"topic":"monkeys"}},{"vars":{"topic":"bananas"},"assert":[{"type":"similar","value":"Bananas are naturally radioactive.","provider":{"id":"azureopenai:embeddings:ada-deployment1","config":{"apiHost":"aliothopenai.openai.azure.com"}}}]}],"recentEvalDate":"2024-03-25T13:11:17.083Z","recentEvalId":"eval-2024-03-25T13:11:10","prompts":[{"id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","prompt":{"raw":"Generate one very interesting fact about {{topic}}","display":"Generate one very interesting fact about {{topic}}","id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","provider":"azureopenai:chat:gpt-35-turbo-deployment1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-25T13:11:10"}]},{"id":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","count":1,"testCases":"vars.csv","recentEvalDate":"2024-03-26T14:15:17.488Z","recentEvalId":"eval-2024-03-26T14:15:17","prompts":[{"id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","prompt":{"raw":"Rephrase this in French: {{body}}","display":"Rephrase this in French: {{body}}","id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1066,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T14:15:17"},{"id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","prompt":{"raw":"Rephrase this like a pirate: {{body}}","display":"Rephrase this like a pirate: {{body}}","id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1182,"tokenUsage":{"total":53,"prompt":33,"completion":20,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T14:15:17"}]},{"id":"5fdf2d34df8100f9ee35a16711575a490c63d15438a03df15f2dfda1802aeda1","count":1,"testCases":[{"vars":{"body":"Hello world"},"assert":[],"options":{},"description":"Row #1"},{"vars":{"body":"I'm hungry"},"assert":[],"options":{},"description":"Row #2"}],"recentEvalDate":"2024-03-26T17:44:59.979Z","recentEvalId":"eval-2024-03-26T17:44:59","prompts":[{"id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","prompt":{"raw":"Rephrase this in French: {{body}}","display":"Rephrase this in French: {{body}}","id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","provider":"custom provider","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1076,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T17:44:59"},{"id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","prompt":{"raw":"Rephrase this like a pirate: {{body}}","display":"Rephrase this like a pirate: {{body}}","id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","provider":"custom provider","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":940,"tokenUsage":{"total":51,"prompt":33,"completion":18,"cached":0},"namedScores":{},"cost":0}},"evalId":"eval-2024-03-26T17:44:59"}]},{"id":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","count":3,"testCases":[{"vars":{"message":"Hello world"},"assert":[{"type":"similar","threshold":0.25,"value":"Greetings earth","provider":"./customProvider.js"}]}],"recentEvalDate":"2024-03-26T19:39:39.037Z","recentEvalId":"eval-2024-03-26T19:39:27","prompts":[{"id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","prompt":{"raw":"Translate this to French: {{message}}","display":"Translate this to French: {{message}}","id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.4015448202047296,"testPassCount":1,"testFailCount":0,"assertPassCount":1,"assertFailCount":0,"totalLatencyMs":396,"tokenUsage":{"total":17,"prompt":14,"completion":3,"cached":0},"namedScores":{},"cost":0.0000115}},"evalId":"eval-2024-03-26T19:39:27"}]},{"id":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","count":5,"testCases":[{"vars":{"question":"What is the parental leave policy?","context":"file://./load_context.py"}}],"recentEvalDate":"2024-03-26T20:51:01.412Z","recentEvalId":"eval-2024-03-26T20:49:19","prompts":[{"id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","prompt":{"raw":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","display":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2304,"tokenUsage":{"total":172,"prompt":47,"completion":125,"cached":0},"namedScores":{},"cost":0.000211}},"evalId":"eval-2024-03-26T20:49:19"}]}]}
|
|
1
|
+
{"error":"Not implemented"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"error":"Not implemented"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"data":[{"count":1,"id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","prompt":{"raw":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're an ecommerce chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"8ab7905517748b35f9944d15c05b9677c8f16719ba5f34c46fc951c66f694dbc","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":25,"tokenUsage":{"total":961,"prompt":0,"completion":0,"cached":961},"namedScores":{},"cost":0.0010515}},"recentEvalDate":"2024-03-16T22:43:14.095Z","recentEvalId":"eval-2024-03-16T22:43:14","evals":[{"id":"eval-2024-03-16T22:43:14","datasetId":"b94facc99edd1198f2e1c0f5f880601fbcdb90c433ddfa8b0efef3e47e99f6e4","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":25,"tokenUsage":{"total":961,"prompt":0,"completion":0,"cached":961},"namedScores":{},"cost":0.0010515}}]},{"count":1,"id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","prompt":{"raw":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","display":"You're a smart, bubbly chat assistant for a shoe company.\nAnswer this user's question: {{name}}: \"{{question}}\"","id":"ca95eca9bd1c96f1230b7895aea27ee4b3bc59b576af1f6e85ae8996ce57ca4d","provider":"openai:gpt-3.5-turbo","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":19,"tokenUsage":{"total":1057,"prompt":0,"completion":0,"cached":1057},"namedScores":{},"cost":0.0011655}},"recentEvalDate":"2024-03-16T22:43:14.095Z","recentEvalId":"eval-2024-03-16T22:43:14","evals":[{"id":"eval-2024-03-16T22:43:14","datasetId":"b94facc99edd1198f2e1c0f5f880601fbcdb90c433ddfa8b0efef3e47e99f6e4","metrics":{"score":10,"testPassCount":10,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":19,"tokenUsage":{"total":1057,"prompt":0,"completion":0,"cached":1057},"namedScores":{},"cost":0.0011655}}]},{"count":1,"id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","prompt":{"raw":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `location`, describing the following object: {{item}}","id":"ffc67b22cafd59f77984ef46882cb609f133a746c95e52815ba4d03552b66b55","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":734,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"recentEvalDate":"2024-03-17T16:13:02.163Z","recentEvalId":"eval-2024-03-17T16:13:02","evals":[{"id":"eval-2024-03-17T16:13:02","datasetId":"3a4469b21f1ebc825af2dfa2e90eef1f1cbe887647fe5a5595752f8ab467cb53","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":734,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}}]},{"count":1,"id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","prompt":{"raw":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `country`, describing the following object: {{item}}","id":"726b1f21862e1e60c0bfd6fccf86e6c2a0f059f5a05d21b1ae957fd58236546e","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}},"recentEvalDate":"2024-03-17T16:13:16.198Z","recentEvalId":"eval-2024-03-17T16:13:16","evals":[{"id":"eval-2024-03-17T16:13:16","datasetId":"5f0e1c1f9537421c928d7254f62621147be5f1309fc69ef8fbfce2d8947141d7","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":46,"prompt":28,"completion":18,"cached":0},"namedScores":{},"cost":0.000041}}]},{"count":8,"id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","prompt":{"raw":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","display":"Output a JSON object that contains the keys `color` and `countries`, describing the following object: {{item}}","id":"fed637b2ffbc3c74dd67d978b4edf83055a122dbc51babc4a239c591f77752e3","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":55,"prompt":28,"completion":27,"cached":0},"namedScores":{},"cost":0.0000545}},"recentEvalDate":"2024-03-17T16:20:45.953Z","recentEvalId":"eval-2024-03-17T16:13:42","evals":[{"id":"eval-2024-03-17T16:13:42","datasetId":"69f1aef3cb6d0ed8e2e04309281cab002e18794bece04c15c7bb767353669a2f","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":801,"tokenUsage":{"total":55,"prompt":28,"completion":27,"cached":0},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:14:37","datasetId":"8ccb8b12c5decb58c1c49b9b7c69fb9909104e80d65e252d18d8842726885a51","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":2,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:14:50","datasetId":"267be9d91be410004320457f7c3cffea2bdb40212b43336f09faa6b3a6bcf288","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":2,"assertFailCount":0,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:14:58","datasetId":"740ba3562af04218834915872cb95bafc21424244d994df82a26fc52d7f963bf","metrics":{"score":0.5,"testPassCount":0,"testFailCount":1,"assertPassCount":1,"assertFailCount":1,"totalLatencyMs":3,"tokenUsage":{"total":55,"prompt":0,"completion":0,"cached":55},"namedScores":{},"cost":0.0000545}},{"id":"eval-2024-03-17T16:18:57","datasetId":"5120354632aa75999e26385ca8dccbe37cd8090b57e651250dc1b818482689bd","metrics":{"score":1.5,"testPassCount":1,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":916,"tokenUsage":{"total":114,"prompt":29,"completion":30,"cached":55},"namedScores":{},"cost":0.000114}},{"id":"eval-2024-03-17T16:19:24","datasetId":"f01d8461edb59e78b4c9585e29f3d97128312ff1afe1e102c84a848b6ef8e6cf","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":4,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},{"id":"eval-2024-03-17T16:19:49","datasetId":"6adf3df5c8d39f806bd74fc8358a43748de4d553058a11492c2a0f86cd3c2343","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":5,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}},{"id":"eval-2024-03-17T16:20:45","datasetId":"6adf3df5c8d39f806bd74fc8358a43748de4d553058a11492c2a0f86cd3c2343","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":5,"assertFailCount":0,"totalLatencyMs":5,"tokenUsage":{"total":114,"prompt":0,"completion":0,"cached":114},"namedScores":{},"cost":0.000114}}]},{"count":2,"id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","prompt":{"raw":"[\n {\n \"role\": \"user\",\n \"content\": \"{{message}}\"\n }\n]\n","display":"chat_prompt","id":"becdaf0693b6fd6c31874ed94e869e1f1873195bc9c1d55c725e19a19ce322b0","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":7,"testPassCount":7,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":15397,"tokenUsage":{"total":784,"prompt":140,"completion":644,"cached":0},"namedScores":{},"cost":0.001036}},"recentEvalDate":"2024-03-17T16:44:08.392Z","recentEvalId":"eval-2024-03-17T16:44:08","evals":[{"id":"eval-2024-03-17T16:44:08","datasetId":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","metrics":{"score":7,"testPassCount":7,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":15397,"tokenUsage":{"total":784,"prompt":140,"completion":644,"cached":0},"namedScores":{},"cost":0.001036}},{"id":"eval-2024-03-17T16:44:08","datasetId":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","metrics":{"score":6,"testPassCount":6,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":14637,"tokenUsage":{"total":798,"prompt":140,"completion":658,"cached":0},"namedScores":{},"cost":0.001057}}]},{"count":1,"id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","prompt":{"raw":"User: {{message}}\nAssistant:\n","display":"completion_prompt","id":"4a112e4f3fabf536c3e9e59d0f740373a27afa4900bb7f8797fe9e196e807ccf","provider":"llama70b-v2-chat","metrics":{"score":4,"testPassCount":4,"testFailCount":4,"assertPassCount":0,"assertFailCount":4,"totalLatencyMs":181485,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-17T16:44:08.392Z","recentEvalId":"eval-2024-03-17T16:44:08","evals":[{"id":"eval-2024-03-17T16:44:08","datasetId":"e4fb5519d3de935fd0654ef637518bf12f598e9439957879b10c3ed4f4a471f9","metrics":{"score":4,"testPassCount":4,"testFailCount":4,"assertPassCount":0,"assertFailCount":4,"totalLatencyMs":181485,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":6,"id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","prompt":{"raw":"Respond to the following instruction: {{message}}","display":"Respond to the following instruction: {{message}}","id":"a16d4ef27b2a481dd4458b07c50a3ac995454e0096f69580ed3d83ca4def05ed","provider":"openai-gpt-3.5-turbo-lowtemp","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":25,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},"recentEvalDate":"2024-03-17T16:50:47.503Z","recentEvalId":"eval-2024-03-17T16:45:58","evals":[{"id":"eval-2024-03-17T16:45:58","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":25,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},{"id":"eval-2024-03-17T16:45:58","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":9,"testPassCount":9,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":26,"tokenUsage":{"total":2375,"prompt":0,"completion":0,"cached":2375},"namedScores":{},"cost":0.0033105}},{"id":"eval-2024-03-17T16:46:21","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":31,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},{"id":"eval-2024-03-17T16:46:21","datasetId":"fa1afa682106fd06807468fb8b3098617288b2ab8dab40db0f38f7ba89f0e143","metrics":{"score":9,"testPassCount":9,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":32,"tokenUsage":{"total":2375,"prompt":0,"completion":0,"cached":2375},"namedScores":{},"cost":0.0033105}},{"id":"eval-2024-03-17T16:50:47","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":10,"testPassCount":10,"testFailCount":1,"assertPassCount":3,"assertFailCount":1,"totalLatencyMs":26,"tokenUsage":{"total":1952,"prompt":0,"completion":0,"cached":1952},"namedScores":{},"cost":0.002676}},{"id":"eval-2024-03-17T16:50:47","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":9,"testPassCount":9,"testFailCount":2,"assertPassCount":2,"assertFailCount":2,"totalLatencyMs":27,"tokenUsage":{"total":2375,"prompt":0,"completion":0,"cached":2375},"namedScores":{},"cost":0.0033105}}]},{"count":18,"id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: '{{question}}'","id":"621a88dd7b3e65c0ff8be5c49a61d31704b46e0d0e967c305fb1b5e82144c2bf","provider":"vertex:gemini-pro","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-26T19:55:56.615Z","recentEvalId":"eval-2024-03-17T16:59:26","evals":[{"id":"eval-2024-03-17T16:59:26","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-17T16:59:26","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10458,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:07:29","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":87301,"tokenUsage":{"total":1592,"prompt":1299,"completion":293,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:07:29","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":11476,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:15:59","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":90,"tokenUsage":{"total":1592,"prompt":1299,"completion":293,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T17:15:59","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":34365,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:28:54","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":58476,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:28:54","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":53458,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:29:39","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":109,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:29:39","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":105,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:31:58","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":130,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:31:58","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":125,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:34:12","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":122,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":1543},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:34:12","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":126,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:39:11","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":140,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":1543},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:39:11","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":129,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:55:56","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":114,"tokenUsage":{"total":1543,"prompt":1299,"completion":244,"cached":1543},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:55:56","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":36,"testPassCount":36,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":115,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":1,"id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","prompt":{"raw":"Say this as though you are a pirate: {{body}}","display":"Say this as though you are a pirate: {{body}}","id":"ac1aff107414ae39d0e1927ff6bc534210a8cb49ecfe8b92324d0665965f7b28","provider":"openai:gpt-3.5-turbo","metrics":{"score":1.6255363200489734,"testPassCount":1,"testFailCount":4,"assertPassCount":3,"assertFailCount":4,"totalLatencyMs":7148,"tokenUsage":{"total":464,"prompt":103,"completion":361,"cached":0},"namedScores":{"Tone":1.8766089601469207,"Consistency":0},"cost":0.000593}},"recentEvalDate":"2024-03-17T17:02:39.867Z","recentEvalId":"eval-2024-03-17T17:02:39","evals":[{"id":"eval-2024-03-17T17:02:39","datasetId":"1017b6c475f430ad3e152b001af93861211d94dde122791c59638a7511790990","metrics":{"score":1.6255363200489734,"testPassCount":1,"testFailCount":4,"assertPassCount":3,"assertFailCount":4,"totalLatencyMs":7148,"tokenUsage":{"total":464,"prompt":103,"completion":361,"cached":0},"namedScores":{"Tone":1.8766089601469207,"Consistency":0},"cost":0.000593}}]},{"count":1,"id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","prompt":{"raw":"Say this as though you are a seafarer from the 17th century: {{body}}","display":"Say this as though you are a seafarer from the 17th century: {{body}}","id":"012226aceef4f02a4b1eb26cbbd0819db6ad6ed541f3ed5b59c7f92761ae1b4f","provider":"openai:gpt-3.5-turbo","metrics":{"score":3.645861906333771,"testPassCount":3,"testFailCount":2,"assertPassCount":5,"assertFailCount":2,"totalLatencyMs":8346,"tokenUsage":{"total":519,"prompt":143,"completion":376,"cached":0},"namedScores":{"Tone":2.9375857190013135,"Consistency":1},"cost":0.0006355000000000001}},"recentEvalDate":"2024-03-17T17:02:39.867Z","recentEvalId":"eval-2024-03-17T17:02:39","evals":[{"id":"eval-2024-03-17T17:02:39","datasetId":"1017b6c475f430ad3e152b001af93861211d94dde122791c59638a7511790990","metrics":{"score":3.645861906333771,"testPassCount":3,"testFailCount":2,"assertPassCount":5,"assertFailCount":2,"totalLatencyMs":8346,"tokenUsage":{"total":519,"prompt":143,"completion":376,"cached":0},"namedScores":{"Tone":2.9375857190013135,"Consistency":1},"cost":0.0006355000000000001}}]},{"count":6,"id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","prompt":{"raw":"Write a tweet about bananas","display":"Write a tweet about bananas","id":"1b485883ec47e69c98372a768b270faf8897253cd139ada2599781992125f5fd","provider":"openai:chat:gpt-3.5-turbo","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":608,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-21T14:18:42.236Z","recentEvalId":"eval-2024-03-21T14:14:27","evals":[{"id":"eval-2024-03-21T14:14:27","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":608,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:14:36","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":622,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:16:38","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":646,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:16:47","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":465,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:17:38","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":449,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-21T14:18:42","datasetId":"7365d8b30a460b5858a08ff088cddf05dc6a29b12fe74e1fb44bc7deb70a46bd","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":292,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":92,"id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","prompt":{"raw":"Write a tweet about {{topic}}","display":"Write a tweet about {{topic}}","id":"add16627d8dbb348b8b3ac175c8b96107d26a4b08b5be0262962f8ec5b18ec9e","provider":"azureopenai:asst_E4GyOBYKlnAzMi19SZF2Sn8I","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-25T13:07:54.922Z","recentEvalId":"eval-2024-03-22T04:31:27","evals":[{"id":"eval-2024-03-22T04:31:27","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:32:42","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:33:40","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:34:55","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:39:05","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2414,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:40:03","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:41:00","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:43:34","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:44:02","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:44:26","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:46:07","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:46:51","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:47:30","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T04:48:53","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":5183,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:29","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2828,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:29","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:29","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:54","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2829,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:54","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2875,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:54","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1022,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:12","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2423,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:12","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2409,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:12","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1924,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:45","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1783,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:45","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2722,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:13:45","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2214,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:00","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2282,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:00","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2353,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:00","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2073,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:24","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2528,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:24","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2126,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:15:24","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1832,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:11","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1857,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:11","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1799,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:11","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2763,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:51","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":321,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:51","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":204,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:16:51","datasetId":"6e4d8ac1d59bf64e9f27a58e9d31b3f68c5ac9678b98cd92da57dd1caa652f0f","metrics":{"score":0,"testPassCount":0,"testFailCount":15,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":204,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:23","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":305,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:23","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":303,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:23","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":318,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:31","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":387,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:31","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":318,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:29:31","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":300,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:30:59","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":359,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:30:59","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":295,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:30:59","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":361,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:43","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":716,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:43","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":760,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:43","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":793,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:50","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":722,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:50","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":805,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:31:50","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":806,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:32:35","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":525,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:32:35","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":834,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:32:35","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":763,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:34:56","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":721,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:34:56","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":775,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:34:56","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":753,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:35:22","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":743,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:35:22","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":735,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:35:22","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":510,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:36:01","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":708,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:36:01","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":693,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:36:01","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":759,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:41:36","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":726,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:41:36","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":745,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:41:36","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":535,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:42:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":509,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:42:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":717,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:42:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":745,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:43:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":734,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:43:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":718,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:43:54","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":739,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:20","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":717,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:20","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":764,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:20","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":733,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:26","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":509,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:26","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":706,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:26","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":738,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:52","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":474,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:52","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":455,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:44:52","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":737,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:45:53","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":535,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:45:53","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":753,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:45:53","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":745,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:48:02","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":304,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:48:02","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":318,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:48:02","datasetId":"3c51fc190a7f7eb2dcc5d16c5f91e121543ca85c2eadd590323276c8951c25a8","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":382,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-23T14:12:01","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1952,"tokenUsage":{"total":68,"prompt":12,"completion":56,"cached":0},"namedScores":{},"cost":0.00009}},{"id":"eval-2024-03-23T14:12:01","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3912,"tokenUsage":{"total":89,"prompt":12,"completion":77,"cached":0},"namedScores":{},"cost":0.00243}},{"id":"eval-2024-03-25T13:07:54","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":0,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","prompt":{"raw":"{{output}}","display":"{{output}}","id":"343155ae77c81fd6d016484b639909bc6f4e68f7730061d1d2a8d7e707456d32","provider":"echo","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":7,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T19:44:13.597Z","recentEvalId":"eval-2024-03-22T19:43:44","evals":[{"id":"eval-2024-03-22T19:43:44","datasetId":"f34954fbbe9e53ee4541ae7bd834851e724419383d1c5d69276084f36f52bc19","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":7,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:43:48","datasetId":"08dd5c7b0ed259e207805468c2d7ec70b6598a2deba182e635849391df127b0d","metrics":{"score":2.7943469785575052,"testPassCount":2,"testFailCount":4,"assertPassCount":14,"assertFailCount":4,"totalLatencyMs":12,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T19:44:13","datasetId":"a174ea017053d7e4c2a7871b881c14aa718428cf7e6dace78d95a1064c8cfa21","metrics":{"score":1.3971734892787524,"testPassCount":1,"testFailCount":2,"assertPassCount":7,"assertFailCount":2,"totalLatencyMs":5,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"","id":"34af43a2fbe7bb87bbe84f882c70380de41bfd23a0a97363a509ce7cfcc043df","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9240,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:11:10.956Z","recentEvalId":"eval-2024-03-22T20:08:27","evals":[{"id":"eval-2024-03-22T20:08:27","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9240,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:09:44","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7325,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:10","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":5401,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","prompt":{"raw":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","display":"You are a helpful assistant. Reply with a concise answer to this inquiry: \"{{question}}\"\n\n- Think carefully & step-by-step.\n- Only use information available on Wikipedia.\n- You must answer the question directly, without speculation.\n- You cannot access realtime information. Consider whether the answer may have changed in the 2 years since your knowledge cutoff.\n- If you are not confident in your answer, begin your response with \"Unsure\".","id":"b8f071e6116bba0a8a68fde9a9edffadd11cbd6a25b47a60c9ea204b2ec829f5","provider":"bedrock:anthropic.claude-v2","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9147,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:11:10.956Z","recentEvalId":"eval-2024-03-22T20:08:27","evals":[{"id":"eval-2024-03-22T20:08:27","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9147,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:09:44","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7849,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:11:10","datasetId":"dd62618f34b228148d09e4e493899b878c7aeffcf590b0882dd50382dcac6553","metrics":{"score":0,"testPassCount":0,"testFailCount":36,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4699,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":2,"id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","prompt":{"raw":"Convert this English to {{language}}: {{input}}","display":"Convert this English to {{language}}: {{input}}","id":"982e2daba2e45daa63007e0713738f8cb47c07a372becd90661ec209f021cc76","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":787,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:08:40.637Z","recentEvalId":"eval-2024-03-22T20:08:40","evals":[{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":787,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":335,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":2,"id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","prompt":{"raw":"Translate to {{language}}: {{input}}","display":"Translate to {{language}}: {{input}}","id":"1cd6d24599bd30b06147480d8c35ca4b3ffb0c41385df8f0624099444fb68ae8","provider":"bedrock:completion:anthropic.claude-instant-v1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":782,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-22T20:08:40.637Z","recentEvalId":"eval-2024-03-22T20:08:40","evals":[{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":782,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-22T20:08:40","datasetId":"f76f0be5e2f2d66234ec747945bc9984c590415a0c9bf9b15fbf60e582d6f57d","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":343,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":6,"id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You're an angry pirate. Be concise and stay in character.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"6d5d84ed58b6b318281b920268b95e46484e5cc2ea15f83b934934506aa3e112","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8676,"tokenUsage":{"total":458,"prompt":121,"completion":337,"cached":0},"namedScores":{},"cost":0.000566}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8676,"tokenUsage":{"total":458,"prompt":121,"completion":337,"cached":0},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":458,"prompt":0,"completion":0,"cached":458},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:56:38","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8,"tokenUsage":{"total":458,"prompt":0,"completion":0,"cached":458},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:56:53","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":10,"tokenUsage":{"total":458,"prompt":0,"completion":0,"cached":458},"namedScores":{},"cost":0.000566}},{"id":"eval-2024-03-22T23:59:05","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1868,"tokenUsage":{"total":470,"prompt":29,"completion":80,"cached":361},"namedScores":{},"cost":0.0005870000000000001}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":7339,"tokenUsage":{"total":456,"prompt":119,"completion":337,"cached":0},"namedScores":{},"cost":0.000565}}]},{"count":5,"id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","prompt":{"raw":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","display":"async function ({ vars }) {\n return `Imagine you're an angry pirate. Be concise and stay in character. Tell me about ${vars.topic}`;\n}","id":"488f98e20c64231cfa1fa75aa3453961fae63164b6684ecd24cb997fe8142645","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12002,"tokenUsage":{"total":606,"prompt":106,"completion":500,"cached":0},"namedScores":{},"cost":0.000803}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12002,"tokenUsage":{"total":606,"prompt":106,"completion":500,"cached":0},"namedScores":{},"cost":0.000803}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1738,"tokenUsage":{"total":554,"prompt":35,"completion":59,"cached":460},"namedScores":{},"cost":0.000716}},{"id":"eval-2024-03-22T23:56:53","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":12,"tokenUsage":{"total":606,"prompt":0,"completion":0,"cached":606},"namedScores":{},"cost":0.000803}},{"id":"eval-2024-03-22T23:59:05","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":17,"tokenUsage":{"total":606,"prompt":0,"completion":0,"cached":606},"namedScores":{},"cost":0.000803}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":11872,"tokenUsage":{"total":612,"prompt":107,"completion":505,"cached":0},"namedScores":{},"cost":0.000811}}]},{"count":3,"id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","prompt":{"raw":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","display":"async function ({ vars }) {\n return [\n {\n role: 'system',\n content: `You do not answer questions. You only make wolf noises.`,\n },\n {\n role: 'user',\n content: `Tell me about ${vars.topic}`,\n },\n ];\n}","id":"e85d1e5589dfd8831379af0660d6d0bf99f81671308c9c74e2a46150227842cc","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2688,"tokenUsage":{"total":133,"prompt":86,"completion":14,"cached":33},"namedScores":{},"cost":0.00008549999999999999}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2688,"tokenUsage":{"total":133,"prompt":86,"completion":14,"cached":33},"namedScores":{},"cost":0.00008549999999999999}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":518,"tokenUsage":{"total":142,"prompt":37,"completion":4,"cached":101},"namedScores":{},"cost":0.00008999999999999999}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2935,"tokenUsage":{"total":135,"prompt":115,"completion":20,"cached":0},"namedScores":{},"cost":0.00008749999999999999}}]},{"count":6,"id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","prompt":{"raw":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","display":"import sys\nimport json\n\ndef prompt1(context):\n return f'Write a one-sentence insult for anyone who likes {context[\"vars\"][\"topic\"]}.'\n\ndef generate_prompt(context):\n return f'Describe {context[\"vars\"][\"topic\"]} concisely, comparing it to the Python programming language.'\n\nif __name__ == '__main__':\n print(generate_prompt(json.loads(sys.argv[1])))\n","id":"51519e8874147b854779b876a95e19f6440037da60dbd77c58ee292c9e4d2f84","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4832,"tokenUsage":{"total":281,"prompt":68,"completion":167,"cached":46},"namedScores":{},"cost":0.0003315}},"recentEvalDate":"2024-03-23T00:00:21.342Z","recentEvalId":"eval-2024-03-22T23:55:00","evals":[{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":4832,"tokenUsage":{"total":281,"prompt":68,"completion":167,"cached":46},"namedScores":{},"cost":0.0003315}},{"id":"eval-2024-03-22T23:55:00","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3612,"tokenUsage":{"total":149,"prompt":82,"completion":67,"cached":0},"namedScores":{},"cost":0.00014150000000000002}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1313,"tokenUsage":{"total":274,"prompt":31,"completion":19,"cached":224},"namedScores":{},"cost":0.000312}},{"id":"eval-2024-03-22T23:56:08","datasetId":"a7005a3c4e305e228e48862899ed190dfd8996ffbaaea0234c280dad0f6ad723","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":857,"tokenUsage":{"total":157,"prompt":28,"completion":19,"cached":110},"namedScores":{},"cost":0.0001455}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6173,"tokenUsage":{"total":288,"prompt":91,"completion":197,"cached":0},"namedScores":{},"cost":0.000341}},{"id":"eval-2024-03-23T00:00:21","datasetId":"7a2cb71d85f7e4a785153cbf17c12e19a569915f85024f10e8d112b1ea1b2ea1","metrics":{"score":4,"testPassCount":4,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3608,"tokenUsage":{"total":149,"prompt":83,"completion":66,"cached":0},"namedScores":{},"cost":0.0001405}}]},{"count":3,"id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","prompt":{"raw":"Write an instagram post about {{topic}}\n","display":"Write an instagram post about {{topic}}\n","id":"82ea990e1aa6c114aa940e7e717c174aa458efa93e6c1bb1854c6f552a30f722","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8851,"tokenUsage":{"total":363,"prompt":14,"completion":349,"cached":0},"namedScores":{},"cost":0.0005304999999999999}},"recentEvalDate":"2024-03-23T14:13:18.732Z","recentEvalId":"eval-2024-03-23T14:12:31","evals":[{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":8851,"tokenUsage":{"total":363,"prompt":14,"completion":349,"cached":0},"namedScores":{},"cost":0.0005304999999999999}},{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":9308,"tokenUsage":{"total":235,"prompt":14,"completion":221,"cached":0},"namedScores":{},"cost":0.00677}},{"id":"eval-2024-03-23T14:13:18","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3,"tokenUsage":{"total":235,"prompt":0,"completion":0,"cached":235},"namedScores":{},"cost":0.00677}}]},{"count":3,"id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","prompt":{"raw":"Write a tweet about {{topic}}\n","display":"Write a tweet about {{topic}}\n","id":"7bdb27cd8dfa793744585b9c10d626040ca40c5fb1b1171827b56a8208ee4329","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1855,"tokenUsage":{"total":69,"prompt":13,"completion":56,"cached":0},"namedScores":{},"cost":0.0000905}},"recentEvalDate":"2024-03-23T14:13:18.732Z","recentEvalId":"eval-2024-03-23T14:12:31","evals":[{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1855,"tokenUsage":{"total":69,"prompt":13,"completion":56,"cached":0},"namedScores":{},"cost":0.0000905}},{"id":"eval-2024-03-23T14:12:31","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":3348,"tokenUsage":{"total":74,"prompt":13,"completion":61,"cached":0},"namedScores":{},"cost":0.00196}},{"id":"eval-2024-03-23T14:13:18","datasetId":"8d4abf0457fef90f66c2b1b9b38079f83369b9dcc5710a3ac6e95fa8f27be1ab","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":74,"prompt":0,"completion":0,"cached":74},"namedScores":{},"cost":0.00196}}]},{"count":1,"id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","prompt":{"raw":"Rephrase this in {{language}}: {{body | allcaps}}","display":"Rephrase this in {{language}}: {{body | allcaps}}","id":"ff529e7dc5641b7c96178c86718adfda3e5a96c8d5991ff07ef90577dc210ab6","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":720,"tokenUsage":{"total":47,"prompt":35,"completion":12,"cached":0},"namedScores":{},"cost":0.0000355}},"recentEvalDate":"2024-03-25T13:00:19.393Z","recentEvalId":"eval-2024-03-25T13:00:19","evals":[{"id":"eval-2024-03-25T13:00:19","datasetId":"fd77de338af61b19edc0ce79be47635f14edd4bb3ff9a10c012e93612aaff3e2","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":720,"tokenUsage":{"total":47,"prompt":35,"completion":12,"cached":0},"namedScores":{},"cost":0.0000355}}]},{"count":1,"id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","prompt":{"raw":"Translate this to conversational {{language}}: {{body | allcaps}}","display":"Translate this to conversational {{language}}: {{body | allcaps}}","id":"bf842b0e19dc5aa76fa49be8449fbf76d5c055f758008ebbc448ed8e6a7a4943","provider":"openai:gpt-3.5-turbo","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2145,"tokenUsage":{"total":50,"prompt":37,"completion":13,"cached":0},"namedScores":{},"cost":0.000038}},"recentEvalDate":"2024-03-25T13:00:19.393Z","recentEvalId":"eval-2024-03-25T13:00:19","evals":[{"id":"eval-2024-03-25T13:00:19","datasetId":"fd77de338af61b19edc0ce79be47635f14edd4bb3ff9a10c012e93612aaff3e2","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2145,"tokenUsage":{"total":50,"prompt":37,"completion":13,"cached":0},"namedScores":{},"cost":0.000038}}]},{"count":2,"id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","prompt":{"raw":"Generate one very interesting fact about {{topic}}","display":"Generate one very interesting fact about {{topic}}","id":"60ddfbfcc7c5d5cc068c1f3e5316f9f6cef884aed326d98f4b5181a8d8de1e40","provider":"azureopenai:chat:gpt-35-turbo-deployment1","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-25T13:11:17.083Z","recentEvalId":"eval-2024-03-25T13:11:10","evals":[{"id":"eval-2024-03-25T13:11:10","datasetId":"a2df04f077e9acdec3e4dab07c8cc870f5a038838e595610f8ea561ec04c4318","metrics":{"score":0,"testPassCount":0,"testFailCount":2,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":6,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-25T13:11:17","datasetId":"a2df04f077e9acdec3e4dab07c8cc870f5a038838e595610f8ea561ec04c4318","metrics":{"score":1.5,"testPassCount":1,"testFailCount":1,"assertPassCount":2,"assertFailCount":1,"totalLatencyMs":1443,"tokenUsage":{"total":76,"prompt":30,"completion":46,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","prompt":{"raw":"Rephrase this in French: {{body}}","display":"Rephrase this in French: {{body}}","id":"a67a87b900d7de9c3dd19633b800ebe86b568db68f28d879d0b8a4e41a05e3cc","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1066,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-26T17:44:59.979Z","recentEvalId":"eval-2024-03-26T14:15:17","evals":[{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1066,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":807,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T17:44:59","datasetId":"5fdf2d34df8100f9ee35a16711575a490c63d15438a03df15f2dfda1802aeda1","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1076,"tokenUsage":{"total":39,"prompt":31,"completion":8,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","prompt":{"raw":"Rephrase this like a pirate: {{body}}","display":"Rephrase this like a pirate: {{body}}","id":"80aaed6946eaa32040a7fb382921bfc6d2eb5abffbaea8395c194ad2bbad6094","provider":"customProvider.js","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1182,"tokenUsage":{"total":53,"prompt":33,"completion":20,"cached":0},"namedScores":{},"cost":0}},"recentEvalDate":"2024-03-26T17:44:59.979Z","recentEvalId":"eval-2024-03-26T14:15:17","evals":[{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1182,"tokenUsage":{"total":53,"prompt":33,"completion":20,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T14:15:17","datasetId":"f220e2e4353e25bc8cc30c5865fcca3e57a756492c6ec87da2a8da39a1bba774","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":788,"tokenUsage":{"total":48,"prompt":33,"completion":15,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T17:44:59","datasetId":"5fdf2d34df8100f9ee35a16711575a490c63d15438a03df15f2dfda1802aeda1","metrics":{"score":2,"testPassCount":2,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":940,"tokenUsage":{"total":51,"prompt":33,"completion":18,"cached":0},"namedScores":{},"cost":0}}]},{"count":3,"id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","prompt":{"raw":"Translate this to French: {{message}}","display":"Translate this to French: {{message}}","id":"ca14273ccab7b5eddd64a32cf93a17a6b9c2e81fcd3a61388ecf69d557d27997","provider":"openai:gpt-3.5-turbo","metrics":{"score":0.4015448202047296,"testPassCount":1,"testFailCount":0,"assertPassCount":1,"assertFailCount":0,"totalLatencyMs":396,"tokenUsage":{"total":17,"prompt":14,"completion":3,"cached":0},"namedScores":{},"cost":0.0000115}},"recentEvalDate":"2024-03-26T19:39:39.037Z","recentEvalId":"eval-2024-03-26T19:39:27","evals":[{"id":"eval-2024-03-26T19:39:27","datasetId":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","metrics":{"score":0.4015448202047296,"testPassCount":1,"testFailCount":0,"assertPassCount":1,"assertFailCount":0,"totalLatencyMs":396,"tokenUsage":{"total":17,"prompt":14,"completion":3,"cached":0},"namedScores":{},"cost":0.0000115}},{"id":"eval-2024-03-26T19:39:36","datasetId":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}},{"id":"eval-2024-03-26T19:39:39","datasetId":"60947f9048ba276b6ada9ff1a73f818ff1374ad15840a75594f6b8e7168fe904","metrics":{"score":0,"testPassCount":0,"testFailCount":1,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":0,"prompt":0,"completion":0,"cached":0},"namedScores":{},"cost":0}}]},{"count":5,"id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","prompt":{"raw":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","display":"You are an office assistant. Using this context:\n\n<context>{{context}}</context>\n\nAnswer this question:\n\n<question>{{question}}</question>","id":"a868045dcd584f8841c0db5ea8395aabee75965420d6fb87f9f3d53fcd35d311","provider":"openai:gpt-3.5-turbo-0613","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2304,"tokenUsage":{"total":172,"prompt":47,"completion":125,"cached":0},"namedScores":{},"cost":0.000211}},"recentEvalDate":"2024-03-26T20:51:01.412Z","recentEvalId":"eval-2024-03-26T20:49:19","evals":[{"id":"eval-2024-03-26T20:49:19","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2304,"tokenUsage":{"total":172,"prompt":47,"completion":125,"cached":0},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:49:40","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":1,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:50:03","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:50:29","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}},{"id":"eval-2024-03-26T20:51:01","datasetId":"7060ea5fe58d69ef89cc65663a0c4bddf6cb7d42fbdba49e330a1ec928febde1","metrics":{"score":1,"testPassCount":1,"testFailCount":0,"assertPassCount":0,"assertFailCount":0,"totalLatencyMs":2,"tokenUsage":{"total":172,"prompt":0,"completion":0,"cached":172},"namedScores":{},"cost":0.000211}}]}]}
|
|
1
|
+
{"error":"Not implemented"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"
|
|
1
|
+
{"error":"Not implemented"}
|