promptfoo 0.66.0 → 0.68.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/dist/package.json +6 -4
- package/dist/src/assertions/validateAssertions.d.ts.map +1 -1
- package/dist/src/assertions/validateAssertions.js +11 -11
- package/dist/src/assertions/validateAssertions.js.map +1 -1
- package/dist/src/assertions.d.ts +6 -4
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +138 -165
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cache.d.ts.map +1 -1
- package/dist/src/cache.js +3 -3
- package/dist/src/cache.js.map +1 -1
- package/dist/src/checkNodeVersion.d.ts +6 -0
- package/dist/src/checkNodeVersion.d.ts.map +1 -0
- package/dist/src/checkNodeVersion.js +67 -0
- package/dist/src/checkNodeVersion.js.map +1 -0
- package/dist/src/commands/config.d.ts.map +1 -1
- package/dist/src/commands/config.js +13 -2
- package/dist/src/commands/config.js.map +1 -1
- package/dist/src/commands/delete.d.ts.map +1 -1
- package/dist/src/commands/delete.js +11 -11
- package/dist/src/commands/delete.js.map +1 -1
- package/dist/src/commands/export.d.ts.map +1 -1
- package/dist/src/commands/export.js +1 -1
- package/dist/src/commands/export.js.map +1 -1
- package/dist/src/commands/import.js +1 -1
- package/dist/src/commands/import.js.map +1 -1
- package/dist/src/commands/list.d.ts.map +1 -1
- package/dist/src/commands/list.js +2 -2
- package/dist/src/commands/list.js.map +1 -1
- package/dist/src/commands/show.d.ts.map +1 -1
- package/dist/src/commands/show.js +66 -66
- package/dist/src/commands/show.js.map +1 -1
- package/dist/src/constants.d.ts +1 -0
- package/dist/src/constants.d.ts.map +1 -1
- package/dist/src/constants.js +3 -1
- package/dist/src/constants.js.map +1 -1
- package/dist/src/csv.d.ts +1 -1
- package/dist/src/csv.d.ts.map +1 -1
- package/dist/src/csv.js +50 -51
- package/dist/src/csv.js.map +1 -1
- package/dist/src/database.d.ts +56 -56
- package/dist/src/database.d.ts.map +1 -1
- package/dist/src/database.js +34 -34
- package/dist/src/database.js.map +1 -1
- package/dist/src/esm.d.ts.map +1 -1
- package/dist/src/esm.js +1 -1
- package/dist/src/esm.js.map +1 -1
- package/dist/src/evaluator.d.ts +18 -0
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +41 -23
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/feedback.d.ts +1 -1
- package/dist/src/feedback.d.ts.map +1 -1
- package/dist/src/feedback.js +21 -21
- package/dist/src/feedback.js.map +1 -1
- package/dist/src/fetch.d.ts.map +1 -1
- package/dist/src/fetch.js.map +1 -1
- package/dist/src/globalConfig.d.ts +0 -4
- package/dist/src/globalConfig.d.ts.map +1 -1
- package/dist/src/globalConfig.js +5 -5
- package/dist/src/globalConfig.js.map +1 -1
- package/dist/src/googleSheets.d.ts +1 -1
- package/dist/src/googleSheets.d.ts.map +1 -1
- package/dist/src/googleSheets.js +16 -16
- package/dist/src/googleSheets.js.map +1 -1
- package/dist/src/index.d.ts +3 -1
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +4 -4
- package/dist/src/index.js.map +1 -1
- package/dist/src/integrations/langfuse.js +1 -1
- package/dist/src/integrations/langfuse.js.map +1 -1
- package/dist/src/main.js +28 -26
- package/dist/src/main.js.map +1 -1
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +8 -7
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/migrate.d.ts.map +1 -1
- package/dist/src/migrate.js +2 -2
- package/dist/src/migrate.js.map +1 -1
- package/dist/src/onboarding.d.ts.map +1 -1
- package/dist/src/onboarding.js +2 -2
- package/dist/src/onboarding.js.map +1 -1
- package/dist/src/prompts/constants.d.ts +3 -0
- package/dist/src/prompts/constants.d.ts.map +1 -0
- package/dist/src/prompts/constants.js +16 -0
- package/dist/src/prompts/constants.js.map +1 -0
- package/dist/src/prompts/external/ragas.d.ts.map +1 -0
- package/dist/src/prompts/external/ragas.js.map +1 -0
- package/dist/src/{prompts.d.ts → prompts/grading.d.ts} +7 -4
- package/dist/src/prompts/grading.d.ts.map +1 -0
- package/dist/src/prompts/grading.js +119 -0
- package/dist/src/prompts/grading.js.map +1 -0
- package/dist/src/prompts/index.d.ts +25 -0
- package/dist/src/prompts/index.d.ts.map +1 -0
- package/dist/src/prompts/index.js +143 -0
- package/dist/src/prompts/index.js.map +1 -0
- package/dist/src/prompts/processors/javascript.d.ts +9 -0
- package/dist/src/prompts/processors/javascript.d.ts.map +1 -0
- package/dist/src/prompts/processors/javascript.js +22 -0
- package/dist/src/prompts/processors/javascript.js.map +1 -0
- package/dist/src/prompts/processors/json.d.ts +12 -0
- package/dist/src/prompts/processors/json.d.ts.map +1 -0
- package/dist/src/prompts/processors/json.js +48 -0
- package/dist/src/prompts/processors/json.js.map +1 -0
- package/dist/src/prompts/processors/jsonl.d.ts +9 -0
- package/dist/src/prompts/processors/jsonl.d.ts.map +1 -0
- package/dist/src/prompts/processors/jsonl.js +48 -0
- package/dist/src/prompts/processors/jsonl.js.map +1 -0
- package/dist/src/prompts/processors/python.d.ts +31 -0
- package/dist/src/prompts/processors/python.d.ts.map +1 -0
- package/dist/src/prompts/processors/python.js +96 -0
- package/dist/src/prompts/processors/python.js.map +1 -0
- package/dist/src/prompts/processors/string.d.ts +8 -0
- package/dist/src/prompts/processors/string.d.ts.map +1 -0
- package/dist/src/prompts/processors/string.js +23 -0
- package/dist/src/prompts/processors/string.js.map +1 -0
- package/dist/src/prompts/processors/text.d.ts +9 -0
- package/dist/src/prompts/processors/text.d.ts.map +1 -0
- package/dist/src/prompts/processors/text.js +47 -0
- package/dist/src/prompts/processors/text.js.map +1 -0
- package/dist/src/prompts/processors/yaml.d.ts +13 -0
- package/dist/src/prompts/processors/yaml.d.ts.map +1 -0
- package/dist/src/prompts/processors/yaml.js +55 -0
- package/dist/src/prompts/processors/yaml.js.map +1 -0
- package/dist/src/prompts/utils.d.ts +29 -0
- package/dist/src/prompts/utils.d.ts.map +1 -0
- package/dist/src/prompts/utils.js +143 -0
- package/dist/src/prompts/utils.js.map +1 -0
- package/dist/src/providers/anthropic.d.ts +1 -1
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/anthropic.js +68 -67
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +1 -1
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/azureopenaiUtil.d.ts.map +1 -1
- package/dist/src/providers/azureopenaiUtil.js +2 -2
- package/dist/src/providers/azureopenaiUtil.js.map +1 -1
- package/dist/src/providers/bam.d.ts.map +1 -1
- package/dist/src/providers/bam.js +1 -1
- package/dist/src/providers/bam.js.map +1 -1
- package/dist/src/providers/bedrock.d.ts +24 -0
- package/dist/src/providers/bedrock.d.ts.map +1 -1
- package/dist/src/providers/bedrock.js +146 -46
- package/dist/src/providers/bedrock.js.map +1 -1
- package/dist/src/providers/cloudflare-ai.d.ts +1 -1
- package/dist/src/providers/cloudflare-ai.d.ts.map +1 -1
- package/dist/src/providers/cloudflare-ai.js +1 -1
- package/dist/src/providers/cloudflare-ai.js.map +1 -1
- package/dist/src/providers/cohere.d.ts.map +1 -1
- package/dist/src/providers/cohere.js.map +1 -1
- package/dist/src/providers/defaults.d.ts +1 -1
- package/dist/src/providers/defaults.d.ts.map +1 -1
- package/dist/src/providers/defaults.js +2 -2
- package/dist/src/providers/defaults.js.map +1 -1
- package/dist/src/providers/http.d.ts.map +1 -1
- package/dist/src/providers/http.js +12 -12
- package/dist/src/providers/http.js.map +1 -1
- package/dist/src/providers/huggingface.d.ts +1 -1
- package/dist/src/providers/huggingface.d.ts.map +1 -1
- package/dist/src/providers/huggingface.js +1 -1
- package/dist/src/providers/huggingface.js.map +1 -1
- package/dist/src/providers/llama.d.ts.map +1 -1
- package/dist/src/providers/llama.js.map +1 -1
- package/dist/src/providers/localai.d.ts.map +1 -1
- package/dist/src/providers/localai.js +1 -1
- package/dist/src/providers/localai.js.map +1 -1
- package/dist/src/providers/mistral.d.ts.map +1 -1
- package/dist/src/providers/mistral.js +55 -54
- package/dist/src/providers/mistral.js.map +1 -1
- package/dist/src/providers/ollama.d.ts.map +1 -1
- package/dist/src/providers/ollama.js +1 -1
- package/dist/src/providers/ollama.js.map +1 -1
- package/dist/src/providers/openai.d.ts +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +124 -118
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/palm.d.ts.map +1 -1
- package/dist/src/providers/palm.js +1 -1
- package/dist/src/providers/palm.js.map +1 -1
- package/dist/src/providers/portkey.d.ts +1 -1
- package/dist/src/providers/portkey.d.ts.map +1 -1
- package/dist/src/providers/portkey.js.map +1 -1
- package/dist/src/providers/promptfoo.d.ts.map +1 -1
- package/dist/src/providers/promptfoo.js.map +1 -1
- package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
- package/dist/src/providers/pythonCompletion.js +2 -2
- package/dist/src/providers/pythonCompletion.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +17 -1
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +65 -3
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/scriptCompletion.d.ts.map +1 -1
- package/dist/src/providers/scriptCompletion.js +1 -1
- package/dist/src/providers/scriptCompletion.js.map +1 -1
- package/dist/src/providers/vertex.d.ts +43 -32
- package/dist/src/providers/vertex.d.ts.map +1 -1
- package/dist/src/providers/vertex.js +60 -3
- package/dist/src/providers/vertex.js.map +1 -1
- package/dist/src/providers/voyage.d.ts.map +1 -1
- package/dist/src/providers/voyage.js.map +1 -1
- package/dist/src/providers/webhook.d.ts.map +1 -1
- package/dist/src/providers/webhook.js +1 -1
- package/dist/src/providers/webhook.js.map +1 -1
- package/dist/src/providers.d.ts +7 -7
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +82 -65
- package/dist/src/providers.js.map +1 -1
- package/dist/src/python/wrapper.d.ts.map +1 -1
- package/dist/src/python/wrapper.js +1 -1
- package/dist/src/python/wrapper.js.map +1 -1
- package/dist/src/redteam/getHijackingTests.d.ts.map +1 -1
- package/dist/src/redteam/getHijackingTests.js.map +1 -1
- package/dist/src/redteam/index.d.ts +1 -1
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +38 -38
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/iterative.d.ts +3 -0
- package/dist/src/redteam/iterative.d.ts.map +1 -1
- package/dist/src/redteam/iterative.js +24 -14
- package/dist/src/redteam/iterative.js.map +1 -1
- package/dist/src/redteam/iterativeImage.d.ts +12 -0
- package/dist/src/redteam/iterativeImage.d.ts.map +1 -0
- package/dist/src/redteam/iterativeImage.js +227 -0
- package/dist/src/redteam/iterativeImage.js.map +1 -0
- package/dist/src/share.d.ts.map +1 -1
- package/dist/src/share.js +1 -1
- package/dist/src/share.js.map +1 -1
- package/dist/src/suggestions.d.ts.map +1 -1
- package/dist/src/suggestions.js.map +1 -1
- package/dist/src/table.d.ts.map +1 -1
- package/dist/src/table.js +4 -5
- package/dist/src/table.js.map +1 -1
- package/dist/src/telemetry.d.ts.map +1 -1
- package/dist/src/telemetry.js +1 -1
- package/dist/src/telemetry.js.map +1 -1
- package/dist/src/testCases.d.ts +1 -1
- package/dist/src/testCases.d.ts.map +1 -1
- package/dist/src/testCases.js +24 -15
- package/dist/src/testCases.js.map +1 -1
- package/dist/src/types.d.ts +5 -2
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/types.js +5 -5
- package/dist/src/types.js.map +1 -1
- package/dist/src/updates.js +4 -3
- package/dist/src/updates.js.map +1 -1
- package/dist/src/util.d.ts +18 -17
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +157 -126
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/2-e4ac60fba7a205e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/897-1955b232a2148365.js +32 -0
- package/dist/src/web/nextui/_next/static/chunks/app/auth/login/{page-c4a2650ac3a0ecd9.js → page-d932a73274f0f175.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/{page-dd18caf3100d8d0e.js → page-7a8f35189f8bc5b8.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-8b6fc67a6c47c793.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-35bb69e87d17a291.js → page-dff9258a62cdf49e.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/{page-aef3aed32af8d4d7.js → page-7955455d29645096.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-45eacc3320f78daa.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/progress/{page-00b5c3308a81af12.js → page-948dc7bcbf53cecf.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-3b66a4c23899e662.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/report/page-7869eb9950cab8de.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-2b2257cb43136762.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/{webpack-2fa22c6070dd15bc.js → webpack-525f81bed20c14b6.js} +1 -1
- package/dist/src/web/nextui/_next/static/css/036bf4af64e53e86.css +1 -0
- package/dist/src/web/nextui/_next/static/css/255fe4bf8eb4c6e9.css +1 -0
- package/dist/src/web/nextui/_next/static/css/dab5d695b3657d59.css +1 -0
- package/dist/src/web/nextui/_next/static/css/e141e895af3747c6.css +1 -0
- package/dist/src/web/nextui/_next/static/css/edcd6f0b6c902fde.css +1 -0
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +6 -6
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +6 -6
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +6 -6
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +8 -8
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/progress/index.html +1 -1
- package/dist/src/web/nextui/progress/index.txt +6 -6
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +6 -6
- package/dist/src/web/nextui/report/index.html +1 -1
- package/dist/src/web/nextui/report/index.txt +8 -8
- package/dist/src/web/nextui/setup/index.html +2 -2
- package/dist/src/web/nextui/setup/index.txt +9 -9
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +10 -10
- package/dist/src/web/server.js.map +1 -1
- package/package.json +6 -4
- package/dist/src/external/ragas.d.ts.map +0 -1
- package/dist/src/external/ragas.js.map +0 -1
- package/dist/src/prompts.d.ts.map +0 -1
- package/dist/src/prompts.js +0 -391
- package/dist/src/prompts.js.map +0 -1
- package/dist/src/web/nextui/_next/static/chunks/2-60ab1c881a240da6.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/94-c07f30271fa4d8e4.js +0 -32
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-9426b519d4be1fdb.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-dfda5ed5ef745c2d.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/page-ee610cffca4b965b.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/report/page-1b97ddc1b365a121.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-e1c49ea6fe7c04c5.js +0 -1
- package/dist/src/web/nextui/_next/static/css/16c1dd82fc87c9d7.css +0 -1
- package/dist/src/web/nextui/_next/static/css/451beaa5570cb9d3.css +0 -1
- package/dist/src/web/nextui/_next/static/css/51a17e8edcdfdbb2.css +0 -1
- package/dist/src/web/nextui/_next/static/css/51f7d6933894a4f8.css +0 -1
- package/dist/src/web/nextui/_next/static/css/e9f25719d0b14939.css +0 -1
- /package/dist/src/{external → prompts/external}/ragas.d.ts +0 -0
- /package/dist/src/{external → prompts/external}/ragas.js +0 -0
- /package/dist/src/web/nextui/_next/static/{lMO8mRWL6KkcjtN4Giq14 → 82qlai1jFeoFILGwnRAJx}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{lMO8mRWL6KkcjtN4Giq14 → 82qlai1jFeoFILGwnRAJx}/_ssgManifest.js +0 -0
package/README.md
CHANGED
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://npmjs.com/package/promptfoo)
|
|
4
4
|
[](https://npmjs.com/package/promptfoo)
|
|
5
|
-
[](https://github.com/
|
|
6
|
-
](https://github.com/promptfoo/promptfoo/actions/workflows/main.yml)
|
|
6
|
+

|
|
7
7
|
[](https://discord.gg/gHPS9jjfbs)
|
|
8
8
|
|
|
9
9
|
`promptfoo` is a tool for testing and evaluating LLM apps.
|
|
@@ -30,7 +30,7 @@ promptfoo produces matrix views that let you quickly evaluate outputs across man
|
|
|
30
30
|
|
|
31
31
|
It works on the command line too:
|
|
32
32
|
|
|
33
|
-

|
|
34
34
|
|
|
35
35
|
## Why choose promptfoo?
|
|
36
36
|
|
|
@@ -52,7 +52,7 @@ As you explore modifications to the prompt, use `promptfoo eval` to rate all out
|
|
|
52
52
|
|
|
53
53
|
As you collect more examples and establish a user feedback loop, continue to build the pool of test cases.
|
|
54
54
|
|
|
55
|
-
<img width="772" alt="LLM ops" src="https://github.com/
|
|
55
|
+
<img width="772" alt="LLM ops" src="https://github.com/promptfoo/promptfoo/assets/310310/cf0461a7-2832-4362-9fbb-4ebd911d06ff">
|
|
56
56
|
|
|
57
57
|
## Usage
|
|
58
58
|
|
|
@@ -161,7 +161,7 @@ providers: [openai:gpt-3.5-turbo]
|
|
|
161
161
|
tests: tests.csv
|
|
162
162
|
```
|
|
163
163
|
|
|
164
|
-
See [example CSV](https://github.com/
|
|
164
|
+
See [example CSV](https://github.com/promptfoo/promptfoo/blob/main/examples/simple-test/tests.csv).
|
|
165
165
|
|
|
166
166
|
### Command-line
|
|
167
167
|
|
|
@@ -169,7 +169,7 @@ If you're looking to customize your usage, you have a wide set of parameters at
|
|
|
169
169
|
|
|
170
170
|
| Option | Description |
|
|
171
171
|
| ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
172
|
-
| `-p, --prompts <paths...>` | Paths to [prompt files](https://www.promptfoo.dev/docs/configuration/parameters#
|
|
172
|
+
| `-p, --prompts <paths...>` | Paths to [prompt files](https://www.promptfoo.dev/docs/configuration/parameters#prompts), directory, or glob |
|
|
173
173
|
| `-r, --providers <name or path...>` | One of: openai:chat, openai:completion, openai:model-name, localai:chat:model-name, localai:completion:model-name. See [API providers][providers-docs] |
|
|
174
174
|
| `-o, --output <path>` | Path to [output file](https://www.promptfoo.dev/docs/configuration/parameters#output-file) (csv, json, yaml, html) |
|
|
175
175
|
| `--tests <path>` | Path to [external test file](https://www.promptfoo.dev/docs/configurationexpected-outputsassertions#load-an-external-tests-file) |
|
|
@@ -190,7 +190,7 @@ npx promptfoo view
|
|
|
190
190
|
|
|
191
191
|
#### Prompt quality
|
|
192
192
|
|
|
193
|
-
In [this example](https://github.com/
|
|
193
|
+
In [this example](https://github.com/promptfoo/promptfoo/tree/main/examples/assistant-cli), we evaluate whether adding adjectives to the personality of an assistant bot affects the responses:
|
|
194
194
|
|
|
195
195
|
```
|
|
196
196
|
npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo -t tests.csv
|
|
@@ -204,13 +204,13 @@ npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo -t tests.csv
|
|
|
204
204
|
|
|
205
205
|
This command will evaluate the prompts in `prompts.txt`, substituting the variable values from `vars.csv`, and output results in your terminal.
|
|
206
206
|
|
|
207
|
-
You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/
|
|
207
|
+
You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/promptfoo/promptfoo/blob/main/examples/simple-cli/output.json), YAML, or an HTML file:
|
|
208
208
|
|
|
209
209
|

|
|
210
210
|
|
|
211
211
|
#### Model quality
|
|
212
212
|
|
|
213
|
-
In the [next example](https://github.com/
|
|
213
|
+
In the [next example](https://github.com/promptfoo/promptfoo/tree/main/examples/gpt-3.5-vs-4), we evaluate the difference between GPT 3 and GPT 4 outputs for a given prompt:
|
|
214
214
|
|
|
215
215
|
```
|
|
216
216
|
npx promptfoo eval -p prompts.txt -r openai:gpt-3.5-turbo openai:gpt-4 -o output.html
|
|
@@ -302,7 +302,7 @@ const results = await promptfoo.evaluate({
|
|
|
302
302
|
|
|
303
303
|
This code imports the `promptfoo` library, defines the evaluation options, and then calls the `evaluate` function with these options.
|
|
304
304
|
|
|
305
|
-
See the full example [here](https://github.com/
|
|
305
|
+
See the full example [here](https://github.com/promptfoo/promptfoo/tree/main/examples/simple-import), which includes an example results object.
|
|
306
306
|
|
|
307
307
|
## Configuration
|
|
308
308
|
|
package/dist/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "promptfoo",
|
|
3
3
|
"description": "LLM eval & testing toolkit",
|
|
4
4
|
"author": "Ian Webster",
|
|
5
|
-
"version": "0.
|
|
5
|
+
"version": "0.68.0",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"type": "commonjs",
|
|
8
8
|
"repository": "promptfoo/promptfoo",
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
"dist"
|
|
24
24
|
],
|
|
25
25
|
"engines": {
|
|
26
|
-
"node": ">=18"
|
|
26
|
+
"node": ">=18.0.0"
|
|
27
27
|
},
|
|
28
28
|
"bin": {
|
|
29
29
|
"promptfoo": "dist/src/main.js"
|
|
@@ -49,12 +49,12 @@
|
|
|
49
49
|
"prepublishOnly": "npm run build:clean && npm run build"
|
|
50
50
|
},
|
|
51
51
|
"peerDependencies": {
|
|
52
|
-
"@aws-sdk/client-bedrock-runtime": "^3.
|
|
52
|
+
"@aws-sdk/client-bedrock-runtime": "^3.602.0",
|
|
53
53
|
"@azure/identity": "^4.0.0",
|
|
54
54
|
"@azure/openai-assistants": "^1.0.0-beta.5",
|
|
55
55
|
"@ibm-generative-ai/node-sdk": "^2.0.6",
|
|
56
|
+
"@smithy/node-http-handler": "^3.1.1",
|
|
56
57
|
"google-auth-library": "^9.7.0",
|
|
57
|
-
"googleapis": "^134.0.0",
|
|
58
58
|
"langfuse": "^3.7.0",
|
|
59
59
|
"node-sql-parser": "^5.2.0"
|
|
60
60
|
},
|
|
@@ -65,6 +65,7 @@
|
|
|
65
65
|
"@swc/cli": "^0.3.12",
|
|
66
66
|
"@swc/core": "^1.6.1",
|
|
67
67
|
"@swc/jest": "^0.2.36",
|
|
68
|
+
"@trivago/prettier-plugin-sort-imports": "^4.3.0",
|
|
68
69
|
"@types/async": "^3.2.24",
|
|
69
70
|
"@types/better-sqlite3": "^7.6.10",
|
|
70
71
|
"@types/cache-manager": "^4.0.6",
|
|
@@ -103,6 +104,7 @@
|
|
|
103
104
|
"dependencies": {
|
|
104
105
|
"@anthropic-ai/sdk": "^0.24.0",
|
|
105
106
|
"@apidevtools/json-schema-ref-parser": "^11.6.4",
|
|
107
|
+
"@googleapis/sheets": "^8.0.0",
|
|
106
108
|
"ajv": "^8.16.0",
|
|
107
109
|
"ajv-formats": "^2.1.1",
|
|
108
110
|
"async": "^3.2.5",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validateAssertions.d.ts","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEpC,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;CAMhD;
|
|
1
|
+
{"version":3,"file":"validateAssertions.d.ts","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,UAAU,CAAC;AAEpC,qBAAa,oBAAqB,SAAQ,KAAK;gBACjC,OAAO,EAAE,MAAM,EAAE,QAAQ,EAAE,QAAQ;CAMhD;AAgBD,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,EAAE,CAAC,CAAC,EAAE,QAU/F"}
|
|
@@ -9,6 +9,17 @@ class AssertValiationError extends Error {
|
|
|
9
9
|
}
|
|
10
10
|
}
|
|
11
11
|
exports.AssertValiationError = AssertValiationError;
|
|
12
|
+
function validateAssertSet(assertion, test) {
|
|
13
|
+
if (!('assert' in assertion)) {
|
|
14
|
+
throw new AssertValiationError('assert-set must have an `assert` property', test);
|
|
15
|
+
}
|
|
16
|
+
if (!Array.isArray(assertion.assert)) {
|
|
17
|
+
throw new AssertValiationError('assert-set `assert` must be an array of assertions', test);
|
|
18
|
+
}
|
|
19
|
+
if (assertion.assert.some((assertion) => assertion.type === 'assert-set')) {
|
|
20
|
+
throw new AssertValiationError('assert-set must not have child assert-sets', test);
|
|
21
|
+
}
|
|
22
|
+
}
|
|
12
23
|
function validateAssertions(tests) {
|
|
13
24
|
for (const test of tests) {
|
|
14
25
|
if (test.assert) {
|
|
@@ -21,15 +32,4 @@ function validateAssertions(tests) {
|
|
|
21
32
|
}
|
|
22
33
|
}
|
|
23
34
|
exports.validateAssertions = validateAssertions;
|
|
24
|
-
function validateAssertSet(assertion, test) {
|
|
25
|
-
if (!('assert' in assertion)) {
|
|
26
|
-
throw new AssertValiationError('assert-set must have an `assert` property', test);
|
|
27
|
-
}
|
|
28
|
-
if (!Array.isArray(assertion.assert)) {
|
|
29
|
-
throw new AssertValiationError('assert-set `assert` must be an array of assertions', test);
|
|
30
|
-
}
|
|
31
|
-
if (assertion.assert.some((assertion) => assertion.type === 'assert-set')) {
|
|
32
|
-
throw new AssertValiationError('assert-set must not have child assert-sets', test);
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
35
|
//# sourceMappingURL=validateAssertions.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validateAssertions.js","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":";;;AAEA,MAAa,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe,EAAE,QAAkB;QAC7C,MAAM,mBAAmB,GAAG,QAAQ,CAAC,WAAW,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAE7E,KAAK,CAAC,GAAG,OAAO,SAAS,mBAAmB,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAPD,oDAOC;AAED,
|
|
1
|
+
{"version":3,"file":"validateAssertions.js","sourceRoot":"","sources":["../../../src/assertions/validateAssertions.ts"],"names":[],"mappings":";;;AAEA,MAAa,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe,EAAE,QAAkB;QAC7C,MAAM,mBAAmB,GAAG,QAAQ,CAAC,WAAW,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAC;QAE7E,KAAK,CAAC,GAAG,OAAO,SAAS,mBAAmB,EAAE,CAAC,CAAC;QAChD,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AAPD,oDAOC;AAED,SAAS,iBAAiB,CAAC,SAAiB,EAAE,IAAc;IAC1D,IAAI,CAAC,CAAC,QAAQ,IAAI,SAAS,CAAC,EAAE,CAAC;QAC7B,MAAM,IAAI,oBAAoB,CAAC,2CAA2C,EAAE,IAAI,CAAC,CAAC;IACpF,CAAC;IAED,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,MAAM,CAAC,EAAE,CAAC;QACrC,MAAM,IAAI,oBAAoB,CAAC,oDAAoD,EAAE,IAAI,CAAC,CAAC;IAC7F,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,KAAK,YAAY,CAAC,EAAE,CAAC;QAC1E,MAAM,IAAI,oBAAoB,CAAC,4CAA4C,EAAE,IAAI,CAAC,CAAC;IACrF,CAAC;AACH,CAAC;AAED,SAAgB,kBAAkB,CAAC,KAA6D;IAC9F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YAChB,KAAK,MAAM,SAAS,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBACpC,IAAI,SAAS,CAAC,IAAI,KAAK,YAAY,EAAE,CAAC;oBACpC,iBAAiB,CAAC,SAAS,EAAE,IAAI,CAAC,CAAC;gBACrC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;AACH,CAAC;AAVD,gDAUC"}
|
package/dist/src/assertions.d.ts
CHANGED
|
@@ -1,29 +1,31 @@
|
|
|
1
1
|
import { matchesSimilarity, matchesLlmRubric, matchesFactuality, matchesClosedQa, matchesClassification, matchesAnswerRelevance, matchesContextRecall, matchesContextRelevance, matchesContextFaithfulness, matchesSelectBest, matchesModeration } from './matchers';
|
|
2
2
|
import { type ApiProvider, type Assertion, type AssertionType, type AtomicTestCase, type GradingResult, AssertionValue } from './types';
|
|
3
3
|
export declare const MODEL_GRADED_ASSERTION_TYPES: Set<AssertionType>;
|
|
4
|
-
export declare function
|
|
4
|
+
export declare function isSql(outputString: string, renderedValue: AssertionValue | undefined, inverse: boolean, assertion: Assertion): Promise<GradingResult>;
|
|
5
|
+
export declare function runAssertion({ prompt, provider, assertion, test, output, latencyMs, logProbs, cost, }: {
|
|
5
6
|
prompt?: string;
|
|
6
7
|
provider?: ApiProvider;
|
|
8
|
+
assertion: Assertion;
|
|
7
9
|
test: AtomicTestCase;
|
|
8
10
|
output: string | object;
|
|
9
11
|
latencyMs?: number;
|
|
10
12
|
logProbs?: number[];
|
|
11
13
|
cost?: number;
|
|
12
14
|
}): Promise<GradingResult>;
|
|
13
|
-
export declare function
|
|
15
|
+
export declare function runAssertions({ prompt, provider, test, output, latencyMs, logProbs, cost, }: {
|
|
14
16
|
prompt?: string;
|
|
15
17
|
provider?: ApiProvider;
|
|
16
|
-
assertion: Assertion;
|
|
17
18
|
test: AtomicTestCase;
|
|
18
19
|
output: string | object;
|
|
19
20
|
latencyMs?: number;
|
|
20
21
|
logProbs?: number[];
|
|
21
22
|
cost?: number;
|
|
22
23
|
}): Promise<GradingResult>;
|
|
23
|
-
export declare function isSql(outputString: string, renderedValue: AssertionValue | undefined, inverse: boolean, assertion: Assertion): Promise<GradingResult>;
|
|
24
24
|
export declare function runCompareAssertion(test: AtomicTestCase, assertion: Assertion, outputs: string[]): Promise<GradingResult[]>;
|
|
25
25
|
export declare function readAssertions(filePath: string): Promise<Assertion[]>;
|
|
26
26
|
declare const _default: {
|
|
27
|
+
runAssertion: typeof runAssertion;
|
|
28
|
+
runAssertions: typeof runAssertions;
|
|
27
29
|
matchesSimilarity: typeof matchesSimilarity;
|
|
28
30
|
matchesClassification: typeof matchesClassification;
|
|
29
31
|
matchesLlmRubric: typeof matchesLlmRubric;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/assertions.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"assertions.d.ts","sourceRoot":"","sources":["../../src/assertions.ts"],"names":[],"mappings":"AAiBA,OAAO,EACL,iBAAiB,EACjB,gBAAgB,EAChB,iBAAiB,EACjB,eAAe,EACf,qBAAqB,EACrB,sBAAsB,EACtB,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,iBAAiB,EACjB,iBAAiB,EAClB,MAAM,YAAY,CAAC;AAMpB,OAAO,EACL,KAAK,WAAW,EAChB,KAAK,SAAS,EACd,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,aAAa,EAGlB,cAAc,EACf,MAAM,SAAS,CAAC;AAOjB,eAAO,MAAM,4BAA4B,oBASvC,CAAC;AAqDH,wBAAsB,KAAK,CACzB,YAAY,EAAE,MAAM,EACpB,aAAa,EAAE,cAAc,GAAG,SAAS,EACzC,OAAO,EAAE,OAAO,EAChB,SAAS,EAAE,SAAS,GACnB,OAAO,CAAC,aAAa,CAAC,CA2ExB;AAED,wBAAsB,YAAY,CAAC,EACjC,MAAM,EACN,QAAQ,EACR,SAAS,EACT,IAAI,EACJ,MAAM,EACN,SAAS,EACT,QAAQ,EACR,IAAI,GACL,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,SAAS,EAAE,SAAS,CAAC;IACrB,IAAI,EAAE,cAAc,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,OAAO,CAAC,aAAa,CAAC,CAqhCzB;AAED,wBAAsB,aAAa,CAAC,EAClC,MAAM,EACN,QAAQ,EACR,IAAI,EACJ,MAAM,EACN,SAAS,EACT,QAAQ,EACR,IAAI,GACL,EAAE;IACD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,WAAW,CAAC;IACvB,IAAI,EAAE,cAAc,CAAC;IACrB,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,OAAO,CAAC,aAAa,CAAC,CAoFzB;AAED,wBAAsB,mBAAmB,CACvC,IAAI,EAAE,cAAc,EACpB,SAAS,EAAE,SAAS,EACpB,OAAO,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAe1B;AAED,wBAAsB,cAAc,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,EAAE,CAAC,CAU3E;;;;;;;;;;;;;;;;AAGD,wBAcE"}
|
package/dist/src/assertions.js
CHANGED
|
@@ -26,30 +26,30 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
26
26
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
27
27
|
};
|
|
28
28
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
29
|
-
exports.readAssertions = exports.runCompareAssertion = exports.
|
|
29
|
+
exports.readAssertions = exports.runCompareAssertion = exports.runAssertions = exports.runAssertion = exports.isSql = exports.MODEL_GRADED_ASSERTION_TYPES = void 0;
|
|
30
|
+
const ajv_1 = __importDefault(require("ajv"));
|
|
31
|
+
const ajv_formats_1 = __importDefault(require("ajv-formats"));
|
|
32
|
+
const async_1 = __importDefault(require("async"));
|
|
33
|
+
const fastest_levenshtein_1 = require("fastest-levenshtein");
|
|
30
34
|
const fs_1 = __importDefault(require("fs"));
|
|
31
|
-
const
|
|
35
|
+
const js_yaml_1 = __importDefault(require("js-yaml"));
|
|
32
36
|
const node_util_1 = __importDefault(require("node:util"));
|
|
33
|
-
const
|
|
37
|
+
const path_1 = __importDefault(require("path"));
|
|
38
|
+
const rfdc_1 = __importDefault(require("rfdc"));
|
|
34
39
|
const rouge_1 = __importDefault(require("rouge"));
|
|
35
40
|
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
|
|
36
|
-
const
|
|
37
|
-
const ajv_1 = __importDefault(require("ajv"));
|
|
38
|
-
const ajv_formats_1 = __importDefault(require("ajv-formats"));
|
|
39
|
-
const rfdc_1 = __importDefault(require("rfdc"));
|
|
40
|
-
const fastest_levenshtein_1 = require("fastest-levenshtein");
|
|
41
|
+
const AssertionsResult_1 = require("./assertions/AssertionsResult");
|
|
41
42
|
const cliState_1 = __importDefault(require("./cliState"));
|
|
42
|
-
const
|
|
43
|
-
const logger_1 = __importDefault(require("./logger"));
|
|
43
|
+
const esm_1 = require("./esm");
|
|
44
44
|
const fetch_1 = require("./fetch");
|
|
45
|
-
const
|
|
45
|
+
const logger_1 = __importDefault(require("./logger"));
|
|
46
46
|
const matchers_1 = require("./matchers");
|
|
47
47
|
const openaiUtil_1 = require("./providers/openaiUtil");
|
|
48
|
+
const shared_1 = require("./providers/shared");
|
|
48
49
|
const wrapper_1 = require("./python/wrapper");
|
|
49
|
-
const
|
|
50
|
+
const telemetry_1 = __importDefault(require("./telemetry"));
|
|
50
51
|
const types_1 = require("./types");
|
|
51
|
-
const
|
|
52
|
-
const shared_1 = require("./providers/shared");
|
|
52
|
+
const util_1 = require("./util");
|
|
53
53
|
const ASSERTIONS_MAX_CONCURRENCY = process.env.PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY
|
|
54
54
|
? parseInt(process.env.PROMPTFOO_ASSERTIONS_MAX_CONCURRENCY, 10)
|
|
55
55
|
: 3;
|
|
@@ -96,71 +96,68 @@ function handleRougeScore(baseType, assertion, expected, output, inverted) {
|
|
|
96
96
|
assertion,
|
|
97
97
|
};
|
|
98
98
|
}
|
|
99
|
-
async function
|
|
100
|
-
|
|
101
|
-
|
|
99
|
+
async function isSql(outputString, renderedValue, inverse, assertion) {
|
|
100
|
+
let pass = false;
|
|
101
|
+
let parsedSql;
|
|
102
|
+
let databaseType = 'MySQL';
|
|
103
|
+
let whiteTableList;
|
|
104
|
+
let whiteColumnList;
|
|
105
|
+
if (renderedValue && typeof renderedValue === 'object') {
|
|
106
|
+
const value = renderedValue;
|
|
107
|
+
databaseType = value.database || 'MySQL';
|
|
108
|
+
whiteTableList = value.allowedTables;
|
|
109
|
+
whiteColumnList = value.allowedColumns;
|
|
102
110
|
}
|
|
103
|
-
|
|
104
|
-
|
|
111
|
+
if (renderedValue && typeof renderedValue !== 'object') {
|
|
112
|
+
throw new Error('is-sql assertion must have a object value.');
|
|
113
|
+
}
|
|
114
|
+
const { Parser: SqlParser } = await Promise.resolve().then(() => __importStar(require('node-sql-parser'))).catch(() => {
|
|
115
|
+
throw new Error('node-sql-parser is not installed. Please install it first');
|
|
105
116
|
});
|
|
106
|
-
const
|
|
107
|
-
const
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
assertResult: subAssertResult,
|
|
122
|
-
index: j,
|
|
123
|
-
};
|
|
124
|
-
});
|
|
117
|
+
const sqlParser = new SqlParser();
|
|
118
|
+
const opt = { database: databaseType };
|
|
119
|
+
const failureReasons = [];
|
|
120
|
+
try {
|
|
121
|
+
parsedSql = sqlParser.astify(outputString, opt);
|
|
122
|
+
pass = !inverse;
|
|
123
|
+
}
|
|
124
|
+
catch (err) {
|
|
125
|
+
pass = inverse;
|
|
126
|
+
failureReasons.push(`SQL statement does not conform to the provided ${databaseType} database syntax.`);
|
|
127
|
+
}
|
|
128
|
+
if (whiteTableList) {
|
|
129
|
+
opt.type = 'table';
|
|
130
|
+
try {
|
|
131
|
+
sqlParser.whiteListCheck(outputString, whiteTableList, opt);
|
|
125
132
|
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
if (assertion.type.startsWith('select-')) {
|
|
131
|
-
// Select-type assertions are handled separately because they depend on multiple outputs.
|
|
132
|
-
return;
|
|
133
|
+
catch (err) {
|
|
134
|
+
pass = inverse;
|
|
135
|
+
const error = err;
|
|
136
|
+
failureReasons.push(`SQL validation failed: ${error.message}.`);
|
|
133
137
|
}
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
index,
|
|
156
|
-
result,
|
|
157
|
-
metric,
|
|
158
|
-
weight,
|
|
159
|
-
});
|
|
160
|
-
});
|
|
161
|
-
return mainAssertResult.testResult();
|
|
138
|
+
}
|
|
139
|
+
if (whiteColumnList) {
|
|
140
|
+
opt.type = 'column';
|
|
141
|
+
try {
|
|
142
|
+
sqlParser.whiteListCheck(outputString, whiteColumnList, opt);
|
|
143
|
+
}
|
|
144
|
+
catch (err) {
|
|
145
|
+
pass = inverse;
|
|
146
|
+
const error = err;
|
|
147
|
+
failureReasons.push(`SQL validation failed: ${error.message}.`);
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
if (inverse && pass === false && failureReasons.length === 0) {
|
|
151
|
+
failureReasons.push('The output SQL statement is valid');
|
|
152
|
+
}
|
|
153
|
+
return {
|
|
154
|
+
pass,
|
|
155
|
+
score: pass ? 1 : 0,
|
|
156
|
+
reason: pass ? 'Assertion passed' : failureReasons.join(' '),
|
|
157
|
+
assertion,
|
|
158
|
+
};
|
|
162
159
|
}
|
|
163
|
-
exports.
|
|
160
|
+
exports.isSql = isSql;
|
|
164
161
|
async function runAssertion({ prompt, provider, assertion, test, output, latencyMs, logProbs, cost, }) {
|
|
165
162
|
let pass = false;
|
|
166
163
|
let score = 0.0;
|
|
@@ -441,10 +438,10 @@ async function runAssertion({ prompt, provider, assertion, test, output, latency
|
|
|
441
438
|
}
|
|
442
439
|
if (baseType === 'contains-json') {
|
|
443
440
|
let errorMessage = 'Expected output to contain valid JSON';
|
|
444
|
-
const
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
if (
|
|
441
|
+
const jsonObjects = (0, util_1.extractJsonObjects)(outputString);
|
|
442
|
+
pass = inverse ? jsonObjects.length === 0 : jsonObjects.length > 0;
|
|
443
|
+
for (const jsonObject of jsonObjects) {
|
|
444
|
+
if (renderedValue) {
|
|
448
445
|
let validate;
|
|
449
446
|
if (typeof renderedValue === 'string') {
|
|
450
447
|
if (renderedValue.startsWith('file://')) {
|
|
@@ -465,7 +462,7 @@ async function runAssertion({ prompt, provider, assertion, test, output, latency
|
|
|
465
462
|
else {
|
|
466
463
|
throw new Error('contains-json assertion must have a string or object value');
|
|
467
464
|
}
|
|
468
|
-
pass = validate(
|
|
465
|
+
pass = validate(jsonObject);
|
|
469
466
|
if (pass) {
|
|
470
467
|
break;
|
|
471
468
|
}
|
|
@@ -996,97 +993,71 @@ ${isMultiline
|
|
|
996
993
|
throw new Error('Unknown assertion type: ' + assertion.type);
|
|
997
994
|
}
|
|
998
995
|
exports.runAssertion = runAssertion;
|
|
999
|
-
function
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
let openBracket = str.indexOf('{');
|
|
1003
|
-
let closeBracket = str.indexOf('}', openBracket);
|
|
1004
|
-
// Iterate over the string until we find a valid JSON-like pattern
|
|
1005
|
-
// Iterate over all trailing } until the contents parse as json
|
|
1006
|
-
while (openBracket !== -1) {
|
|
1007
|
-
const jsonStr = str.slice(openBracket, closeBracket + 1);
|
|
1008
|
-
try {
|
|
1009
|
-
jsonObjects.push(JSON.parse(jsonStr));
|
|
1010
|
-
// This is a valid JSON object, so start looking for
|
|
1011
|
-
// an opening bracket after the last closing bracket
|
|
1012
|
-
openBracket = str.indexOf('{', closeBracket + 1);
|
|
1013
|
-
closeBracket = str.indexOf('}', openBracket);
|
|
1014
|
-
}
|
|
1015
|
-
catch (err) {
|
|
1016
|
-
// Not a valid object, move on to the next closing bracket
|
|
1017
|
-
closeBracket = str.indexOf('}', closeBracket + 1);
|
|
1018
|
-
while (closeBracket === -1) {
|
|
1019
|
-
// No closing brackets made a valid json object, so
|
|
1020
|
-
// start looking with the next opening bracket
|
|
1021
|
-
openBracket = str.indexOf('{', openBracket + 1);
|
|
1022
|
-
closeBracket = str.indexOf('}', openBracket);
|
|
1023
|
-
}
|
|
1024
|
-
}
|
|
1025
|
-
}
|
|
1026
|
-
return jsonObjects;
|
|
1027
|
-
}
|
|
1028
|
-
async function isSql(outputString, renderedValue, inverse, assertion) {
|
|
1029
|
-
let pass = false;
|
|
1030
|
-
let parsedSql;
|
|
1031
|
-
let databaseType = 'MySQL';
|
|
1032
|
-
let whiteTableList;
|
|
1033
|
-
let whiteColumnList;
|
|
1034
|
-
if (renderedValue && typeof renderedValue === 'object') {
|
|
1035
|
-
const value = renderedValue;
|
|
1036
|
-
databaseType = value.database || 'MySQL';
|
|
1037
|
-
whiteTableList = value.allowedTables;
|
|
1038
|
-
whiteColumnList = value.allowedColumns;
|
|
1039
|
-
}
|
|
1040
|
-
if (renderedValue && typeof renderedValue !== 'object') {
|
|
1041
|
-
throw new Error('is-sql assertion must have a object value.');
|
|
996
|
+
async function runAssertions({ prompt, provider, test, output, latencyMs, logProbs, cost, }) {
|
|
997
|
+
if (!test.assert || test.assert.length < 1) {
|
|
998
|
+
return AssertionsResult_1.AssertionsResult.noAssertsResult();
|
|
1042
999
|
}
|
|
1043
|
-
const
|
|
1044
|
-
|
|
1000
|
+
const mainAssertResult = new AssertionsResult_1.AssertionsResult({
|
|
1001
|
+
threshold: test.threshold,
|
|
1045
1002
|
});
|
|
1046
|
-
const
|
|
1047
|
-
const
|
|
1048
|
-
|
|
1049
|
-
|
|
1050
|
-
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
failureReasons.push(`SQL validation failed: ${error.message}.`);
|
|
1066
|
-
}
|
|
1067
|
-
}
|
|
1068
|
-
if (whiteColumnList) {
|
|
1069
|
-
opt.type = 'column';
|
|
1070
|
-
try {
|
|
1071
|
-
sqlParser.whiteListCheck(outputString, whiteColumnList, opt);
|
|
1003
|
+
const subAssertResults = [];
|
|
1004
|
+
const asserts = test.assert
|
|
1005
|
+
.map((assertion, i) => {
|
|
1006
|
+
if (assertion.type === 'assert-set') {
|
|
1007
|
+
const subAssertResult = new AssertionsResult_1.AssertionsResult({
|
|
1008
|
+
threshold: assertion.threshold,
|
|
1009
|
+
parentAssertionSet: {
|
|
1010
|
+
assertionSet: assertion,
|
|
1011
|
+
index: i,
|
|
1012
|
+
},
|
|
1013
|
+
});
|
|
1014
|
+
subAssertResults.push(subAssertResult);
|
|
1015
|
+
return assertion.assert.map((subAssert, j) => {
|
|
1016
|
+
return {
|
|
1017
|
+
assertion: subAssert,
|
|
1018
|
+
assertResult: subAssertResult,
|
|
1019
|
+
index: j,
|
|
1020
|
+
};
|
|
1021
|
+
});
|
|
1072
1022
|
}
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1023
|
+
return { assertion, assertResult: mainAssertResult, index: i };
|
|
1024
|
+
})
|
|
1025
|
+
.flat();
|
|
1026
|
+
await async_1.default.forEachOfLimit(asserts, ASSERTIONS_MAX_CONCURRENCY, async ({ assertion, assertResult, index }) => {
|
|
1027
|
+
if (assertion.type.startsWith('select-')) {
|
|
1028
|
+
// Select-type assertions are handled separately because they depend on multiple outputs.
|
|
1029
|
+
return;
|
|
1077
1030
|
}
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1031
|
+
const result = await runAssertion({
|
|
1032
|
+
prompt,
|
|
1033
|
+
provider,
|
|
1034
|
+
assertion,
|
|
1035
|
+
test,
|
|
1036
|
+
output,
|
|
1037
|
+
latencyMs,
|
|
1038
|
+
logProbs,
|
|
1039
|
+
cost,
|
|
1040
|
+
});
|
|
1041
|
+
assertResult.addResult({
|
|
1042
|
+
index,
|
|
1043
|
+
result,
|
|
1044
|
+
metric: assertion.metric,
|
|
1045
|
+
weight: assertion.weight,
|
|
1046
|
+
});
|
|
1047
|
+
});
|
|
1048
|
+
subAssertResults.forEach((subAssertResult) => {
|
|
1049
|
+
const result = subAssertResult.testResult();
|
|
1050
|
+
const { index, assertionSet: { metric, weight }, } = subAssertResult.parentAssertionSet;
|
|
1051
|
+
mainAssertResult.addResult({
|
|
1052
|
+
index,
|
|
1053
|
+
result,
|
|
1054
|
+
metric,
|
|
1055
|
+
weight,
|
|
1056
|
+
});
|
|
1057
|
+
});
|
|
1058
|
+
return mainAssertResult.testResult();
|
|
1088
1059
|
}
|
|
1089
|
-
exports.
|
|
1060
|
+
exports.runAssertions = runAssertions;
|
|
1090
1061
|
async function runCompareAssertion(test, assertion, outputs) {
|
|
1091
1062
|
(0, tiny_invariant_1.default)(typeof assertion.value === 'string', 'select-best must have a string value');
|
|
1092
1063
|
test.options = test.options || {};
|
|
@@ -1114,6 +1085,8 @@ async function readAssertions(filePath) {
|
|
|
1114
1085
|
exports.readAssertions = readAssertions;
|
|
1115
1086
|
// These exports are used by the node.js package (index.ts)
|
|
1116
1087
|
exports.default = {
|
|
1088
|
+
runAssertion,
|
|
1089
|
+
runAssertions,
|
|
1117
1090
|
matchesSimilarity: matchers_1.matchesSimilarity,
|
|
1118
1091
|
matchesClassification: matchers_1.matchesClassification,
|
|
1119
1092
|
matchesLlmRubric: matchers_1.matchesLlmRubric,
|