promptfoo 0.103.0 → 0.103.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. package/README.md +36 -427
  2. package/dist/package.json +1 -1
  3. package/dist/src/app/assets/{index-Vk7H3M29.css → index-DdUNCsxz.css} +1 -1
  4. package/dist/src/app/assets/{index-hVGk-Oul.js → index-ziw_4_A9.js} +238 -238
  5. package/dist/src/app/assets/{index.es-CcK3JjZn.js → index.es-XehlSHxK.js} +1 -1
  6. package/dist/src/app/assets/{sync-BaigR5eq.js → sync-DDIaa9Ut.js} +1 -1
  7. package/dist/src/app/index.html +2 -2
  8. package/dist/src/cache.d.ts +1 -1
  9. package/dist/src/cache.d.ts.map +1 -1
  10. package/dist/src/cache.js +3 -3
  11. package/dist/src/cache.js.map +1 -1
  12. package/dist/src/envars.d.ts +6 -1
  13. package/dist/src/envars.d.ts.map +1 -1
  14. package/dist/src/envars.js.map +1 -1
  15. package/dist/src/esm.d.ts.map +1 -1
  16. package/dist/src/esm.js +4 -1
  17. package/dist/src/esm.js.map +1 -1
  18. package/dist/src/fetch.d.ts.map +1 -1
  19. package/dist/src/fetch.js +8 -5
  20. package/dist/src/fetch.js.map +1 -1
  21. package/dist/src/matchers.d.ts.map +1 -1
  22. package/dist/src/matchers.js +7 -0
  23. package/dist/src/matchers.js.map +1 -1
  24. package/dist/src/models/eval.d.ts.map +1 -1
  25. package/dist/src/models/eval.js +9 -1
  26. package/dist/src/models/eval.js.map +1 -1
  27. package/dist/src/models/evalResult.d.ts.map +1 -1
  28. package/dist/src/models/evalResult.js +30 -6
  29. package/dist/src/models/evalResult.js.map +1 -1
  30. package/dist/src/providers/bedrock.d.ts.map +1 -1
  31. package/dist/src/providers/bedrock.js +15 -22
  32. package/dist/src/providers/bedrock.js.map +1 -1
  33. package/dist/src/providers/http.d.ts +4 -1
  34. package/dist/src/providers/http.d.ts.map +1 -1
  35. package/dist/src/providers/http.js +4 -3
  36. package/dist/src/providers/http.js.map +1 -1
  37. package/dist/src/providers/promptfoo.d.ts.map +1 -1
  38. package/dist/src/providers/promptfoo.js +3 -3
  39. package/dist/src/providers/promptfoo.js.map +1 -1
  40. package/dist/src/redteam/commands/generate.d.ts.map +1 -1
  41. package/dist/src/redteam/commands/generate.js +2 -0
  42. package/dist/src/redteam/commands/generate.js.map +1 -1
  43. package/dist/src/redteam/commands/run.d.ts.map +1 -1
  44. package/dist/src/redteam/commands/run.js +6 -0
  45. package/dist/src/redteam/commands/run.js.map +1 -1
  46. package/dist/src/redteam/constants.d.ts +1 -1
  47. package/dist/src/redteam/constants.d.ts.map +1 -1
  48. package/dist/src/redteam/constants.js +7 -0
  49. package/dist/src/redteam/constants.js.map +1 -1
  50. package/dist/src/redteam/graders.d.ts.map +1 -1
  51. package/dist/src/redteam/graders.js +2 -0
  52. package/dist/src/redteam/graders.js.map +1 -1
  53. package/dist/src/redteam/plugins/base.d.ts.map +1 -1
  54. package/dist/src/redteam/plugins/base.js +7 -0
  55. package/dist/src/redteam/plugins/base.js.map +1 -1
  56. package/dist/src/redteam/plugins/divergentRepetition.d.ts +16 -0
  57. package/dist/src/redteam/plugins/divergentRepetition.d.ts.map +1 -0
  58. package/dist/src/redteam/plugins/divergentRepetition.js +133 -0
  59. package/dist/src/redteam/plugins/divergentRepetition.js.map +1 -0
  60. package/dist/src/redteam/plugins/index.d.ts.map +1 -1
  61. package/dist/src/redteam/plugins/index.js +2 -0
  62. package/dist/src/redteam/plugins/index.js.map +1 -1
  63. package/dist/src/redteam/providers/iterative.d.ts +1 -1
  64. package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
  65. package/dist/src/redteam/providers/iterative.js +6 -6
  66. package/dist/src/redteam/providers/iterative.js.map +1 -1
  67. package/dist/src/redteam/types.d.ts +2 -0
  68. package/dist/src/redteam/types.d.ts.map +1 -1
  69. package/dist/src/redteam/util.d.ts +1 -0
  70. package/dist/src/redteam/util.d.ts.map +1 -1
  71. package/dist/src/redteam/util.js +24 -19
  72. package/dist/src/redteam/util.js.map +1 -1
  73. package/dist/src/util/cloud.d.ts +2 -0
  74. package/dist/src/util/cloud.d.ts.map +1 -0
  75. package/dist/src/util/cloud.js +34 -0
  76. package/dist/src/util/cloud.js.map +1 -0
  77. package/dist/test/fetch.test.js +27 -1
  78. package/dist/test/fetch.test.js.map +1 -1
  79. package/dist/test/integrations/huggingfaceDatasets.test.js +1 -0
  80. package/dist/test/integrations/huggingfaceDatasets.test.js.map +1 -1
  81. package/dist/test/matchers.test.js +15 -0
  82. package/dist/test/matchers.test.js.map +1 -1
  83. package/dist/test/providers/bedrock.test.js +21 -0
  84. package/dist/test/providers/bedrock.test.js.map +1 -1
  85. package/dist/test/providers/http.test.js +54 -25
  86. package/dist/test/providers/http.test.js.map +1 -1
  87. package/dist/test/redteam/plugins/base.test.js +69 -0
  88. package/dist/test/redteam/plugins/base.test.js.map +1 -1
  89. package/dist/test/redteam/plugins/intent.test.js +16 -1
  90. package/dist/test/redteam/plugins/intent.test.js.map +1 -1
  91. package/dist/test/redteam/providers/iterative.test.js +48 -3
  92. package/dist/test/redteam/providers/iterative.test.js.map +1 -1
  93. package/dist/tsconfig.tsbuildinfo +1 -1
  94. package/package.json +1 -1
package/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # promptfoo: test your LLM app locally
1
+ # Promptfoo: LLM evals & red teaming
2
2
 
3
3
  [![npm](https://img.shields.io/npm/v/promptfoo)](https://npmjs.com/package/promptfoo)
4
4
  [![npm](https://img.shields.io/npm/dm/promptfoo)](https://npmjs.com/package/promptfoo)
@@ -6,451 +6,60 @@
6
6
  ![MIT license](https://img.shields.io/github/license/promptfoo/promptfoo)
7
7
  [![Discord](https://github.com/user-attachments/assets/2092591a-ccc5-42a7-aeb6-24a2808950fd)](https://discord.gg/gHPS9jjfbs)
8
8
 
9
- `promptfoo` is a tool for testing, evaluating, and red-teaming LLM apps.
9
+ `promptfoo` is a developer-friendly local tool for testing LLM applications. Stop the trial-and-error approach - start shipping secure, reliable AI apps.
10
10
 
11
- With promptfoo, you can:
12
-
13
- - **Build reliable prompts, models, and RAGs** with benchmarks specific to your use-case
14
- - **Secure your apps** with automated [red teaming](https://www.promptfoo.dev/docs/red-team/) and pentesting
15
- - **Speed up evaluations** with caching, concurrency, and live reloading
16
- - **Score outputs automatically** by defining [metrics](https://www.promptfoo.dev/docs/configuration/expected-outputs)
17
- - Use as a [CLI](https://www.promptfoo.dev/docs/usage/command-line), [library](https://www.promptfoo.dev/docs/usage/node-package), or in [CI/CD](https://www.promptfoo.dev/docs/integrations/github-action)
18
- - Use OpenAI, Anthropic, Azure, Google, HuggingFace, open-source models like Llama, or integrate custom API providers for [any LLM API](https://www.promptfoo.dev/docs/providers)
19
-
20
- The goal: **test-driven LLM development** instead of trial-and-error.
11
+ ## Quick Start
21
12
 
22
13
  ```sh
14
+ # Install and initialize project
23
15
  npx promptfoo@latest init
24
- ```
25
-
26
- # [» View full documentation «](https://www.promptfoo.dev/docs/intro)
27
-
28
- promptfoo produces matrix views that let you quickly evaluate outputs across many prompts and inputs:
29
-
30
- ![prompt evaluation matrix - web viewer](https://github.com/promptfoo/promptfoo/assets/310310/ce5a7817-da82-4484-b26d-32474f1cabc5)
31
-
32
- It works on the command line too:
33
-
34
- ![Prompt evaluation](https://github.com/promptfoo/promptfoo/assets/310310/480e1114-d049-40b9-bd5f-f81c15060284)
35
-
36
- It also produces high-level vulnerability and risk reports:
37
-
38
- ![gen ai red team](https://www.promptfoo.dev/img/riskreport-1@2x.png)
39
-
40
- ## Why choose promptfoo?
41
-
42
- There are many different ways to evaluate prompts. Here are some reasons to consider promptfoo:
43
-
44
- - **Developer friendly**: promptfoo is fast, with quality-of-life features like live reloads and caching.
45
- - **Battle-tested**: Originally built for LLM apps serving over 10 million users in production. Our tooling is flexible and can be adapted to many setups.
46
- - **Simple, declarative test cases**: Define evals without writing code or working with heavy notebooks.
47
- - **Language agnostic**: Use Python, Javascript, or any other language.
48
- - **Share & collaborate**: Built-in share functionality & web viewer for working with teammates.
49
- - **Open-source**: LLM evals are a commodity and should be served by 100% open-source projects with no strings attached.
50
- - **Private**: This software runs completely locally. The evals run on your machine and talk directly with the LLM.
51
-
52
- ## Workflow
53
-
54
- Start by establishing a handful of test cases - core use cases and failure cases that you want to ensure your prompt can handle.
55
-
56
- As you explore modifications to the prompt, use `promptfoo eval` to rate all outputs. This ensures the prompt is actually improving overall.
57
-
58
- As you collect more examples and establish a user feedback loop, continue to build the pool of test cases.
59
-
60
- <img width="772" alt="LLM ops" src="https://github.com/promptfoo/promptfoo/assets/310310/cf0461a7-2832-4362-9fbb-4ebd911d06ff">
61
-
62
- ## Usage - evals
63
-
64
- To get started, run this command:
65
-
66
- ```sh
67
- npx promptfoo@latest init
68
- ```
69
-
70
- This will create a `promptfooconfig.yaml` placeholder in your current directory.
71
-
72
- After editing the prompts and variables to your liking, run the eval command to kick off an evaluation:
73
-
74
- ```
75
- npx promptfoo@latest eval
76
- ```
77
-
78
- ## Usage - red teaming/pentesting
79
-
80
- Run this command:
81
-
82
- ```sh
83
- npx promptfoo@latest redteam init
84
- ```
85
-
86
- This will ask you questions about what types of vulnerabilities you want to find and walk you through running your first scan.
87
16
 
88
- ### Configuration
89
-
90
- The YAML configuration format runs each prompt through a series of example inputs (aka "test case") and checks if they meet requirements (aka "assert").
91
-
92
- See the [Configuration docs](https://www.promptfoo.dev/docs/configuration/guide) for a detailed guide.
93
-
94
- ```yaml
95
- prompts:
96
- - file://prompt1.txt
97
- - file://prompt2.txt
98
- providers:
99
- - openai:gpt-4o-mini
100
- - ollama:llama3.1:70b
101
- tests:
102
- - description: 'Test translation to French'
103
- vars:
104
- language: French
105
- input: Hello world
106
- assert:
107
- - type: contains-json
108
- - type: javascript
109
- value: output.length < 100
110
-
111
- - description: 'Test translation to German'
112
- vars:
113
- language: German
114
- input: How's it going?
115
- assert:
116
- - type: llm-rubric
117
- value: does not describe self as an AI, model, or chatbot
118
- - type: similar
119
- value: was geht
120
- threshold: 0.6 # cosine similarity
17
+ # Run your first evaluation
18
+ npx promptfoo eval
121
19
  ```
122
20
 
123
- ### Supported assertion types
124
-
125
- See [Test assertions](https://www.promptfoo.dev/docs/configuration/expected-outputs) for full details.
126
-
127
- Deterministic eval metrics
128
-
129
- | Assertion Type | Returns true if... |
130
- | ------------------------------- | ----------------------------------------------------------------- |
131
- | `equals` | output matches exactly |
132
- | `contains` | output contains substring |
133
- | `icontains` | output contains substring, case insensitive |
134
- | `regex` | output matches regex |
135
- | `starts-with` | output starts with string |
136
- | `contains-any` | output contains any of the listed substrings |
137
- | `contains-all` | output contains all of the listed substrings |
138
- | `icontains-any` | output contains any of the listed substrings, case insensitive |
139
- | `icontains-all` | output contains all of the listed substrings, case insensitive |
140
- | `is-json` | output is valid json (optional json schema validation) |
141
- | `contains-json` | output contains valid json (optional json schema validation) |
142
- | `is-sql` | output is valid sql |
143
- | `contains-sql` | output contains valid sql |
144
- | `is-xml` | output is valid xml |
145
- | `contains-xml` | output contains valid xml |
146
- | `is-refusal` | output indicates the model refused to perform the task |
147
- | `javascript` | provided Javascript function validates the output |
148
- | `python` | provided Python function validates the output |
149
- | `webhook` | provided webhook returns `{pass: true}` |
150
- | `rouge-n` | Rouge-N score is above a given threshold (default 0.75) |
151
- | `bleu` | BLEU score is above a given threshold (default 0.5) |
152
- | `levenshtein` | Levenshtein distance is below a threshold |
153
- | `latency` | Latency is below a threshold (milliseconds) |
154
- | `perplexity` | Perplexity is below a threshold |
155
- | `perplexity-score` | Normalized perplexity |
156
- | `cost` | Cost is below a threshold (for models with cost info such as GPT) |
157
- | `is-valid-openai-function-call` | Ensure that the function call matches the function's JSON schema |
158
- | `is-valid-openai-tools-call` | Ensure that all tool calls match the tools JSON schema |
159
- | `assert-set` | Group assertions together with optional threshold |
160
-
161
- Model-assisted eval metrics
162
-
163
- | Assertion Type | Method |
164
- | --------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------- |
165
- | [similar](https://www.promptfoo.dev/docs/configuration/expected-outputs/similar) | Embeddings and cosine similarity are above a threshold |
166
- | [classifier](https://www.promptfoo.dev/docs/configuration/expected-outputs/classifier) | Run LLM output through a classifier |
167
- | [llm-rubric](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | LLM output matches a given rubric, using a Language Model to grade output |
168
- | [answer-relevance](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | Ensure that LLM output is related to original query |
169
- | [context-faithfulness](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | Ensure that LLM output uses the context |
170
- | [context-recall](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | Ensure that ground truth appears in context |
171
- | [context-relevance](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | Ensure that context is relevant to original query |
172
- | [factuality](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | LLM output adheres to the given facts, using Factuality method from OpenAI eval |
173
- | [model-graded-closedqa](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | LLM output adheres to given criteria, using Closed QA method from OpenAI eval |
174
- | [moderation](https://www.promptfoo.dev/docs/configuration/expected-outputs/moderation) | Make sure outputs are safe |
175
- | [select-best](https://www.promptfoo.dev/docs/configuration/expected-outputs/model-graded) | Compare multiple outputs for a test case and pick the best one |
176
-
177
- Every test type can be negated by prepending `not-`. For example, `not-equals` or `not-regex`.
178
-
179
- ### Tests from spreadsheet
180
-
181
- Some people prefer to configure their LLM tests in a CSV. In that case, the config is pretty simple:
182
-
183
- ```yaml
184
- prompts:
185
- - file://prompts.txt
186
- providers:
187
- - openai:gpt-4o-mini
188
- tests: file://tests.csv
189
- ```
190
-
191
- See [example CSV](https://github.com/promptfoo/promptfoo/blob/main/examples/simple-test/tests.csv).
192
-
193
- ### Command-line
194
-
195
- If you're looking to customize your usage, you have a wide set of parameters at your disposal.
196
-
197
- | Option | Description |
198
- | ----------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
199
- | `-p, --prompts <paths...>` | Paths to [prompt files](https://www.promptfoo.dev/docs/configuration/parameters#prompts), directory, or glob |
200
- | `-r, --providers <name or path...>` | One of: openai:chat, openai:completion, openai:model-name, localai:chat:model-name, localai:completion:model-name. See [API providers][providers-docs] |
201
- | `-o, --output <path>` | Path to [output file](https://www.promptfoo.dev/docs/configuration/parameters#output-file) (csv, json, yaml, html) |
202
- | `--tests <path>` | Path to [external test file](https://www.promptfoo.dev/docs/configuration/expected-outputs/#load-assertions-from-external-file) |
203
- | `-c, --config <paths>` | Path to one or more [configuration files](https://www.promptfoo.dev/docs/configuration/guide). `promptfooconfig.yaml` is automatically loaded if present |
204
- | `-j, --max-concurrency <number>` | Maximum number of concurrent API calls |
205
- | `--table-cell-max-length <number>` | Truncate console table cells to this length |
206
- | `--prompt-prefix <path>` | This prefix is prepended to every prompt |
207
- | `--prompt-suffix <path>` | This suffix is appended to every prompt |
208
- | `--grader` | [Provider][providers-docs] that will conduct the evaluation, if you are [using LLM to grade your output](https://www.promptfoo.dev/docs/configuration/expected-outputs#llm-evaluation) |
209
-
210
- After running an eval, you may optionally use the `view` command to open the web viewer:
21
+ See [Getting Started](https://www.promptfoo.dev/docs/getting-started/) (evals) or [Red Teaming](https://www.promptfoo.dev/docs/red-team/) (vulnerability scanning) for more.
211
22
 
212
- ```sh
213
- npx promptfoo view
214
- ```
215
-
216
- ### Examples
217
-
218
- #### Prompt quality
219
-
220
- In [this example](https://github.com/promptfoo/promptfoo/tree/main/examples/assistant-cli), we evaluate whether adding adjectives to the personality of an assistant bot affects the responses:
221
-
222
- ```
223
- npx promptfoo eval -p prompts.txt -r openai:gpt-4o-mini -t tests.csv
224
- ```
225
-
226
- <!--
227
- <img width="1362" alt="Side-by-side evaluation of LLM prompt quality, terminal output" src="https://user-images.githubusercontent.com/310310/235329207-e8c22459-5f51-4fee-9714-1b602ac3d7ca.png">
228
-
229
- ![Side-by-side evaluation of LLM prompt quality, html output](https://user-images.githubusercontent.com/310310/235483444-4ddb832d-e103-4b9c-a862-b0d6cc11cdc0.png)
230
- -->
231
-
232
- This command will evaluate the prompts in `prompts.txt`, substituting the variable values from `tests.csv`, and output results in your terminal.
233
-
234
- You can also output a nice [spreadsheet](https://docs.google.com/spreadsheets/d/1nanoj3_TniWrDl1Sj-qYqIMD6jwm5FBy15xPFdUTsmI/edit?usp=sharing), [JSON](https://github.com/promptfoo/promptfoo/blob/main/examples/simple-cli/output.json), YAML, or an HTML file:
235
-
236
- ![Table output](https://user-images.githubusercontent.com/310310/235483444-4ddb832d-e103-4b9c-a862-b0d6cc11cdc0.png)
237
-
238
- #### Model quality
239
-
240
- In the [next example](https://github.com/promptfoo/promptfoo/tree/main/examples/gpt-4o-vs-4o-mini), we evaluate the difference between GPT 3 and GPT 4 outputs for a given prompt:
241
-
242
- ```
243
- npx promptfoo eval -p prompts.txt -r openai:gpt-4o openai:gpt-4o-mini -o output.html
244
- ```
23
+ ## What can you do with Promptfoo?
245
24
 
246
- Produces this HTML table:
25
+ - **Test your prompts and models** with [automated evaluations](https://www.promptfoo.dev/docs/getting-started/)
26
+ - **Secure your LLM apps** with [red teaming](https://www.promptfoo.dev/docs/red-team/) and vulnerability scanning
27
+ - **Compare models** side-by-side (OpenAI, Anthropic, Azure, Bedrock, Ollama, and [more](https://www.promptfoo.dev/docs/providers/))
28
+ - **Automate checks** in [CI/CD](https://www.promptfoo.dev/docs/integrations/ci-cd/)
29
+ - **Share results** with your team
247
30
 
248
- ![Side-by-side evaluation of LLM model quality, gpt-4o vs gpt-4o-mini, html output](https://user-images.githubusercontent.com/310310/235490527-e0c31f40-00a0-493a-8afc-8ed6322bb5ca.png)
31
+ Here's what it looks like in action:
249
32
 
250
- ## Usage (node package)
33
+ ![prompt evaluation matrix - web viewer](https://www.promptfoo.dev/img/claude-vs-gpt-example@2x.png)
251
34
 
252
- You can also use `promptfoo` as a library in your project by importing the `evaluate` function. The function takes the following parameters:
253
-
254
- - `testSuite`: the Javascript equivalent of the promptfooconfig.yaml
255
-
256
- ```typescript
257
- interface EvaluateTestSuite {
258
- providers: string[]; // Valid provider name (e.g. openai:gpt-4o-mini)
259
- prompts: string[]; // List of prompts
260
- tests: string | TestCase[]; // Path to a CSV file, or list of test cases
261
-
262
- defaultTest?: Omit<TestCase, 'description'>; // Optional: add default vars and assertions on test case
263
- outputPath?: string | string[]; // Optional: write results to file
264
- }
265
-
266
- interface TestCase {
267
- // Optional description of what you're testing
268
- description?: string;
269
-
270
- // Key-value pairs to substitute in the prompt
271
- vars?: Record<string, string | string[] | object>;
272
-
273
- // Optional list of automatic checks to run on the LLM output
274
- assert?: Assertion[];
275
-
276
- // Additional configuration settings for the prompt
277
- options?: PromptConfig & OutputConfig & GradingConfig;
278
-
279
- // The required score for this test case. If not provided, the test case is graded pass/fail.
280
- threshold?: number;
281
-
282
- // Override the provider for this test
283
- provider?: string | ProviderOptions | ApiProvider;
284
- }
285
-
286
- interface Assertion {
287
- type: string;
288
- value?: string;
289
- threshold?: number; // Required score for pass
290
- weight?: number; // The weight of this assertion compared to other assertions in the test case. Defaults to 1.
291
- provider?: ApiProvider; // For assertions that require an LLM provider
292
- }
293
- ```
294
-
295
- - `options`: misc options related to how the tests are run
296
-
297
- ```typescript
298
- interface EvaluateOptions {
299
- maxConcurrency?: number;
300
- showProgressBar?: boolean;
301
- generateSuggestions?: boolean;
302
- }
303
- ```
304
-
305
- ### Example
306
-
307
- `promptfoo` exports an `evaluate` function that you can use to run prompt evaluations.
308
-
309
- ```js
310
- import promptfoo from 'promptfoo';
311
-
312
- const results = await promptfoo.evaluate({
313
- prompts: ['Rephrase this in French: {{body}}', 'Rephrase this like a pirate: {{body}}'],
314
- providers: ['openai:gpt-4o-mini'],
315
- tests: [
316
- {
317
- vars: {
318
- body: 'Hello world',
319
- },
320
- },
321
- {
322
- vars: {
323
- body: "I'm hungry",
324
- },
325
- },
326
- ],
327
- });
328
- ```
329
-
330
- This code imports the `promptfoo` library, defines the evaluation options, and then calls the `evaluate` function with these options.
331
-
332
- See the full example [here](https://github.com/promptfoo/promptfoo/tree/main/examples/simple-import), which includes an example results object.
333
-
334
- ## Configuration
335
-
336
- - **[Main guide](https://www.promptfoo.dev/docs/configuration/guide)**: Learn about how to configure your YAML file, setup prompt files, etc.
337
- - **[Configuring test cases](https://www.promptfoo.dev/docs/configuration/expected-outputs)**: Learn more about how to configure assertions and metrics.
338
-
339
- ## Installation
340
-
341
- Requires Node.js 18 or newer.
342
-
343
- You can install promptfoo using npm, npx, Homebrew, or by cloning the repository.
344
-
345
- ### npm (recommended)
346
-
347
- Install `promptfoo` globally:
348
-
349
- ```sh
350
- npm install -g promptfoo
351
- ```
352
-
353
- Or install it locally in your project:
354
-
355
- ```sh
356
- npm install promptfoo
357
- ```
358
-
359
- ### npx
360
-
361
- Run promptfoo without installing it:
362
-
363
- ```sh
364
- npx promptfoo@latest init
365
- ```
366
-
367
- This will create a `promptfooconfig.yaml` placeholder in your current directory.
368
-
369
- ### Homebrew
370
-
371
- If you prefer using Homebrew, you can install promptfoo with:
372
-
373
- ```sh
374
- brew install promptfoo
375
- ```
376
-
377
- ### From source
378
-
379
- For the latest development version:
380
-
381
- ```sh
382
- git clone https://github.com/promptfoo/promptfoo.git
383
- cd promptfoo
384
- npm install
385
- npm run build
386
- npm link
387
- ```
388
-
389
- ### Verify installation
390
-
391
- To verify that promptfoo is installed correctly, run:
392
-
393
- ```sh
394
- promptfoo --version
395
- ```
396
-
397
- This should display the version number of promptfoo.
398
-
399
- For more detailed installation instructions, including system requirements and troubleshooting, please visit our [installation guide](https://www.promptfoo.dev/docs/installation).
400
-
401
- ## API Providers
402
-
403
- We support OpenAI's API as well as a number of open-source models. It's also possible to set up your own custom API provider. **[See Provider documentation][providers-docs]** for more details.
404
-
405
- ## Development
406
-
407
- Here's how to build and run locally:
408
-
409
- ```sh
410
- git clone https://github.com/promptfoo/promptfoo.git
411
- cd promptfoo
412
-
413
- # Optionally use the Node.js version specified in the .nvmrc file - make sure you are on node >= 18
414
- nvm use
415
-
416
- npm i
417
- cd path/to/experiment-with-promptfoo # contains your promptfooconfig.yaml
418
- npx path/to/promptfoo-source eval
419
- ```
420
-
421
- The web UI is located in `src/app`. To run it in dev mode, run `npm run local:app`. This will host the web UI at http://localhost:3000. The web UI expects `promptfoo view` to be running separately.
422
-
423
- Then run:
424
-
425
- ```sh
426
- npm run build
427
- ```
35
+ It works on the command line too:
428
36
 
429
- The build has some side effects such as e.g. copying HTML templates, migrations, etc.
37
+ ![prompt evaluation matrix - command line](https://github.com/promptfoo/promptfoo/assets/310310/480e1114-d049-40b9-bd5f-f81c15060284)
430
38
 
431
- Contributions are welcome! Please feel free to submit a pull request or open an issue.
39
+ It can also generate [security vulnerability reports](https://www.promptfoo.dev/docs/red-team/):
432
40
 
433
- `promptfoo` includes several npm scripts to make development easier and more efficient. To use these scripts, run `npm run <script_name>` in the project directory.
41
+ ![gen ai red team](https://www.promptfoo.dev/img/riskreport-1@2x.png)
434
42
 
435
- Here are some of the available scripts:
43
+ ## Why promptfoo?
436
44
 
437
- - `build`: Transpile TypeScript files to JavaScript
438
- - `build:watch`: Continuously watch and transpile TypeScript files on changes
439
- - `test`: Run test suite
440
- - `test:watch`: Continuously run test suite on changes
441
- - `db:generate`: Generate new db migrations (and create the db if it doesn't already exist). Note that after generating a new migration, you'll have to `npm i` to copy the migrations into `dist/`.
442
- - `db:migrate`: Run existing db migrations (and create the db if it doesn't already exist)
45
+ - 🚀 **Developer-first**: Fast, with features like live reload and caching
46
+ - 🔒 **Private**: Runs 100% locally - your prompts never leave your machine
47
+ - 🔧 **Flexible**: Works with any LLM API or programming language
48
+ - 💪 **Battle-tested**: Powers LLM apps serving 10M+ users in production
49
+ - 📊 **Data-driven**: Make decisions based on metrics, not gut feel
50
+ - 🤝 **Open source**: MIT licensed, with an active community
443
51
 
444
- To run the CLI during development you can run a command like: `npm run local -- eval --config $(readlink -f ./examples/cloudflare-ai/chat_config.yaml)`, where any parts of the command after `--` are passed through to our CLI entrypoint. Since the Next dev server isn't supported in this mode, see the instructions above for running the web server.
52
+ ## Learn More
445
53
 
446
- # [» View full documentation «](https://www.promptfoo.dev/docs/intro)
54
+ - 📚 [Full Documentation](https://www.promptfoo.dev/docs/intro/)
55
+ - 🔐 [Red Teaming Guide](https://www.promptfoo.dev/docs/red-team/)
56
+ - 🎯 [Getting Started](https://www.promptfoo.dev/docs/getting-started/)
57
+ - 💻 [CLI Usage](https://www.promptfoo.dev/docs/usage/command-line/)
58
+ - 📦 [Node.js Package](https://www.promptfoo.dev/docs/usage/node-package/)
59
+ - 🤖 [Supported Models](https://www.promptfoo.dev/docs/providers/)
447
60
 
448
- [providers-docs]: https://www.promptfoo.dev/docs/providers
61
+ ## Contributing
449
62
 
450
- ### Adding a New Provider
63
+ We welcome contributions! Check out our [contributing guide](https://www.promptfoo.dev/docs/contributing/) to get started.
451
64
 
452
- 1. Create an implementation in `src/providers/SOME_PROVIDER_FILE`
453
- 2. Update `loadApiProvider` in `src/providers.ts` to load your provider via string
454
- 3. Add test cases in `test/providers.test.ts`
455
- 1. Test the actual provider implementation
456
- 2. Test loading the provider via a `loadApiProvider` test
65
+ Join our [Discord community](https://discord.gg/gHPS9jjfbs) for help and discussion.
package/dist/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "LLM eval & testing toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.103.0",
5
+ "version": "0.103.2",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "repository": {
@@ -1 +1 @@
1
- .nav{padding:.25rem 0 .25rem 1rem;gap:1rem;background-color:#333;margin-bottom:1rem;box-shadow:0 4px 6px #0000001a}[data-theme=dark] .nav{background-color:#121212}.nav a{text-decoration:none;align-self:center}@media (max-width: 760px){.nav{font-size:.75rem}}.nav .active{font-weight:700}.nav a{color:#f0f0f0}.nav .right-aligned{display:flex;align-items:center;gap:1rem;margin-left:auto;margin-right:.5rem}.nav a:hover{text-decoration:underline}.nav a:hover{color:#ddd}.prompt-var-highlight{background-color:var(--prompt-highlight-color);padding:.25rem;border-radius:4px}.glowing-border{border:1px solid #5cb3ff;box-shadow:0 0 8px 2px #5cb3ff}code[class*=language-],pre[class*=language-]{color:#000;background:none;text-shadow:0 1px white;font-family:Consolas,Monaco,Andale Mono,Ubuntu Mono,monospace;font-size:1em;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.5;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-hyphens:none;-moz-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]::-moz-selection,pre[class*=language-] ::-moz-selection,code[class*=language-]::-moz-selection,code[class*=language-] ::-moz-selection{text-shadow:none;background:#b3d4fc}pre[class*=language-]::selection,pre[class*=language-] ::selection,code[class*=language-]::selection,code[class*=language-] ::selection{text-shadow:none;background:#b3d4fc}@media print{code[class*=language-],pre[class*=language-]{text-shadow:none}}pre[class*=language-]{padding:1em;margin:.5em 
0;overflow:auto}:not(pre)>code[class*=language-],pre[class*=language-]{background:#f5f2f0}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal}.token.comment,.token.prolog,.token.doctype,.token.cdata{color:#708090}.token.punctuation{color:#999}.token.namespace{opacity:.7}.token.property,.token.tag,.token.boolean,.token.number,.token.constant,.token.symbol,.token.deleted{color:#905}.token.selector,.token.attr-name,.token.string,.token.char,.token.builtin,.token.inserted{color:#690}.token.operator,.token.entity,.token.url,.language-css .token.string,.style .token.string{color:#9a6e3a;background:#ffffff80}.token.atrule,.token.attr-value,.token.keyword{color:#07a}.token.function,.token.class-name{color:#dd4a68}.token.regex,.token.important,.token.variable{color:#e90}.token.important,.token.bold{font-weight:700}.token.italic{font-style:italic}.token.entity{cursor:help}.yaml-config{font-size:10px}.custom-metric-container{display:inline-flex;flex-wrap:wrap;gap:.25rem;font-weight:400}.custom-metric-container>span{padding:.25rem .5rem;border-radius:4px;color:var(--text-color);border:1px solid var(--border-color);margin-right:.25rem}.custom-metric-container>span.clickable{cursor:pointer}.custom-metric-container>span.clickable:hover{border-color:var(--link-color)}.fail-reason-carousel-controls{float:right;color:var(--text-color);font-weight:400}pre{white-space:pre-wrap}.font-bold{font-weight:700}table.results-table,.divTable{border-collapse:collapse;width:100%;margin:1rem 0;background-color:#fff}[data-theme=dark] table.results-table,[data-theme=dark] .divTable{background-color:#1a1a1a}.results-table ins{background-color:var(--insert-highlight-color);text-decoration:none}.results-table del{background-color:var(--delete-highlight-color);text-decoration:strikethrough}.tr{display:flex}.results-table tr,.tr{width:fit-content}.results-table tr:hover,.tr:hover{background-color:#00000005}.results-table th,.th,.results-table 
td,.td{position:relative;border:1px solid var(--border-color);vertical-align:top}.compact.results-table th,.compact .th,.compact.results-table td,.compact .td{padding:.5rem}.results-table th.variable,.th.variable,.results-table td.variable,.td.variable{background-color:var(--variable-background-color)}.variable .cell{max-height:100%;overflow-y:auto}.results-table tr.header{background-color:var(--header-background-color)}.results-table th,.th{padding:1rem;position:relative;vertical-align:bottom;text-align:left;white-space:pre-wrap;font-weight:400}.results-table th .action{cursor:pointer;margin-left:.5rem}.results-table th .action svg{vertical-align:middle}.results-table td,.results-table th{height:1px}@-moz-document url-prefix(){.results-table td,.results-table th{height:100%}}.firefox-fix{display:table;height:100%}.results-table tr .cell{display:flex;flex-direction:column;white-space:pre-wrap;height:100%;padding:1rem}.results-table tr .cell img{max-width:var(--max-image-width, 256px);max-height:var(--max-image-height, 256px);cursor:zoom-in}.results-table tr .cell .prompt{background-color:var(--variable-background-color);border:1px solid var(--border-color);padding:1rem;border-radius:4px;margin-bottom:.5rem;font-family:Courier New,Courier,monospace;font-size:.8rem}.results-table tr .cell .prompt .pill{display:block;background-color:var(--neutral-background-color);border:1px solid var(--border-color);border-radius:4px;padding:.25rem;width:fit-content;margin-bottom:.5rem}.results-table tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;top:1.25rem;right:.75rem;line-height:0;font-size:1.25rem}.results-table .first-prompt-col:hover .cell-actions,.results-table .second-prompt-column:hover .cell-actions{visibility:visible}.results-table tr .cell-detail{display:flex;flex-wrap:wrap;column-gap:.5rem;row-gap:.25rem;padding-top:1rem;margin-top:auto}.results-table tr .stat-item{font-weight:400;font-size:.75rem;color:#888}.results-table tr .cell-actions 
.action{cursor:pointer}.results-table tr .cell table{width:100%;border-collapse:collapse;margin:1rem 0}.results-table tr .cell table th,.results-table tr .cell table td{border:1px solid var(--border-color);padding:.5rem;text-align:left}.results-table tr .cell table th{background-color:var(--header-background-color);font-weight:700}.results-table tr .cell table tr:nth-child(2n){background-color:var(--row-background-color)}.results-table tr .cell table tr:hover{background-color:var(--hover-background-color)}.results-table th .output-header{display:flex;flex-direction:column;height:100%;align-items:flex-start}.results-table th .output-header .pills{display:flex;flex-wrap:wrap;gap:.25rem;align-items:center;margin-bottom:.5rem}.results-table th .prompt-container{font-weight:700;margin-bottom:.5rem}.results-table th .prompt-container>*{display:inline}.results-table th .provider{display:inline-block;padding:.25rem .5rem;background-color:var(--neutral-background-color);border:1px solid var(--border-color);border-radius:4px;margin-right:.25rem}.results-table th .summary{font-weight:400}.results-table th .prompt-detail{display:flex;flex-wrap:wrap;column-gap:.5rem;row-gap:.25rem;font-size:.75rem;color:#888;margin-top:auto}.results-table th .summary .highlight{padding:.25rem .5rem;border-radius:4px;background-color:var(--fail-background-color);border:1px solid var(--border-color)}.results-table th .summary .highlight.success{background-color:var(--success-background-color);border:1px solid var(--pass-color)}.results-table .status{display:flex;flex-direction:column;gap:.25rem;font-weight:700;margin-bottom:.5rem}.results-table .status .pill{display:inline-block;padding:.25rem .5rem;border-radius:4px;margin-right:.25rem;align-self:flex-start}.results-table .pass .pill{background-color:var(--success-background-color);color:var(--pass-color);border:1px solid var(--pass-color)}.results-table .fail .pill{border:1px solid var(--fail-color)}.results-table 
.fail{color:var(--fail-color)}.fail-reason{color:var(--fail-color);font-weight:700}.compact .fail-reason{display:inline}.results-table .fail .pill{background-color:var(--fail-background-color)}.results-table td .score{font-weight:400}.results-table .comment{margin-top:.5rem;padding:.25rem .5rem;border-radius:4px;background-color:var(--neutral-background-color);font-style:italic;cursor:pointer}.results-table td.first-prompt-col{border-left:2px solid #888}.results-table td.first-prompt-row{border-top:2px solid #888}.search-highlight{color:var(--search-text-color);background-color:var(--search-highlight-color)}.results-table tr .cell .lightbox{position:fixed;top:0;left:0;right:0;bottom:0;background-color:#000000b3;display:flex;align-items:center;justify-content:center;z-index:1000}.results-table tr .cell .lightbox img{max-width:90%;max-height:90%;cursor:zoom-out}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;-webkit-user-select:none;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.results-table thead.sticky{position:sticky;top:0;z-index:100;transition:transform .2s ease-out}[data-theme=dark] .results-table thead.sticky{background:#1a1a1a}.header-dismiss{position:absolute;top:8px;right:8px;cursor:pointer;z-index:101;padding:4px;border-radius:4px;background:#eee;display:none}[data-theme=dark] .header-dismiss{background:#333}.header-dismiss:hover{background:#ddd}[data-theme=dark] .header-dismiss:hover{background:#444}thead.collapsed .header-dismiss{display:block}.results-table thead.collapsed .prompt-detail,.results-table thead.collapsed .custom-metric-container{display:none}.results-table thead.collapsed .MuiFormControlLabel-root{margin-top:-8px}.results-table{border-collapse:separate;border-spacing:0}.results-table thead.collapsed th{padding-top:4px;padding-bottom:4px}.results-table 
thead th{transition:padding .2s ease-out}.results-table thead .output-header{transition:max-height .2s ease-out}.results-table thead.collapsed{border-bottom:2px solid var(--border-color);box-shadow:0 2px 4px #0000001a}.results-table th{background:#fff;box-shadow:0 1px 2px #0000001a}[data-theme=dark] .results-table th{background:#1a1a1a}.lightbox{position:fixed;top:0;left:0;width:100%;height:100%;background:#000c;display:flex;justify-content:center;align-items:center;z-index:1000;cursor:pointer}.lightbox img{max-width:90%;max-height:90%;object-fit:contain}.error-pill{cursor:pointer}.description{cursor:pointer;transition:background-color .3s ease}.description:hover{background-color:#f0f0f0}[data-theme=dark] .description:hover{background-color:#2a2a2a}.eval-header{display:flex;align-items:center;flex-grow:1}.eval-header strong{font-weight:500}body{background-color:var(--background-color);color:var(--text-color)}.notice{display:flex;flex-direction:column;gap:1.5rem;justify-content:center;align-items:center;height:9rem}.framework-compliance-card{transition:box-shadow .3s ease-in-out;border-radius:12px;overflow:hidden;background-color:#f5f5f5}.framework-compliance-card:hover{box-shadow:0 8px 16px #0000001a}.compliance-summary{text-align:center;margin-bottom:32px;padding:24px;background-color:#fff;border-radius:8px;box-shadow:0 2px 4px #0000000d}.framework-grid{margin-top:24px}.framework-item{height:100%;transition:transform .3s ease,box-shadow .3s ease}.framework-item:hover{box-shadow:0 4px 8px #0000001a}.framework-item.compliant{background-color:#e8f5e9}.framework-item.non-compliant{background-color:#ffebee}.icon-compliant{color:#4caf50;font-size:24px}.icon-non-compliant{color:#f44336;font-size:24px}[data-theme=dark] .framework-compliance-card{background-color:#1e1e1e}[data-theme=dark] .compliance-summary{background-color:#2a2a2a}[data-theme=dark] .framework-item.compliant{background-color:#4caf501a}[data-theme=dark] 
.framework-item.non-compliant{background-color:#f443361a}[data-theme=dark] .framework-compliance-card:hover,[data-theme=dark] .framework-item:hover{box-shadow:0 8px 16px #ffffff1a}.framework-item .MuiList-root,.framework-item .MuiListItem-root{padding-top:0;padding-bottom:0}.framework-item .MuiListItemIcon-root{min-width:24px}.framework-item .MuiListItemText-root{margin-top:0;margin-bottom:0}.severity-card{transition:transform .3s ease-in-out,box-shadow .3s ease-in-out}.severity-card:hover{transform:translateY(-5px);box-shadow:0 8px 16px #0000001a;cursor:pointer}[data-theme=dark] .severity-card:hover{box-shadow:none}.card-critical{border-left:5px solid #ff1744}.card-high{border-left:5px solid #ff9100}.card-medium{border-left:5px solid #ffc400}.card-low{border-left:5px solid #00e676}.risk-category-drawer .MuiListItem-root.failure-item{border:1px solid #e0e0e0;border-radius:8px;margin-bottom:16px;padding:16px;transition:all .3s ease;box-shadow:0 2px 4px #0000000d}.risk-category-drawer .MuiListItem-root.failure-item:hover{border-color:#bdbdbd;box-shadow:0 4px 8px #0000001a}.risk-category-drawer .prompt{font-weight:500;margin-bottom:8px;word-break:break-word;max-width:100%;overflow-wrap:break-all}.risk-category-drawer .output{color:#555;white-space:pre-wrap;word-break:break-word;max-height:100px;overflow-y:auto;overflow-wrap:break-all;background-color:#f5f5f5;padding:8px;border-radius:4px}.risk-category-drawer .failed-tests-header{margin-top:8px;margin-bottom:8px;font-weight:500;border-top:2px solid #e0e0e0;padding-top:1.5rem}[data-theme=dark] .risk-category-drawer .MuiListItem-root.failure-item{border-color:#424242;background-color:#2c2c2c;box-shadow:0 2px 4px #0003}[data-theme=dark] .risk-category-drawer .MuiListItem-root.failure-item:hover{border-color:#616161;box-shadow:0 4px 8px #0000004d}[data-theme=dark] .risk-category-drawer .prompt{color:#e0e0e0}[data-theme=dark] .risk-category-drawer .output{color:#b0b0b0;background-color:#1e1e1e}[data-theme=dark] 
.risk-category-drawer .failed-tests-header{color:#e0e0e0;border-top-color:#424242}.risk-card-container{padding-top:3rem;padding-bottom:3rem}.risk-card-title{font-weight:700}.risk-card-progress{margin-bottom:1rem}.risk-card-issues{color:red}.risk-card-list-item{padding:0;transition:background-color .3s,box-shadow .3s}.risk-card-list-item:hover{background-color:#f0f8ff;box-shadow:0 0 0 1px #add8e6;border-radius:4px}[data-theme=dark] .risk-card-list-item:hover{background-color:#2a2a2a;box-shadow:0 0 0 1px #555;border-radius:4px}.risk-card-icon-passed{color:green;font-size:16px}.risk-card-icon-failed{color:red;font-size:16px}.risk-card-icon-no-tests{color:#888;font-size:16px}.risk-card-percentage{margin-left:8px;font-weight:700}.risk-card-percentage-high{color:#4caf50}.risk-card-percentage-medium{color:#ff9800}.risk-card-percentage-low{color:#f44336}[data-theme=dark] .risk-card-percentage-high{color:#81c784}[data-theme=dark] .risk-card-percentage-medium{color:#ffb74d}[data-theme=dark] .risk-card-percentage-low{color:#e57373}.strategy-stats-card{border-radius:.75rem}div.strategy-stats-content{padding:2rem}.strategy-grid{display:grid;grid-template-columns:repeat(2,1fr);gap:1.5rem}.strategy-item{border:1px solid #eee;border-radius:.5rem;padding:1rem;height:100%;transition:background-color .2s ease;border-radius:4px;padding:16px}.strategy-item:hover{background-color:#0000000a}[data-theme=dark] .strategy-item{border:1px solid #424242}.strategy-name{font-weight:500;margin-bottom:.25rem}.strategy-description{margin-bottom:.75rem}.progress-container{margin-bottom:.5rem}.fail-rate{text-align:right}.attack-stats{display:block;text-align:right}.pass-high{color:green!important}.pass-medium{color:#cb8503!important}.pass-low{color:red!important}.vuln-critical{color:red!important;font-weight:700!important}.vuln-high{color:red!important}.vuln-medium{color:#cb8503!important}.vuln-low{color:green!important}.report-header{padding:24px;border-radius:12px;box-shadow:0 4px 6px 
#0000001a}[data-theme=dark] .report-header{box-shadow:none}.report-details{display:flex;flex-wrap:wrap;gap:12px}.report-details .MuiChip-root{font-size:.875rem}.page-content{padding:0;margin-top:-15px}:root{font-family:Inter,system-ui,Avenir,Helvetica,Arial,sans-serif;font-style:normal;font-weight-normal:400;font-weight-bold:700;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #f5f5f5;--text-color: #404040;--link-color: #2e6585;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--success-background-color: #d1ffd7;--fail-background-color: #ffd1d1;--neutral-background-color: #eee;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7;--insert-highlight-color: #d4fcbc;--delete-highlight-color: #fbb6c2;--prompt-highlight-color: linen;--textarea-background-color: #ffffff;--textarea-color: #404040;--search-highlight-color: #ffff00;--search-text-color: inherit}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--link-color: #6fcaff;--border-color: #444444;--table-border-color: #444444;--pass-color: #b1e9b3;--fail-color: #ee726a;--success-background-color: #216d2b;--fail-background-color: #6d2121;--neutral-background-color: #424242;--variable-background-color: #333;--header-background-color: #333;--insert-highlight-color: #4f8a34;--delete-highlight-color: #8a3434;--prompt-highlight-color: #67605a;--textarea-background-color: #2d2d2d;--textarea-color: #fff;--search-highlight-color: #ffff00;--search-text-color: #404040}html{font-size:16px;background-color:var(--background-color);color:var(--text-color)}body{margin:0}a{color:var(--link-color);text-decoration:none}a:hover{text-decoration:underline}textarea{background-color:var(--textarea-background-color);color:var(--textarea-color);padding:.5em;border:1px solid 
var(--border-color);border-radius:.25em;resize:vertical}*{box-sizing:border-box}
1
+ .nav{padding:.25rem 0 .25rem 1rem;gap:1rem;background-color:#333;margin-bottom:1rem;box-shadow:0 4px 6px #0000001a}[data-theme=dark] .nav{background-color:#121212}.nav a{text-decoration:none;align-self:center}@media (max-width: 760px){.nav{font-size:.75rem}}.nav .active{font-weight:700}.nav a{color:#f0f0f0}.nav .right-aligned{display:flex;align-items:center;gap:1rem;margin-left:auto;margin-right:.5rem}.nav a:hover{text-decoration:underline}.nav a:hover{color:#ddd}.prompt-var-highlight{background-color:var(--prompt-highlight-color);padding:.25rem;border-radius:4px}.glowing-border{border:1px solid #5cb3ff;box-shadow:0 0 8px 2px #5cb3ff}code[class*=language-],pre[class*=language-]{color:#000;background:none;text-shadow:0 1px white;font-family:Consolas,Monaco,Andale Mono,Ubuntu Mono,monospace;font-size:1em;text-align:left;white-space:pre;word-spacing:normal;word-break:normal;word-wrap:normal;line-height:1.5;-moz-tab-size:4;-o-tab-size:4;tab-size:4;-webkit-hyphens:none;-moz-hyphens:none;-ms-hyphens:none;hyphens:none}pre[class*=language-]::-moz-selection,pre[class*=language-] ::-moz-selection,code[class*=language-]::-moz-selection,code[class*=language-] ::-moz-selection{text-shadow:none;background:#b3d4fc}pre[class*=language-]::selection,pre[class*=language-] ::selection,code[class*=language-]::selection,code[class*=language-] ::selection{text-shadow:none;background:#b3d4fc}@media print{code[class*=language-],pre[class*=language-]{text-shadow:none}}pre[class*=language-]{padding:1em;margin:.5em 
0;overflow:auto}:not(pre)>code[class*=language-],pre[class*=language-]{background:#f5f2f0}:not(pre)>code[class*=language-]{padding:.1em;border-radius:.3em;white-space:normal}.token.comment,.token.prolog,.token.doctype,.token.cdata{color:#708090}.token.punctuation{color:#999}.token.namespace{opacity:.7}.token.property,.token.tag,.token.boolean,.token.number,.token.constant,.token.symbol,.token.deleted{color:#905}.token.selector,.token.attr-name,.token.string,.token.char,.token.builtin,.token.inserted{color:#690}.token.operator,.token.entity,.token.url,.language-css .token.string,.style .token.string{color:#9a6e3a;background:#ffffff80}.token.atrule,.token.attr-value,.token.keyword{color:#07a}.token.function,.token.class-name{color:#dd4a68}.token.regex,.token.important,.token.variable{color:#e90}.token.important,.token.bold{font-weight:700}.token.italic{font-style:italic}.token.entity{cursor:help}.yaml-config{font-size:10px}.custom-metric-container{display:inline-flex;flex-wrap:wrap;gap:.25rem;font-weight:400}.custom-metric-container>span{padding:.25rem .5rem;border-radius:4px;color:var(--text-color);border:1px solid var(--border-color);margin-right:.25rem}.custom-metric-container>span.clickable{cursor:pointer}.custom-metric-container>span.clickable:hover{border-color:var(--link-color)}.fail-reason-carousel-controls{float:right;color:var(--text-color);font-weight:400}pre{white-space:pre-wrap}.font-bold{font-weight:700}table.results-table,.divTable{border-collapse:collapse;width:100%;margin:1rem 0;background-color:#fff}[data-theme=dark] table.results-table,[data-theme=dark] .divTable{background-color:#1a1a1a}.results-table ins{background-color:var(--insert-highlight-color);text-decoration:none}.results-table del{background-color:var(--delete-highlight-color);text-decoration:strikethrough}.tr{display:flex}.results-table tr,.tr{width:fit-content}.results-table tr:hover,.tr:hover{background-color:#00000005}.results-table th,.th,.results-table 
td,.td{position:relative;border:1px solid var(--border-color);vertical-align:top}.compact.results-table th,.compact .th,.compact.results-table td,.compact .td{padding:.5rem}.results-table th.variable,.th.variable,.results-table td.variable,.td.variable{background-color:var(--variable-background-color)}.variable .cell{max-height:100%;overflow-y:auto}.results-table tr.header{background-color:var(--header-background-color)}.results-table th,.th{padding:1rem;position:relative;vertical-align:bottom;text-align:left;white-space:pre-wrap;font-weight:400}.results-table th .action{cursor:pointer;margin-left:.5rem}.results-table th .action svg{vertical-align:middle}.results-table td,.results-table th{height:1px}@-moz-document url-prefix(){.results-table td,.results-table th{height:100%}}.firefox-fix{display:table;height:100%}.results-table tr .cell{display:flex;flex-direction:column;white-space:pre-wrap;height:100%;padding:1rem}.results-table tr .cell img{max-width:var(--max-image-width, 256px);max-height:var(--max-image-height, 256px);cursor:zoom-in}.results-table tr .cell .prompt{background-color:var(--variable-background-color);border:1px solid var(--border-color);padding:1rem;border-radius:4px;margin-bottom:.5rem;font-family:Courier New,Courier,monospace;font-size:.8rem}.results-table tr .cell .prompt .pill{display:block;background-color:var(--neutral-background-color);border:1px solid var(--border-color);border-radius:4px;padding:.25rem;width:fit-content;margin-bottom:.5rem}.results-table tr .cell-actions{display:flex;gap:.5rem;visibility:hidden;position:absolute;top:1.25rem;right:.75rem;line-height:0;font-size:1.25rem}.results-table .first-prompt-col:hover .cell-actions,.results-table .second-prompt-column:hover .cell-actions{visibility:visible}.results-table tr .cell-detail{display:flex;flex-wrap:wrap;column-gap:.5rem;row-gap:.25rem;padding-top:1rem;margin-top:auto}.results-table tr .stat-item{font-weight:400;font-size:.75rem;color:#888}.results-table tr .cell-actions 
.action{cursor:pointer}.results-table tr .cell table{width:100%;border-collapse:collapse;margin:1rem 0}.results-table tr .cell table th,.results-table tr .cell table td{border:1px solid var(--border-color);padding:.5rem;text-align:left}.results-table tr .cell table th{background-color:var(--header-background-color);font-weight:700}.results-table tr .cell table tr:nth-child(2n){background-color:var(--row-background-color)}.results-table tr .cell table tr:hover{background-color:var(--hover-background-color)}.results-table th .output-header{display:flex;flex-direction:column;height:100%;align-items:flex-start}.results-table th .output-header .pills{display:flex;flex-wrap:wrap;gap:.25rem;align-items:center;margin-bottom:.5rem}.results-table th .prompt-container{font-weight:700;margin-bottom:.5rem}.results-table th .prompt-container>*{display:inline}.results-table th .provider{display:inline-block;padding:.25rem .5rem;background-color:var(--neutral-background-color);border:1px solid var(--border-color);border-radius:4px;margin-right:.25rem}.results-table th .summary{font-weight:400}.results-table th .prompt-detail{display:flex;flex-wrap:wrap;column-gap:.5rem;row-gap:.25rem;font-size:.75rem;color:#888;margin-top:auto}.results-table th .summary .highlight{padding:.25rem .5rem;border-radius:4px;background-color:var(--fail-background-color);border:1px solid var(--border-color)}.results-table th .summary .highlight.success{background-color:var(--success-background-color);border:1px solid var(--pass-color)}.results-table .status{display:flex;flex-direction:column;gap:.25rem;font-weight:700;margin-bottom:.5rem}.results-table .status .pill{display:inline-block;padding:.25rem .5rem;border-radius:4px;margin-right:.25rem;align-self:flex-start}.results-table .pass .pill{background-color:var(--success-background-color);color:var(--pass-color);border:1px solid var(--pass-color)}.results-table .fail .pill{border:1px solid var(--fail-color)}.results-table 
.fail{color:var(--fail-color)}.fail-reason{color:var(--fail-color);font-weight:700}.compact .fail-reason{display:inline}.results-table .fail .pill{background-color:var(--fail-background-color)}.results-table td .score{font-weight:400}.results-table .comment{margin-top:.5rem;padding:.25rem .5rem;border-radius:4px;background-color:var(--neutral-background-color);font-style:italic;cursor:pointer}.results-table td.first-prompt-col{border-left:2px solid #888}.results-table td.first-prompt-row{border-top:2px solid #888}.search-highlight{color:var(--search-text-color);background-color:var(--search-highlight-color)}.results-table tr .cell .lightbox{position:fixed;top:0;left:0;right:0;bottom:0;background-color:#000000b3;display:flex;align-items:center;justify-content:center;z-index:1000}.results-table tr .cell .lightbox img{max-width:90%;max-height:90%;cursor:zoom-out}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;-webkit-user-select:none;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.results-table thead.sticky{position:sticky;top:0;z-index:100;transition:transform .2s ease-out}[data-theme=dark] .results-table thead.sticky{background:#1a1a1a}.header-dismiss{position:absolute;top:8px;right:8px;cursor:pointer;z-index:101;padding:4px;border-radius:4px;background:#eee;display:none}[data-theme=dark] .header-dismiss{background:#333}.header-dismiss:hover{background:#ddd}[data-theme=dark] .header-dismiss:hover{background:#444}thead.collapsed .header-dismiss{display:block}.results-table thead.collapsed .prompt-detail,.results-table thead.collapsed .custom-metric-container{display:none}.results-table thead.collapsed .MuiFormControlLabel-root{margin-top:-8px}.results-table{border-collapse:separate;border-spacing:0}.results-table thead.collapsed th{padding-top:4px;padding-bottom:4px}.results-table 
thead th{transition:padding .2s ease-out}.results-table thead .output-header{transition:max-height .2s ease-out}.results-table thead.collapsed{border-bottom:2px solid var(--border-color);box-shadow:0 2px 4px #0000001a}.results-table th{background:#fff;box-shadow:0 1px 2px #0000001a}[data-theme=dark] .results-table th{background:#1a1a1a}.lightbox{position:fixed;top:0;left:0;width:100%;height:100%;background:#000c;display:flex;justify-content:center;align-items:center;z-index:1000;cursor:pointer}.lightbox img{max-width:90%;max-height:90%;object-fit:contain}.error-pill{cursor:pointer}.description{cursor:pointer;transition:background-color .3s ease}.description:hover{background-color:#f0f0f0}[data-theme=dark] .description:hover{background-color:#2a2a2a}.eval-header{display:flex;align-items:center;flex-grow:1}.eval-header strong{font-weight:500}body{background-color:var(--background-color);color:var(--text-color)}.notice{display:flex;flex-direction:column;gap:1.5rem;justify-content:center;align-items:center;height:9rem}.framework-compliance-card{transition:box-shadow .3s ease-in-out;border-radius:12px;overflow:hidden;background-color:#f5f5f5}.framework-compliance-card:hover{box-shadow:0 8px 16px #0000001a}.compliance-summary{text-align:center;margin-bottom:32px;padding:24px;background-color:#fff;border-radius:8px;box-shadow:0 2px 4px #0000000d}.framework-grid{margin-top:24px}.framework-item{height:100%;transition:transform .3s ease,box-shadow .3s ease}.framework-item:hover{box-shadow:0 4px 8px #0000001a}.framework-item.compliant{background-color:#e8f5e9}.framework-item.non-compliant{background-color:#ffebee}.icon-compliant{color:#4caf50;font-size:24px}.icon-non-compliant{color:#f44336;font-size:24px}[data-theme=dark] .framework-compliance-card{background-color:#1e1e1e}[data-theme=dark] .compliance-summary{background-color:#2a2a2a}[data-theme=dark] .framework-item.compliant{background-color:#4caf501a}[data-theme=dark] 
.framework-item.non-compliant{background-color:#f443361a}[data-theme=dark] .framework-compliance-card:hover,[data-theme=dark] .framework-item:hover{box-shadow:0 8px 16px #ffffff1a}.framework-item .MuiList-root,.framework-item .MuiListItem-root{padding-top:0;padding-bottom:0}.framework-item .MuiListItemIcon-root{min-width:24px}.framework-item .MuiListItemText-root{margin-top:0;margin-bottom:0}.severity-card{transition:transform .3s ease-in-out,box-shadow .3s ease-in-out}.severity-card:hover{transform:translateY(-5px);box-shadow:0 8px 16px #0000001a;cursor:pointer}[data-theme=dark] .severity-card:hover{box-shadow:none}.card-critical{border-left:5px solid #ff1744}.card-high{border-left:5px solid #ff9100}.card-medium{border-left:5px solid #ffc400}.card-low{border-left:5px solid #00e676}.risk-category-drawer .MuiListItem-root.failure-item{border:1px solid #e0e0e0;border-radius:8px;margin-bottom:16px;padding:16px;transition:all .3s ease;box-shadow:0 2px 4px #0000000d}.risk-category-drawer .MuiListItem-root.failure-item:hover{border-color:#bdbdbd;box-shadow:0 4px 8px #0000001a}.risk-category-drawer .prompt{font-weight:500;margin-bottom:8px;word-break:break-word;max-width:100%;overflow-wrap:break-all}.risk-category-drawer .output{color:#555;white-space:pre-wrap;word-break:break-word;max-height:100px;overflow-y:auto;overflow-wrap:break-all;background-color:#f5f5f5;padding:8px;border-radius:4px}.risk-category-drawer .failed-tests-header{margin-top:8px;margin-bottom:8px;font-weight:500;border-top:2px solid #e0e0e0;padding-top:1.5rem}[data-theme=dark] .risk-category-drawer .MuiListItem-root.failure-item{border-color:#424242;background-color:#2c2c2c;box-shadow:0 2px 4px #0003}[data-theme=dark] .risk-category-drawer .MuiListItem-root.failure-item:hover{border-color:#616161;box-shadow:0 4px 8px #0000004d}[data-theme=dark] .risk-category-drawer .prompt{color:#e0e0e0}[data-theme=dark] .risk-category-drawer .output{color:#b0b0b0;background-color:#1e1e1e}[data-theme=dark] 
.risk-category-drawer .failed-tests-header{color:#e0e0e0;border-top-color:#424242}.risk-card-container{padding-top:3rem;padding-bottom:3rem}.risk-card-title{font-weight:700}.risk-card-progress{margin-bottom:1rem}.risk-card-issues{color:red}.risk-card-list-item{padding:0;transition:background-color .3s,box-shadow .3s}.risk-card-list-item:hover{background-color:#f0f8ff;box-shadow:0 0 0 1px #add8e6;border-radius:4px}[data-theme=dark] .risk-card-list-item:hover{background-color:#2a2a2a;box-shadow:0 0 0 1px #555;border-radius:4px}.risk-card-icon-passed{color:green;font-size:16px}.risk-card-icon-failed{color:red;font-size:16px}.risk-card-icon-no-tests{color:#888;font-size:16px}.risk-card-percentage{margin-left:8px;font-weight:700}.risk-card-percentage-high{color:#4caf50}.risk-card-percentage-medium{color:#ff9800}.risk-card-percentage-low{color:#f44336}[data-theme=dark] .risk-card-percentage-high{color:#81c784}[data-theme=dark] .risk-card-percentage-medium{color:#ffb74d}[data-theme=dark] .risk-card-percentage-low{color:#e57373}.pass-high{color:green!important}.pass-medium{color:#cb8503!important}.pass-low{color:red!important}.vuln-critical{color:red!important;font-weight:700!important}.vuln-high{color:red!important}.vuln-medium{color:#cb8503!important}.vuln-low{color:green!important}.report-header{padding:24px;border-radius:12px;box-shadow:0 4px 6px #0000001a}[data-theme=dark] .report-header{box-shadow:none}.report-details{display:flex;flex-wrap:wrap;gap:12px}.report-details .MuiChip-root{font-size:.875rem}.page-content{padding:0;margin-top:-15px}:root{font-family:Inter,system-ui,Avenir,Helvetica,Arial,sans-serif;font-style:normal;font-weight-normal:400;font-weight-bold:700;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #f5f5f5;--text-color: #404040;--link-color: #2e6585;--border-color: lightgray;--table-border-color: lightgray;--pass-color: 
green;--fail-color: #ad0000;--success-background-color: #d1ffd7;--fail-background-color: #ffd1d1;--neutral-background-color: #eee;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7;--insert-highlight-color: #d4fcbc;--delete-highlight-color: #fbb6c2;--prompt-highlight-color: linen;--textarea-background-color: #ffffff;--textarea-color: #404040;--search-highlight-color: #ffff00;--search-text-color: inherit}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--link-color: #6fcaff;--border-color: #444444;--table-border-color: #444444;--pass-color: #b1e9b3;--fail-color: #ee726a;--success-background-color: #216d2b;--fail-background-color: #6d2121;--neutral-background-color: #424242;--variable-background-color: #333;--header-background-color: #333;--insert-highlight-color: #4f8a34;--delete-highlight-color: #8a3434;--prompt-highlight-color: #67605a;--textarea-background-color: #2d2d2d;--textarea-color: #fff;--search-highlight-color: #ffff00;--search-text-color: #404040}html{font-size:16px;background-color:var(--background-color);color:var(--text-color)}body{margin:0}a{color:var(--link-color);text-decoration:none}a:hover{text-decoration:underline}textarea{background-color:var(--textarea-background-color);color:var(--textarea-color);padding:.5em;border:1px solid var(--border-color);border-radius:.25em;resize:vertical}*{box-sizing:border-box}