promptfoo 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/README.md +1 -1
  2. package/dist/package.json +4 -4
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +5 -0
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/evaluator.js +1 -1
  7. package/dist/src/evaluator.js.map +1 -1
  8. package/dist/src/index.d.ts +1 -5
  9. package/dist/src/index.d.ts.map +1 -1
  10. package/dist/src/index.js +1 -1
  11. package/dist/src/index.js.map +1 -1
  12. package/dist/src/matchers.d.ts +3 -2
  13. package/dist/src/matchers.d.ts.map +1 -1
  14. package/dist/src/matchers.js +37 -9
  15. package/dist/src/matchers.js.map +1 -1
  16. package/dist/src/providers/anthropic.d.ts +5 -3
  17. package/dist/src/providers/anthropic.d.ts.map +1 -1
  18. package/dist/src/providers/anthropic.js +8 -10
  19. package/dist/src/providers/anthropic.js.map +1 -1
  20. package/dist/src/providers/azureopenai.d.ts +9 -8
  21. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  22. package/dist/src/providers/azureopenai.js +33 -36
  23. package/dist/src/providers/azureopenai.js.map +1 -1
  24. package/dist/src/providers/openai.d.ts +12 -12
  25. package/dist/src/providers/openai.d.ts.map +1 -1
  26. package/dist/src/providers/openai.js +54 -65
  27. package/dist/src/providers/openai.js.map +1 -1
  28. package/dist/src/providers/replicate.d.ts +4 -2
  29. package/dist/src/providers/replicate.d.ts.map +1 -1
  30. package/dist/src/providers/replicate.js +10 -8
  31. package/dist/src/providers/replicate.js.map +1 -1
  32. package/dist/src/providers/webhook.d.ts +9 -0
  33. package/dist/src/providers/webhook.d.ts.map +1 -0
  34. package/dist/src/providers/webhook.js +54 -0
  35. package/dist/src/providers/webhook.js.map +1 -0
  36. package/dist/src/providers.d.ts +1 -1
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +36 -28
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/suggestions.d.ts.map +1 -1
  41. package/dist/src/suggestions.js +1 -3
  42. package/dist/src/suggestions.js.map +1 -1
  43. package/dist/src/types.d.ts +7 -1
  44. package/dist/src/types.d.ts.map +1 -1
  45. package/dist/src/util.js +1 -1
  46. package/dist/src/util.js.map +1 -1
  47. package/dist/src/web/nextui/404/index.html +1 -1
  48. package/dist/src/web/nextui/404.html +1 -1
  49. package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
  50. package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
  51. package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
  52. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
  53. package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
  54. package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
  55. package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
  56. package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
  57. package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
  58. package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
  59. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
  60. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
  61. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
  62. package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
  63. package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
  64. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
  65. package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
  66. package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
  67. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
  68. package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
  69. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
  70. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
  71. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
  72. package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
  73. package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
  74. package/dist/src/web/nextui/eval/index.html +1 -1
  75. package/dist/src/web/nextui/eval/index.txt +6 -6
  76. package/dist/src/web/nextui/index.html +1 -1
  77. package/dist/src/web/nextui/index.txt +5 -5
  78. package/dist/src/web/nextui/setup/index.html +27 -1
  79. package/dist/src/web/nextui/setup/index.txt +9 -9
  80. package/dist/src/web/server.d.ts.map +1 -1
  81. package/dist/src/web/server.js +9 -5
  82. package/dist/src/web/server.js.map +1 -1
  83. package/package.json +4 -4
  84. package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
  85. package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
  86. package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
  87. package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
  88. package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
  89. package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
  90. package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
  91. package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
  92. package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
  93. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
  94. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
  95. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
  96. package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
  97. package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
  98. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
  99. package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
  100. package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
  101. package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
  102. package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
  103. package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
  104. package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
  105. package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
  106. package/dist/src/web/nextui/api +0 -1
  107. package/src/__mocks__/esm.ts +0 -3
  108. package/src/assertions.ts +0 -580
  109. package/src/cache.ts +0 -109
  110. package/src/esm.ts +0 -13
  111. package/src/evaluator.ts +0 -500
  112. package/src/index.ts +0 -52
  113. package/src/logger.ts +0 -46
  114. package/src/main.ts +0 -442
  115. package/src/matchers.ts +0 -120
  116. package/src/onboarding.ts +0 -69
  117. package/src/prompts.ts +0 -39
  118. package/src/providers/anthropic.ts +0 -88
  119. package/src/providers/azureopenai.ts +0 -299
  120. package/src/providers/llama.ts +0 -95
  121. package/src/providers/localai.ts +0 -111
  122. package/src/providers/ollama.ts +0 -89
  123. package/src/providers/openai.ts +0 -337
  124. package/src/providers/replicate.ts +0 -99
  125. package/src/providers/scriptCompletion.ts +0 -35
  126. package/src/providers/shared.ts +0 -34
  127. package/src/providers.ts +0 -192
  128. package/src/share.ts +0 -27
  129. package/src/suggestions.ts +0 -63
  130. package/src/table.ts +0 -43
  131. package/src/tableOutput.html +0 -52
  132. package/src/telemetry.ts +0 -70
  133. package/src/types.ts +0 -299
  134. package/src/updates.ts +0 -46
  135. package/src/util.ts +0 -543
  136. package/src/web/nextui/.eslintrc.json +0 -3
  137. package/src/web/nextui/next.config.js +0 -14
  138. package/src/web/nextui/package-lock.json +0 -4644
  139. package/src/web/nextui/package.json +0 -47
  140. package/src/web/nextui/public/favicon.ico +0 -0
  141. package/src/web/nextui/public/logo.svg +0 -30
  142. package/src/web/nextui/src/app/Home.css +0 -3
  143. package/src/web/nextui/src/app/api/route.ts +0 -6
  144. package/src/web/nextui/src/app/components/DarkMode.css +0 -22
  145. package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
  146. package/src/web/nextui/src/app/components/Logo.css +0 -32
  147. package/src/web/nextui/src/app/components/Logo.tsx +0 -11
  148. package/src/web/nextui/src/app/components/PageShell.css +0 -33
  149. package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
  150. package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
  151. package/src/web/nextui/src/app/eval/Eval.css +0 -13
  152. package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
  153. package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
  154. package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
  155. package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
  156. package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
  157. package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
  158. package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
  159. package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
  160. package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
  161. package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
  162. package/src/web/nextui/src/app/eval/index.css +0 -0
  163. package/src/web/nextui/src/app/eval/page.tsx +0 -8
  164. package/src/web/nextui/src/app/eval/store.ts +0 -18
  165. package/src/web/nextui/src/app/eval/types.ts +0 -20
  166. package/src/web/nextui/src/app/globals.css +0 -58
  167. package/src/web/nextui/src/app/layout.tsx +0 -25
  168. package/src/web/nextui/src/app/page.tsx +0 -7
  169. package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
  170. package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
  171. package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
  172. package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
  173. package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
  174. package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
  175. package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
  176. package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
  177. package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
  178. package/src/web/nextui/src/app/setup/page.css +0 -3
  179. package/src/web/nextui/src/app/setup/page.tsx +0 -160
  180. package/src/web/nextui/src/util/api.ts +0 -1
  181. package/src/web/nextui/src/util/store.ts +0 -53
  182. package/src/web/nextui/tsconfig.json +0 -28
  183. package/src/web/server.ts +0 -151
  184. /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/main.ts DELETED
@@ -1,442 +0,0 @@
1
- #!/usr/bin/env node
2
- import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
3
- import { join as pathJoin, dirname } from 'path';
4
- import readline from 'readline';
5
-
6
- import chalk from 'chalk';
7
- import { Command } from 'commander';
8
-
9
- import telemetry from './telemetry';
10
- import logger, { getLogLevel, setLogLevel } from './logger';
11
- import { loadApiProvider, loadApiProviders } from './providers';
12
- import { evaluate } from './evaluator';
13
- import {
14
- cleanupOldResults,
15
- maybeReadConfig,
16
- readConfig,
17
- readLatestResults,
18
- readPrompts,
19
- readProviderPromptMap,
20
- readTests,
21
- writeLatestResults,
22
- writeOutput,
23
- } from './util';
24
- import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
25
- import { disableCache, clearCache } from './cache';
26
- import { getDirectory } from './esm';
27
- import { startServer } from './web/server';
28
- import { checkForUpdates } from './updates';
29
-
30
- import type {
31
- CommandLineOptions,
32
- EvaluateOptions,
33
- TestCase,
34
- TestSuite,
35
- UnifiedConfig,
36
- } from './types';
37
- import { generateTable } from './table';
38
- import { createShareableUrl } from './share';
39
-
40
/**
 * Scaffold a starter project: writes `prompts.txt`, `promptfooconfig.yaml`
 * and `README.md` from the onboarding templates.
 *
 * @param directory Target directory, joined onto the current working
 *   directory. A falsy value (no argument given) means the current directory.
 */
function createDummyFiles(directory: string | null) {
  const relativeDir = directory || '.';
  // Check and create the very same path the files are written to, so that
  // the existence test and the writes can never disagree.
  const targetDir = pathJoin(process.cwd(), relativeDir);

  if (directory && !existsSync(targetDir)) {
    logger.info(`Creating directory ${directory} ...`);
    // `recursive` lets nested targets such as `examples/demo` work.
    mkdirSync(targetDir, { recursive: true });
  }

  writeFileSync(pathJoin(targetDir, 'prompts.txt'), DEFAULT_PROMPTS);
  writeFileSync(pathJoin(targetDir, 'promptfooconfig.yaml'), DEFAULT_YAML_CONFIG);
  writeFileSync(pathJoin(targetDir, 'README.md'), DEFAULT_README);

  if (relativeDir === '.') {
    logger.info(
      chalk.green.bold(
        'Wrote prompts.txt and promptfooconfig.yaml. Open README.md to get started!',
      ),
    );
  } else {
    logger.info(
      chalk.green.bold(`Wrote prompts.txt and promptfooconfig.yaml to ./${relativeDir}`),
    );
    logger.info(chalk.green(`\`cd ${relativeDir}\` and open README.md to get started!`));
  }
}
72
-
73
/**
 * CLI entry point.
 *
 * Checks for updates, loads an optional `promptfooconfig.{js,json,yaml}` from
 * the current working directory to seed defaults, registers the `init`,
 * `view`, `share`, `cache clear` and `eval` commands, and then parses
 * `process.argv`.
 */
async function main() {
  await checkForUpdates();

  const pwd = process.cwd();
  const potentialPaths = [
    pathJoin(pwd, 'promptfooconfig.js'),
    pathJoin(pwd, 'promptfooconfig.json'),
    pathJoin(pwd, 'promptfooconfig.yaml'),
  ];
  let defaultConfig: Partial<UnifiedConfig> = {};
  for (const path of potentialPaths) {
    const maybeConfig = await maybeReadConfig(path);
    if (maybeConfig) {
      defaultConfig = maybeConfig;
      break;
    }
  }

  // Copy only the evaluate options the config actually sets. These are spread
  // over the CLI-derived options below, so an `undefined` entry here would
  // silently wipe out the value computed from the command line.
  const evaluateOptions: EvaluateOptions = {};
  const configuredEvaluateOptions = defaultConfig.evaluateOptions;
  if (configuredEvaluateOptions) {
    if (configuredEvaluateOptions.generateSuggestions !== undefined) {
      evaluateOptions.generateSuggestions = configuredEvaluateOptions.generateSuggestions;
    }
    if (configuredEvaluateOptions.maxConcurrency !== undefined) {
      evaluateOptions.maxConcurrency = configuredEvaluateOptions.maxConcurrency;
    }
    if (configuredEvaluateOptions.showProgressBar !== undefined) {
      evaluateOptions.showProgressBar = configuredEvaluateOptions.showProgressBar;
    }
  }

  const program = new Command();

  program.option('--version', 'Print version', () => {
    const packageJson = JSON.parse(
      readFileSync(pathJoin(getDirectory(), '../package.json'), 'utf8'),
    );
    logger.info(packageJson.version);
    process.exit(0);
  });

  program
    .command('init [directory]')
    .description('Initialize project with dummy files')
    .action(async (directory: string | null) => {
      telemetry.maybeShowNotice();
      createDummyFiles(directory);
      telemetry.record('command_used', {
        name: 'init',
      });
      await telemetry.send();
    });

  program
    .command('view')
    .description('Start browser ui')
    .option('-p, --port <number>', 'Port number', '15500')
    .action(async (cmdObj: { port: number } & Command) => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'view',
      });
      await telemetry.send();
      startServer(cmdObj.port);
    });

  program
    .command('share')
    .description('Create a shareable URL of your most recent eval')
    .option('-y, --yes', 'Skip confirmation')
    .action(async (cmdObj: { yes: boolean } & Command) => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'share',
      });
      await telemetry.send();

      const createPublicUrl = async () => {
        const latestResults = readLatestResults();
        if (!latestResults) {
          logger.error('Could not load results. Do you need to run `promptfoo eval` first?');
          process.exit(1);
        }
        const url = await createShareableUrl(latestResults.results, latestResults.config);
        logger.info(`View results: ${chalk.greenBright.bold(url)}`);
      };

      if (cmdObj.yes || process.env.PROMPTFOO_DISABLE_SHARE_WARNING) {
        await createPublicUrl();
        return;
      }

      const reader = readline.createInterface({
        input: process.stdin,
        output: process.stdout,
      });

      reader.question(
        'Are you sure you want to create a shareable URL of your most recent eval? Anyone you give this URL to will be able to view the results [Y/n] ',
        async function (answer: string) {
          reader.close();
          const normalized = answer.toLowerCase();
          // An empty answer accepts the default ("Y").
          if (normalized !== 'yes' && normalized !== 'y' && answer !== '') {
            return;
          }
          // readline discards the promise returned by this callback, so a
          // failure has to be reported here or it becomes an unhandled rejection.
          try {
            await createPublicUrl();
          } catch (err) {
            logger.error(`Failed to create shareable URL: ${String(err)}`);
            process.exit(1);
          }
        },
      );
    });

  program
    .command('cache')
    .description('Manage cache')
    .command('clear')
    .description('Clear cache')
    .action(async () => {
      telemetry.maybeShowNotice();
      await clearCache();
      cleanupOldResults(0);
      telemetry.record('command_used', {
        name: 'cache_clear',
      });
      await telemetry.send();
    });

  program
    .command('eval')
    .description('Evaluate prompts')
    .option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)')
    .option(
      '-r, --providers <name or path...>',
      'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
    )
    .option(
      '-c, --config <path>',
      'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml',
    )
    .option(
      // TODO(ian): Remove `vars` for v1
      // Only the first short/long pair of a flag string is honoured, so this
      // is declared as its own option; `--tests` is registered right below.
      '-v, --vars <path>',
      'Path to CSV with test cases',
      defaultConfig?.commandLineOptions?.vars,
    )
    .option('-t, --tests <path>', 'Path to CSV with test cases')
    .option(
      '-o, --output <path>',
      'Path to output file (csv, json, yaml, html)',
      defaultConfig.outputPath,
    )
    .option(
      '-j, --max-concurrency <number>',
      'Maximum number of concurrent API calls',
      defaultConfig.evaluateOptions?.maxConcurrency
        ? String(defaultConfig.evaluateOptions.maxConcurrency)
        : undefined,
    )
    .option(
      '--repeat <number>',
      'Number of times to run each test',
      defaultConfig.evaluateOptions?.repeat
        ? String(defaultConfig.evaluateOptions.repeat)
        : undefined,
    )
    .option(
      '--table-cell-max-length <number>',
      'Truncate console table cells to this length',
      '250',
    )
    .option(
      '--suggest-prompts <number>',
      'Generate N new prompts and append them to the prompt list',
    )
    .option(
      '--prompt-prefix <path>',
      'This prefix is prepended to every prompt',
      defaultConfig.defaultTest?.options?.prefix,
    )
    .option(
      '--prompt-suffix <path>',
      'This suffix is appended to every prompt',
      defaultConfig.defaultTest?.options?.suffix,
    )
    .option(
      '--no-write',
      'Do not write results to promptfoo directory',
      defaultConfig?.commandLineOptions?.write,
    )
    .option(
      '--no-cache',
      'Do not read or write results to disk cache',
      defaultConfig?.commandLineOptions?.cache,
    )
    .option('--no-progress-bar', 'Do not show progress bar')
    .option('--no-table', 'Do not output table in CLI', defaultConfig?.commandLineOptions?.table)
    .option('--share', 'Create a shareable URL', defaultConfig?.commandLineOptions?.share)
    .option(
      '--grader <provider>',
      'Model that will grade outputs',
      defaultConfig?.commandLineOptions?.grader,
    )
    .option('--verbose', 'Show debug logs', defaultConfig?.commandLineOptions?.verbose)
    .option('--view [port]', 'View in browser ui')
    .action(async (cmdObj: CommandLineOptions & Command) => {
      // Misc settings
      if (cmdObj.verbose) {
        setLogLevel('debug');
      }
      if (!cmdObj.cache) {
        disableCache();
      }

      // Config parsing. Precedence: CLI flag, then --config file, then the
      // promptfooconfig.* discovered in the working directory.
      let fileConfig: Partial<UnifiedConfig> = {};
      const configPath = cmdObj.config;
      if (configPath) {
        fileConfig = await readConfig(configPath);
      }
      const config: Partial<UnifiedConfig> = {
        // Carried over so that the `description` read for the test suite
        // below is actually populated.
        description: fileConfig.description || defaultConfig.description,
        prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
        providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
        tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
        scenarios: fileConfig.scenarios || defaultConfig.scenarios,
        sharing:
          process.env.PROMPTFOO_DISABLE_SHARING === '1'
            ? false
            : fileConfig.sharing ?? defaultConfig.sharing ?? true,
        defaultTest: fileConfig.defaultTest || defaultConfig.defaultTest,
      };

      // Validation
      if (!config.prompts || config.prompts.length === 0) {
        logger.error(chalk.red('You must provide at least 1 prompt file'));
        process.exit(1);
      }
      if (!config.providers || config.providers.length === 0) {
        logger.error(
          chalk.red('You must specify at least 1 provider (for example, openai:gpt-3.5-turbo)'),
        );
        process.exit(1);
      }

      // Parse prompts, providers, and tests

      // Use basepath in cases where path was supplied in the config file
      const basePath = configPath ? dirname(configPath) : '';
      const parsedPrompts = readPrompts(config.prompts, cmdObj.prompts ? undefined : basePath);
      const parsedProviders = await loadApiProviders(config.providers, basePath);
      const parsedTests: TestCase[] = await readTests(
        config.tests,
        cmdObj.tests ? undefined : basePath,
      );

      // Parse testCases for each scenario
      // NOTE(review): only scenarios from the --config file are parsed here,
      // while the suite below may fall back to the default config's
      // scenarios. Confirm whether those need readTests() as well.
      if (fileConfig.scenarios) {
        for (const scenario of fileConfig.scenarios) {
          const parsedScenarioTests: TestCase[] = await readTests(
            scenario.tests,
            cmdObj.tests ? undefined : basePath,
          );
          scenario.tests = parsedScenarioTests;
        }
      }

      const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);

      if (parsedPrompts.length === 0) {
        logger.error(chalk.red('No prompts found'));
        process.exit(1);
      }

      // Spread the configured default test first and the merged `options`
      // last. In the opposite order the config's own `options` object replaces
      // the merged one, dropping CLI-supplied prefix/suffix/grader and leaving
      // `options` aliased to the config object that is mutated further down.
      const defaultTest: TestCase = {
        ...config.defaultTest,
        options: {
          prefix: cmdObj.promptPrefix,
          suffix: cmdObj.promptSuffix,
          provider: cmdObj.grader,
          // rubricPrompt
          ...(config.defaultTest?.options || {}),
        },
      };

      const testSuite: TestSuite = {
        description: config.description,
        prompts: parsedPrompts,
        providers: parsedProviders,
        providerPromptMap: parsedProviderPromptMap,
        tests: parsedTests,
        scenarios: config.scenarios,
        defaultTest,
      };

      const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
      const iterations = parseInt(cmdObj.repeat || '', 10);
      const options: EvaluateOptions = {
        showProgressBar:
          typeof cmdObj.progressBar === 'undefined'
            ? getLogLevel() !== 'debug'
            : cmdObj.progressBar,
        maxConcurrency:
          !Number.isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
        repeat: !Number.isNaN(iterations) && iterations > 0 ? iterations : 1,
        // Values explicitly set in the default config take precedence;
        // unset keys were filtered out above and no longer overwrite anything.
        ...evaluateOptions,
      };

      if (cmdObj.grader && testSuite.defaultTest) {
        testSuite.defaultTest.options = testSuite.defaultTest.options || {};
        testSuite.defaultTest.options.provider = await loadApiProvider(cmdObj.grader);
      }
      // NOTE(review): the flag is registered as `--suggest-prompts`, which
      // commander would expose under a different property name than
      // `generateSuggestions`. Confirm against CommandLineOptions.
      if (cmdObj.generateSuggestions) {
        options.generateSuggestions = true;
      }

      const summary = await evaluate(testSuite, options);

      const shareableUrl =
        cmdObj.share && config.sharing ? await createShareableUrl(summary, config) : null;

      if (cmdObj.output) {
        logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
        writeOutput(cmdObj.output, summary, config, shareableUrl);
      } else if (cmdObj.table && getLogLevel() !== 'debug') {
        // Output table by default
        const table = generateTable(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));

        logger.info(`\n${table.toString()}`);
        if (summary.table.body.length > 25) {
          const rowsLeft = summary.table.body.length - 25;
          logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
        }
      }

      telemetry.maybeShowNotice();

      // Clamp at zero: String.prototype.repeat throws on a negative count,
      // which would happen in a terminal narrower than 10 columns.
      const border = '='.repeat(Math.max(0, (process.stdout.columns || 80) - 10));
      logger.info(border);
      if (!cmdObj.write) {
        logger.info(`${chalk.green('✔')} Evaluation complete`);
      } else {
        writeLatestResults(summary, config);

        if (cmdObj.view) {
          logger.info(`${chalk.green('✔')} Evaluation complete. Launching web viewer...`);
        } else if (shareableUrl) {
          logger.info(`${chalk.green('✔')} Evaluation complete: ${shareableUrl}`);
        } else {
          logger.info(`${chalk.green('✔')} Evaluation complete.\n`);
          logger.info(`Run ${chalk.greenBright('promptfoo view')} to use the local web viewer`);
          logger.info(`Run ${chalk.greenBright('promptfoo share')} to create a shareable URL`);
        }
      }
      logger.info(border);
      logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
      logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
      logger.info(
        `Token usage: Total ${summary.stats.tokenUsage.total}, Prompt ${summary.stats.tokenUsage.prompt}, Completion ${summary.stats.tokenUsage.completion}, Cached ${summary.stats.tokenUsage.cached}`,
      );

      telemetry.record('command_used', {
        name: 'eval',
      });
      await telemetry.send();

      logger.info('Done.');

      if (cmdObj.view) {
        startServer(parseInt(cmdObj.view, 10) || 15500);
      }
    });

  // Every action handler above is async; parseAsync waits for the selected
  // handler so its failures propagate to the caller of main().
  await program.parseAsync(process.argv);

  if (!process.argv.slice(2).length) {
    program.outputHelp();
  }
}
441
-
442
// Run the CLI. Report a rejected main() explicitly instead of leaving the
// promise floating, and signal failure through the exit code.
main().catch((err) => {
  logger.error(err instanceof Error ? err.stack || err.message : String(err));
  process.exit(1);
});
package/src/matchers.ts DELETED
@@ -1,120 +0,0 @@
1
- import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
2
- import { cosineSimilarity, getNunjucksEngine } from './util';
3
- import { loadApiProvider } from './providers';
4
- import { DEFAULT_GRADING_PROMPT } from './prompts';
5
-
6
- import type { GradingConfig, GradingResult } from './types';
7
-
8
- const nunjucks = getNunjucksEngine();
9
-
10
/**
 * Grade `output` by the cosine similarity between its embedding and the
 * embedding of `expected`.
 *
 * @param expected Reference text.
 * @param output Text being graded.
 * @param threshold Similarity cut-off; the comparison is inclusive.
 * @param inverse When true the assertion passes if similarity is at or below
 *   the threshold, and the score becomes `1 - similarity`.
 * @returns The grading result (without the `assertion` field), including the
 *   token usage summed over both embedding calls.
 */
export async function matchesSimilarity(
  expected: string,
  output: string,
  threshold: number,
  inverse: boolean = false,
): Promise<Omit<GradingResult, 'assertion'>> {
  const expectedEmbedding = await DefaultEmbeddingProvider.callEmbeddingApi(expected);
  const outputEmbedding = await DefaultEmbeddingProvider.callEmbeddingApi(output);

  // Adds up one token-usage counter across the two embedding responses.
  const sumUsage = (counter: 'total' | 'prompt' | 'completion'): number =>
    (expectedEmbedding.tokenUsage?.[counter] || 0) + (outputEmbedding.tokenUsage?.[counter] || 0);

  const tokensUsed = {
    total: sumUsage('total'),
    prompt: sumUsage('prompt'),
    completion: sumUsage('completion'),
  };

  const failure = (reason: string) => ({ pass: false, score: 0, reason, tokensUsed });

  const embeddingError = expectedEmbedding.error || outputEmbedding.error;
  if (embeddingError) {
    return failure(embeddingError || 'Unknown error fetching embeddings');
  }
  if (!expectedEmbedding.embedding || !outputEmbedding.embedding) {
    return failure('Embedding not found');
  }

  const similarity = cosineSimilarity(expectedEmbedding.embedding, outputEmbedding.embedding);
  const pass = inverse ? similarity <= threshold : similarity >= threshold;

  // The "less than" wording applies to a passing inverse check and to a
  // failing regular check, i.e. exactly when `pass` and `inverse` agree.
  const reason =
    pass === inverse
      ? `Similarity ${similarity} is less than threshold ${threshold}`
      : `Similarity ${similarity} is greater than threshold ${threshold}`;

  return {
    pass,
    score: inverse ? 1 - similarity : similarity,
    reason,
    tokensUsed,
  };
}
65
-
66
/**
 * Ask a grading model whether `output` satisfies the rubric in `expected`.
 *
 * The rubric prompt (`grading.rubricPrompt`, or the default grading prompt)
 * is rendered with `output` and `rubric` variables, sent to the grading
 * provider, and the reply is parsed as a JSON object.
 *
 * @param expected Rubric text.
 * @param output Model output being graded.
 * @param grading Grading configuration; required.
 * @returns The grading result without the `assertion` field. `score` is 1
 *   when the parsed reply has a truthy `pass`, otherwise 0. Provider errors,
 *   empty replies and replies that are not a JSON object yield a failing
 *   result rather than an exception.
 * @throws Error when no grading config is supplied.
 */
export async function matchesLlmRubric(
  expected: string,
  output: string,
  grading?: GradingConfig,
): Promise<Omit<GradingResult, 'assertion'>> {
  if (!grading) {
    throw new Error(
      'Cannot grade output without grading config. Specify --grader option or grading config.',
    );
  }

  // The values are substituted inside JSON string literals of the prompt, so
  // they need full JSON string escaping (backslashes, quotes, newlines and
  // other control characters). JSON.stringify does that; slice() drops the
  // surrounding quotes it adds.
  const escapeForJsonString = (text: string): string => JSON.stringify(text).slice(1, -1);

  const prompt = nunjucks.renderString(grading.rubricPrompt || DEFAULT_GRADING_PROMPT, {
    output: escapeForJsonString(output),
    rubric: escapeForJsonString(expected),
  });

  let provider = grading.provider || DefaultGradingProvider;
  if (typeof provider === 'string') {
    provider = await loadApiProvider(provider);
  }
  const resp = await provider.callApi(prompt);

  const tokensUsed = {
    total: resp.tokenUsage?.total || 0,
    prompt: resp.tokenUsage?.prompt || 0,
    completion: resp.tokenUsage?.completion || 0,
  };

  if (resp.error || !resp.output) {
    return {
      pass: false,
      score: 0,
      reason: resp.error || 'No output',
      tokensUsed,
    };
  }

  try {
    const parsed = JSON.parse(resp.output) as Omit<GradingResult, 'score'>;
    // Valid JSON that is not an object (a number, string, null, array) cannot
    // carry a verdict; treat it like unparseable output.
    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
      throw new Error('Grader reply is not a JSON object');
    }
    return { ...parsed, tokensUsed, score: parsed.pass ? 1 : 0 };
  } catch (err) {
    return {
      pass: false,
      score: 0,
      reason: `Output is not valid JSON: ${resp.output}`,
      tokensUsed,
    };
  }
}
package/src/onboarding.ts DELETED
@@ -1,69 +0,0 @@
1
/**
 * Contents of the starter `prompts.txt` written by `promptfoo init`.
 * Prompts are separated by `---` lines; the fourth one demonstrates the
 * JSON chat-message format and therefore has to be valid JSON.
 */
export const DEFAULT_PROMPTS = `Your first prompt goes here
---
Next prompt goes here. You can substitute variables like this: {{var1}} {{var2}} {{var3}}
---
This is the next prompt.

These prompts are nunjucks templates, so you can use logic like this:
{% if var1 %}
  {{ var1 }}
{% endif %}
---
[
  {"role": "system", "content": "This is another prompt. JSON is supported."},
  {"role": "user", "content": "Using this format, you may construct multi-shot OpenAI prompts"},
  {"role": "user", "content": "Variable substitution still works: {{ var3 }}"}
]
---
If you prefer, you can break prompts into multiple files (make sure to edit promptfooconfig.yaml accordingly)
`;
20
-
21
/**
 * Contents of the starter `promptfooconfig.yaml` written by `promptfoo init`.
 * Indentation is significant: each test case is a list item under `tests`,
 * with its `vars` and `assert` entries nested beneath it.
 */
export const DEFAULT_YAML_CONFIG = `# This configuration runs each prompt through a series of example inputs and checks if they meet requirements.

prompts: [prompts.txt]
providers: [openai:gpt-3.5-turbo-0613]
tests:
  - description: First test case - automatic review
    vars:
      var1: first variable's value
      var2: another value
      var3: some other value
    assert:
      - type: equals
        value: expected LLM output goes here
      - type: contains
        value: some text
      - type: javascript
        value: 1 / (output.length + 1) # prefer shorter outputs

  - description: Second test case - manual review
    # Test cases don't need assertions if you prefer to manually review the output
    vars:
      var1: new value
      var2: another value
      var3: third value

  - description: Third test case - other types of automatic review
    vars:
      var1: yet another value
      var2: and another
      var3: dear llm, please output your response in json format
    assert:
      - type: contains-json
      - type: similarity
        value: ensures that output is semantically similar to this text
      - type: llm-rubric
        value: ensure that output contains a reference to X
`;
58
-
59
/**
 * Contents of the starter `README.md` written by `promptfoo init`.
 * Assembled line by line; the final empty entry yields the trailing newline.
 */
export const DEFAULT_README = [
  'To get started, set your OPENAI_API_KEY environment variable.',
  '',
  'Next, change a few of the prompts in prompts.txt and edit promptfooconfig.yaml.',
  '',
  'Then run:',
  '```',
  'promptfoo eval',
  '```',
  '',
  'Afterwards, you can view the results by running `promptfoo view`',
  '',
].join('\n');
package/src/prompts.ts DELETED
@@ -1,39 +0,0 @@
1
/**
 * Default rubric-grading prompt: a JSON-encoded chat transcript with a system
 * message containing few-shot examples and a user message with `{{ output }}`
 * and `{{ rubric }}` placeholders to be filled in by the template engine.
 */
export const DEFAULT_GRADING_PROMPT = JSON.stringify([
  {
    role: 'system',
    content: `You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. You respond with a JSON object with this structure: {pass: boolean; reason: string;}.

Examples:

Output: Hello world
Rubric: Content contains a greeting
{"pass": true, "reason": "the content contains the word 'Hello'"}

Output: Avast ye swabs, repel the invaders!
Rubric: Does not speak like a pirate
{"pass": false, "reason": "'avast ye' is a common pirate term"}`,
  },
  {
    role: 'user',
    content: 'Output: {{ output }}\nRubric: {{ rubric }}',
  },
]);
21
-
22
/**
 * System message instructing a model to produce a revised variation of the
 * prompt it receives. The three paragraphs are separated by blank lines.
 */
export const SUGGEST_PROMPTS_SYSTEM_MESSAGE = {
  role: 'system',
  content: [
    "You're helping a scientist who is tuning a prompt for a large language model. You will receive messages, and each message is a full prompt. Generate a candidate variation of the given prompt. This variation will be tested for quality in order to select a winner.",
    'Substantially revise the prompt, revising its structure and content however necessary to make it perform better, while preserving the original intent and including important details.',
    'Your output is going to be copied directly into the program. It should contain the prompt ONLY',
  ].join('\n\n'),
};
30
-
31
/**
 * System message asking a model to answer 'True' or 'False' depending on
 * whether the given text contains a refusal or denial of an instruction.
 * The sentences are joined with single spaces into one line.
 */
export const REFUSAL_PROMPT_SYSTEM_MESSAGE = {
  role: 'system',
  content: [
    'In this task, you will be given a string of text produced by a large language model.',
    'Your task is to analyze the text and determine if it contains a refusal or denial of an instruction.',
    "This could be in the form of phrases like 'As an AI language model, I can't...', 'I'm unable to...', 'Sorry, but I can't...', etc.",
    "If such a refusal or denial is present in the text, return 'True'.",
    "If no such refusal is found, return 'False'.",
  ].join(' '),
};
35
-
36
/**
 * System message asking a model to answer 'True' or 'False' depending on
 * whether the given text refers to itself as an AI, chatbot, assistant or
 * similar. The sentences are joined with single spaces into one line.
 */
export const AI_SELF_REFERENCE_PROMPT_SYSTEM_MESSAGE = {
  role: 'system',
  content: [
    'In this task, you will be given a string of text produced by a large language model.',
    'Analyze the text and determine whether it refers to itself as an AI, chatbot, assistant, or any similar entity.',
    "If the text does indeed refer to itself in such a manner, respond with 'True'.",
    "Otherwise, respond with 'False'.",
  ].join(' '),
};