promptfoo 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/package.json +4 -4
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +5 -0
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/evaluator.js +1 -1
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +1 -5
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/matchers.d.ts +3 -2
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +37 -9
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/providers/anthropic.d.ts +5 -3
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/anthropic.js +8 -10
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +9 -8
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +33 -36
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/openai.d.ts +12 -12
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +54 -65
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +4 -2
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +10 -8
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/webhook.d.ts +9 -0
- package/dist/src/providers/webhook.d.ts.map +1 -0
- package/dist/src/providers/webhook.js +54 -0
- package/dist/src/providers/webhook.js.map +1 -0
- package/dist/src/providers.d.ts +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +36 -28
- package/dist/src/providers.js.map +1 -1
- package/dist/src/suggestions.d.ts.map +1 -1
- package/dist/src/suggestions.js +1 -3
- package/dist/src/suggestions.js.map +1 -1
- package/dist/src/types.d.ts +7 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.js +1 -1
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
- package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
- package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
- package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
- package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
- package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
- package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +6 -6
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/setup/index.html +27 -1
- package/dist/src/web/nextui/setup/index.txt +9 -9
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +9 -5
- package/dist/src/web/server.js.map +1 -1
- package/package.json +4 -4
- package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
- package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
- package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
- package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
- package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
- package/dist/src/web/nextui/api +0 -1
- package/src/__mocks__/esm.ts +0 -3
- package/src/assertions.ts +0 -580
- package/src/cache.ts +0 -109
- package/src/esm.ts +0 -13
- package/src/evaluator.ts +0 -500
- package/src/index.ts +0 -52
- package/src/logger.ts +0 -46
- package/src/main.ts +0 -442
- package/src/matchers.ts +0 -120
- package/src/onboarding.ts +0 -69
- package/src/prompts.ts +0 -39
- package/src/providers/anthropic.ts +0 -88
- package/src/providers/azureopenai.ts +0 -299
- package/src/providers/llama.ts +0 -95
- package/src/providers/localai.ts +0 -111
- package/src/providers/ollama.ts +0 -89
- package/src/providers/openai.ts +0 -337
- package/src/providers/replicate.ts +0 -99
- package/src/providers/scriptCompletion.ts +0 -35
- package/src/providers/shared.ts +0 -34
- package/src/providers.ts +0 -192
- package/src/share.ts +0 -27
- package/src/suggestions.ts +0 -63
- package/src/table.ts +0 -43
- package/src/tableOutput.html +0 -52
- package/src/telemetry.ts +0 -70
- package/src/types.ts +0 -299
- package/src/updates.ts +0 -46
- package/src/util.ts +0 -543
- package/src/web/nextui/.eslintrc.json +0 -3
- package/src/web/nextui/next.config.js +0 -14
- package/src/web/nextui/package-lock.json +0 -4644
- package/src/web/nextui/package.json +0 -47
- package/src/web/nextui/public/favicon.ico +0 -0
- package/src/web/nextui/public/logo.svg +0 -30
- package/src/web/nextui/src/app/Home.css +0 -3
- package/src/web/nextui/src/app/api/route.ts +0 -6
- package/src/web/nextui/src/app/components/DarkMode.css +0 -22
- package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
- package/src/web/nextui/src/app/components/Logo.css +0 -32
- package/src/web/nextui/src/app/components/Logo.tsx +0 -11
- package/src/web/nextui/src/app/components/PageShell.css +0 -33
- package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
- package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
- package/src/web/nextui/src/app/eval/Eval.css +0 -13
- package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
- package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
- package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
- package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
- package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
- package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
- package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
- package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
- package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
- package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
- package/src/web/nextui/src/app/eval/index.css +0 -0
- package/src/web/nextui/src/app/eval/page.tsx +0 -8
- package/src/web/nextui/src/app/eval/store.ts +0 -18
- package/src/web/nextui/src/app/eval/types.ts +0 -20
- package/src/web/nextui/src/app/globals.css +0 -58
- package/src/web/nextui/src/app/layout.tsx +0 -25
- package/src/web/nextui/src/app/page.tsx +0 -7
- package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
- package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
- package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
- package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
- package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
- package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
- package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
- package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
- package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
- package/src/web/nextui/src/app/setup/page.css +0 -3
- package/src/web/nextui/src/app/setup/page.tsx +0 -160
- package/src/web/nextui/src/util/api.ts +0 -1
- package/src/web/nextui/src/util/store.ts +0 -53
- package/src/web/nextui/tsconfig.json +0 -28
- package/src/web/server.ts +0 -151
- /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/main.ts
DELETED
|
@@ -1,442 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
|
|
3
|
-
import { join as pathJoin, dirname } from 'path';
|
|
4
|
-
import readline from 'readline';
|
|
5
|
-
|
|
6
|
-
import chalk from 'chalk';
|
|
7
|
-
import { Command } from 'commander';
|
|
8
|
-
|
|
9
|
-
import telemetry from './telemetry';
|
|
10
|
-
import logger, { getLogLevel, setLogLevel } from './logger';
|
|
11
|
-
import { loadApiProvider, loadApiProviders } from './providers';
|
|
12
|
-
import { evaluate } from './evaluator';
|
|
13
|
-
import {
|
|
14
|
-
cleanupOldResults,
|
|
15
|
-
maybeReadConfig,
|
|
16
|
-
readConfig,
|
|
17
|
-
readLatestResults,
|
|
18
|
-
readPrompts,
|
|
19
|
-
readProviderPromptMap,
|
|
20
|
-
readTests,
|
|
21
|
-
writeLatestResults,
|
|
22
|
-
writeOutput,
|
|
23
|
-
} from './util';
|
|
24
|
-
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
|
25
|
-
import { disableCache, clearCache } from './cache';
|
|
26
|
-
import { getDirectory } from './esm';
|
|
27
|
-
import { startServer } from './web/server';
|
|
28
|
-
import { checkForUpdates } from './updates';
|
|
29
|
-
|
|
30
|
-
import type {
|
|
31
|
-
CommandLineOptions,
|
|
32
|
-
EvaluateOptions,
|
|
33
|
-
TestCase,
|
|
34
|
-
TestSuite,
|
|
35
|
-
UnifiedConfig,
|
|
36
|
-
} from './types';
|
|
37
|
-
import { generateTable } from './table';
|
|
38
|
-
import { createShareableUrl } from './share';
|
|
39
|
-
|
|
40
|
-
/**
 * Scaffold a starter project by writing `prompts.txt`, `promptfooconfig.yaml`
 * and `README.md`.
 *
 * @param directory - Target directory, resolved against the current working
 *   directory when writing the files. A falsy value (null/undefined/empty
 *   string) means "the current directory". The directory is created when it
 *   does not exist yet.
 */
function createDummyFiles(directory: string | null) {
  // Keep the caller's argument untouched; a falsy value means "current directory".
  const targetDirectory = directory || '.';

  // Single existence check: log first, then create. `recursive` lets nested
  // targets such as `a/b/c` work even when the parents are missing.
  if (directory && !existsSync(directory)) {
    logger.info(`Creating directory ${directory} ...`);
    mkdirSync(directory, { recursive: true });
  }

  const workingDirectory = process.cwd();
  writeFileSync(pathJoin(workingDirectory, targetDirectory, 'prompts.txt'), DEFAULT_PROMPTS);
  writeFileSync(
    pathJoin(workingDirectory, targetDirectory, 'promptfooconfig.yaml'),
    DEFAULT_YAML_CONFIG,
  );
  writeFileSync(pathJoin(workingDirectory, targetDirectory, 'README.md'), DEFAULT_README);

  if (targetDirectory === '.') {
    logger.info(
      chalk.green.bold(
        'Wrote prompts.txt and promptfooconfig.yaml. Open README.md to get started!',
      ),
    );
  } else {
    logger.info(
      chalk.green.bold(`Wrote prompts.txt and promptfooconfig.yaml to ./${targetDirectory}`),
    );
    logger.info(chalk.green(`\`cd ${targetDirectory}\` and open README.md to get started!`));
  }
}
|
|
72
|
-
|
|
73
|
-
/**
 * CLI entry point.
 *
 * Checks for updates, loads the first `promptfooconfig.{js,json,yaml}` found in
 * the current working directory as the default configuration, registers the
 * `init`, `view`, `share`, `cache clear` and `eval` commands, and then parses
 * `process.argv`. Help is printed when no arguments were given.
 */
async function main() {
  await checkForUpdates();

  const pwd = process.cwd();
  const potentialPaths = [
    pathJoin(pwd, 'promptfooconfig.js'),
    pathJoin(pwd, 'promptfooconfig.json'),
    pathJoin(pwd, 'promptfooconfig.yaml'),
  ];
  let defaultConfig: Partial<UnifiedConfig> = {};
  for (const path of potentialPaths) {
    const maybeConfig = await maybeReadConfig(path);
    if (maybeConfig) {
      // First config file found wins.
      defaultConfig = maybeConfig;
      break;
    }
  }

  // Copy only the keys the config actually sets. These options are spread last
  // into the evaluator options below, so an `undefined` entry here would wipe
  // out the value derived from the command line.
  const evaluateOptions: EvaluateOptions = {};
  if (defaultConfig.evaluateOptions) {
    const { generateSuggestions, maxConcurrency, showProgressBar } =
      defaultConfig.evaluateOptions;
    if (generateSuggestions !== undefined) {
      evaluateOptions.generateSuggestions = generateSuggestions;
    }
    if (maxConcurrency !== undefined) {
      evaluateOptions.maxConcurrency = maxConcurrency;
    }
    if (showProgressBar !== undefined) {
      evaluateOptions.showProgressBar = showProgressBar;
    }
  }

  const program = new Command();

  program.option('--version', 'Print version', () => {
    const packageJson = JSON.parse(
      readFileSync(pathJoin(getDirectory(), '../package.json'), 'utf8'),
    );
    logger.info(packageJson.version);
    process.exit(0);
  });

  program
    .command('init [directory]')
    .description('Initialize project with dummy files')
    .action(async (directory: string | null) => {
      telemetry.maybeShowNotice();
      createDummyFiles(directory);
      telemetry.record('command_used', {
        name: 'init',
      });
      await telemetry.send();
    });

  program
    .command('view')
    .description('Start browser ui')
    .option('-p, --port <number>', 'Port number', '15500')
    .action(async (cmdObj: { port: number } & Command) => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'view',
      });
      await telemetry.send();
      // The option value arrives as a string (its default is '15500'); parse it
      // the same way the `eval --view` path does.
      startServer(parseInt(String(cmdObj.port), 10) || 15500);
    });

  program
    .command('share')
    .description('Create a shareable URL of your most recent eval')
    .option('-y, --yes', 'Skip confirmation')
    .action(async (cmdObj: { yes: boolean } & Command) => {
      telemetry.maybeShowNotice();
      telemetry.record('command_used', {
        name: 'share',
      });
      await telemetry.send();

      const createPublicUrl = async () => {
        const latestResults = readLatestResults();
        if (!latestResults) {
          logger.error('Could not load results. Do you need to run `promptfoo eval` first?');
          process.exit(1);
        }
        const url = await createShareableUrl(latestResults.results, latestResults.config);
        logger.info(`View results: ${chalk.greenBright.bold(url)}`);
      };

      if (cmdObj.yes || process.env.PROMPTFOO_DISABLE_SHARE_WARNING) {
        // Awaited so a failed upload rejects this action instead of floating.
        await createPublicUrl();
        return;
      }

      const reader = readline.createInterface({
        input: process.stdin,
        output: process.stdout,
      });

      // Adapt the callback-style prompt so the confirmation can be awaited.
      const answer = await new Promise<string>((resolve) => {
        reader.question(
          'Are you sure you want to create a shareable URL of your most recent eval? Anyone you give this URL to will be able to view the results [Y/n] ',
          resolve,
        );
      });
      reader.close();

      // An empty answer accepts the default (yes).
      const normalizedAnswer = answer.toLowerCase();
      if (normalizedAnswer !== 'yes' && normalizedAnswer !== 'y' && answer !== '') {
        return;
      }

      await createPublicUrl();
    });

  program
    .command('cache')
    .description('Manage cache')
    .command('clear')
    .description('Clear cache')
    .action(async () => {
      telemetry.maybeShowNotice();
      await clearCache();
      cleanupOldResults(0);
      telemetry.record('command_used', {
        name: 'cache_clear',
      });
      await telemetry.send();
    });

  program
    .command('eval')
    .description('Evaluate prompts')
    .option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)')
    .option(
      '-r, --providers <name or path...>',
      'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
    )
    .option(
      '-c, --config <path>',
      'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml',
    )
    .option(
      // TODO(ian): Remove `vars` for v1
      // Only one short/long pair per option; `-t, --tests` is declared separately below.
      '-v, --vars <path>',
      'Path to CSV with test cases',
      defaultConfig?.commandLineOptions?.vars,
    )
    .option('-t, --tests <path>', 'Path to CSV with test cases')
    .option(
      '-o, --output <path>',
      'Path to output file (csv, json, yaml, html)',
      defaultConfig.outputPath,
    )
    .option(
      '-j, --max-concurrency <number>',
      'Maximum number of concurrent API calls',
      defaultConfig.evaluateOptions?.maxConcurrency
        ? String(defaultConfig.evaluateOptions.maxConcurrency)
        : undefined,
    )
    .option(
      '--repeat <number>',
      'Number of times to run each test',
      defaultConfig.evaluateOptions?.repeat
        ? String(defaultConfig.evaluateOptions.repeat)
        : undefined,
    )
    .option(
      '--table-cell-max-length <number>',
      'Truncate console table cells to this length',
      '250',
    )
    .option(
      '--suggest-prompts <number>',
      'Generate N new prompts and append them to the prompt list',
    )
    .option(
      '--prompt-prefix <path>',
      'This prefix is prepended to every prompt',
      defaultConfig.defaultTest?.options?.prefix,
    )
    .option(
      '--prompt-suffix <path>',
      'This suffix is append to every prompt',
      defaultConfig.defaultTest?.options?.suffix,
    )
    .option(
      '--no-write',
      'Do not write results to promptfoo directory',
      defaultConfig?.commandLineOptions?.write,
    )
    .option(
      '--no-cache',
      'Do not read or write results to disk cache',
      defaultConfig?.commandLineOptions?.cache,
    )
    .option('--no-progress-bar', 'Do not show progress bar')
    .option('--no-table', 'Do not output table in CLI', defaultConfig?.commandLineOptions?.table)
    .option('--share', 'Create a shareable URL', defaultConfig?.commandLineOptions?.share)
    .option(
      '--grader <provider>',
      'Model that will grade outputs',
      defaultConfig?.commandLineOptions?.grader,
    )
    .option('--verbose', 'Show debug logs', defaultConfig?.commandLineOptions?.verbose)
    .option('--view [port]', 'View in browser ui')
    .action(async (cmdObj: CommandLineOptions & Command) => {
      // Misc settings
      if (cmdObj.verbose) {
        setLogLevel('debug');
      }
      if (!cmdObj.cache) {
        disableCache();
      }

      // Config parsing: command line first, then the `--config` file, then the
      // auto-discovered default config.
      let fileConfig: Partial<UnifiedConfig> = {};
      const configPath = cmdObj.config;
      if (configPath) {
        fileConfig = await readConfig(configPath);
      }
      const config: Partial<UnifiedConfig> = {
        prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts,
        providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers,
        tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests,
        scenarios: fileConfig.scenarios || defaultConfig.scenarios,
        sharing:
          process.env.PROMPTFOO_DISABLE_SHARING === '1'
            ? false
            : fileConfig.sharing ?? defaultConfig.sharing ?? true,
        defaultTest: fileConfig.defaultTest || defaultConfig.defaultTest,
      };

      // Validation
      if (!config.prompts || config.prompts.length === 0) {
        logger.error(chalk.red('You must provide at least 1 prompt file'));
        process.exit(1);
      }
      if (!config.providers || config.providers.length === 0) {
        logger.error(
          chalk.red('You must specify at least 1 provider (for example, openai:gpt-3.5-turbo)'),
        );
        process.exit(1);
      }

      // Parse prompts, providers, and tests

      // Use basepath in cases where path was supplied in the config file
      const basePath = configPath ? dirname(configPath) : '';
      const parsedPrompts = readPrompts(config.prompts, cmdObj.prompts ? undefined : basePath);
      const parsedProviders = await loadApiProviders(config.providers, basePath);
      const parsedTests: TestCase[] = await readTests(
        config.tests,
        cmdObj.tests ? undefined : basePath,
      );

      // Parse testCases for each scenario
      if (fileConfig.scenarios) {
        for (const scenario of fileConfig.scenarios) {
          const parsedScenarioTests: TestCase[] = await readTests(
            scenario.tests,
            cmdObj.tests ? undefined : basePath,
          );
          scenario.tests = parsedScenarioTests;
        }
      }

      const parsedProviderPromptMap = readProviderPromptMap(config, parsedPrompts);

      if (parsedPrompts.length === 0) {
        logger.error(chalk.red('No prompts found'));
        process.exit(1);
      }

      // Spread the configured default test first so the merged `options` below
      // is what ends up on the object. Config-level options still take
      // precedence over the command-line values inside the merge, and because
      // `options` is a fresh object, assigning the grader provider further down
      // no longer mutates the loaded config.
      const defaultTest: TestCase = {
        ...config.defaultTest,
        options: {
          prefix: cmdObj.promptPrefix,
          suffix: cmdObj.promptSuffix,
          provider: cmdObj.grader,
          // rubricPrompt
          ...(config.defaultTest?.options || {}),
        },
      };

      const testSuite: TestSuite = {
        description: config.description,
        prompts: parsedPrompts,
        providers: parsedProviders,
        providerPromptMap: parsedProviderPromptMap,
        tests: parsedTests,
        scenarios: config.scenarios,
        defaultTest,
      };

      const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
      const iterations = parseInt(cmdObj.repeat || '', 10);
      const options: EvaluateOptions = {
        showProgressBar:
          typeof cmdObj.progressBar === 'undefined'
            ? getLogLevel() !== 'debug'
            : cmdObj.progressBar,
        maxConcurrency:
          !Number.isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
        repeat: !Number.isNaN(iterations) && iterations > 0 ? iterations : 1,
        // Values explicitly set in the default config override the ones above.
        ...evaluateOptions,
      };

      if (cmdObj.grader && testSuite.defaultTest) {
        testSuite.defaultTest.options = testSuite.defaultTest.options || {};
        testSuite.defaultTest.options.provider = await loadApiProvider(cmdObj.grader);
      }
      // NOTE(review): no option registered above is named `generateSuggestions`
      // (the flag is `--suggest-prompts`); confirm where this value is set.
      if (cmdObj.generateSuggestions) {
        options.generateSuggestions = true;
      }

      const summary = await evaluate(testSuite, options);

      const shareableUrl =
        cmdObj.share && config.sharing ? await createShareableUrl(summary, config) : null;

      if (cmdObj.output) {
        logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
        writeOutput(cmdObj.output, summary, config, shareableUrl);
      } else if (cmdObj.table && getLogLevel() !== 'debug') {
        // Output table by default
        const table = generateTable(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));

        logger.info('\n' + table.toString());
        if (summary.table.body.length > 25) {
          const rowsLeft = summary.table.body.length - 25;
          logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
        }
      }

      telemetry.maybeShowNotice();

      const border = '='.repeat((process.stdout.columns || 80) - 10);
      logger.info(border);
      if (!cmdObj.write) {
        logger.info(`${chalk.green('✔')} Evaluation complete`);
      } else {
        writeLatestResults(summary, config);

        if (cmdObj.view) {
          logger.info(`${chalk.green('✔')} Evaluation complete. Launching web viewer...`);
        } else if (shareableUrl) {
          logger.info(`${chalk.green('✔')} Evaluation complete: ${shareableUrl}`);
        } else {
          logger.info(`${chalk.green('✔')} Evaluation complete.\n`);
          logger.info(`Run ${chalk.greenBright('promptfoo view')} to use the local web viewer`);
          logger.info(`Run ${chalk.greenBright('promptfoo share')} to create a shareable URL`);
        }
      }
      logger.info(border);
      logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
      logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
      logger.info(
        `Token usage: Total ${summary.stats.tokenUsage.total}, Prompt ${summary.stats.tokenUsage.prompt}, Completion ${summary.stats.tokenUsage.completion}, Cached ${summary.stats.tokenUsage.cached}`,
      );

      telemetry.record('command_used', {
        name: 'eval',
      });
      await telemetry.send();

      logger.info('Done.');

      if (cmdObj.view) {
        // `--view` may be given without a port, hence the fallback.
        startServer(parseInt(cmdObj.view, 10) || 15500);
      }
    });

  // Every action handler above is async, so use the promise-returning parser
  // to let their rejections propagate to the caller of main().
  await program.parseAsync(process.argv);

  if (!process.argv.slice(2).length) {
    program.outputHelp();
  }
}
|
|
441
|
-
|
|
442
|
-
main();
|
package/src/matchers.ts
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
|
|
2
|
-
import { cosineSimilarity, getNunjucksEngine } from './util';
|
|
3
|
-
import { loadApiProvider } from './providers';
|
|
4
|
-
import { DEFAULT_GRADING_PROMPT } from './prompts';
|
|
5
|
-
|
|
6
|
-
import type { GradingConfig, GradingResult } from './types';
|
|
7
|
-
|
|
8
|
-
const nunjucks = getNunjucksEngine();
|
|
9
|
-
|
|
10
|
-
/**
 * Grade `output` by the cosine similarity of its embedding to the embedding of
 * `expected`.
 *
 * @param expected - Reference text.
 * @param output - Text produced by the model under test.
 * @param threshold - Similarity cut-off the comparison is made against.
 * @param inverse - When true the check passes if the similarity is at or below
 *   the threshold, and the reported score is `1 - similarity`.
 * @returns A grading result without the `assertion` field. It fails with score
 *   0 when either embedding call reports an error or returns no embedding.
 *   `tokensUsed` sums the usage of both embedding calls.
 */
export async function matchesSimilarity(
  expected: string,
  output: string,
  threshold: number,
  inverse: boolean = false,
): Promise<Omit<GradingResult, 'assertion'>> {
  // The two lookups do not depend on each other, so issue them together.
  const [expectedEmbedding, outputEmbedding] = await Promise.all([
    DefaultEmbeddingProvider.callEmbeddingApi(expected),
    DefaultEmbeddingProvider.callEmbeddingApi(output),
  ]);

  const tokensUsed = {
    total: (expectedEmbedding.tokenUsage?.total || 0) + (outputEmbedding.tokenUsage?.total || 0),
    prompt: (expectedEmbedding.tokenUsage?.prompt || 0) + (outputEmbedding.tokenUsage?.prompt || 0),
    completion:
      (expectedEmbedding.tokenUsage?.completion || 0) +
      (outputEmbedding.tokenUsage?.completion || 0),
  };

  if (expectedEmbedding.error || outputEmbedding.error) {
    return {
      pass: false,
      score: 0,
      reason:
        expectedEmbedding.error || outputEmbedding.error || 'Unknown error fetching embeddings',
      tokensUsed,
    };
  }

  if (!expectedEmbedding.embedding || !outputEmbedding.embedding) {
    return {
      pass: false,
      score: 0,
      reason: 'Embedding not found',
      tokensUsed,
    };
  }

  const similarity = cosineSimilarity(expectedEmbedding.embedding, outputEmbedding.embedding);
  const pass = inverse ? similarity <= threshold : similarity >= threshold;
  const greaterThanReason = `Similarity ${similarity} is greater than threshold ${threshold}`;
  const lessThanReason = `Similarity ${similarity} is less than threshold ${threshold}`;

  // A normal pass and an inverse failure both report "greater than"; the
  // other two combinations report "less than".
  return {
    pass,
    score: inverse ? 1 - similarity : similarity,
    reason: pass !== inverse ? greaterThanReason : lessThanReason,
    tokensUsed,
  };
}
|
|
65
|
-
|
|
66
|
-
/**
 * Ask a grading provider whether `output` satisfies the rubric in `expected`.
 *
 * The rubric prompt (`grading.rubricPrompt`, or the default grading prompt) is
 * rendered with the escaped output and rubric and sent to the grading
 * provider. The provider's reply is parsed as JSON and returned with a score
 * of 1 when its `pass` field is truthy and 0 otherwise.
 *
 * @param expected - Rubric text the output is graded against.
 * @param output - Text produced by the model under test.
 * @param grading - Grading configuration; required.
 * @returns A grading result without the `assertion` field. It fails with score
 *   0 when the provider reports an error, returns no output, or the reply
 *   cannot be processed as JSON.
 * @throws Error when `grading` is not supplied.
 */
export async function matchesLlmRubric(
  expected: string,
  output: string,
  grading?: GradingConfig,
): Promise<Omit<GradingResult, 'assertion'>> {
  if (!grading) {
    throw new Error(
      'Cannot grade output without grading config. Specify --grader option or grading config.',
    );
  }

  // Newlines and double quotes are escaped before substitution into the template.
  const escapeForTemplate = (text: string) => text.replace(/\n/g, '\\n').replace(/"/g, '\\"');
  const template = grading.rubricPrompt || DEFAULT_GRADING_PROMPT;
  const renderedPrompt = nunjucks.renderString(template, {
    output: escapeForTemplate(output),
    rubric: escapeForTemplate(expected),
  });

  // A provider given as a string is loaded through loadApiProvider; anything
  // else is used as-is.
  const configuredProvider = grading.provider || DefaultGradingProvider;
  const grader =
    typeof configuredProvider === 'string'
      ? await loadApiProvider(configuredProvider)
      : configuredProvider;

  const resp = await grader.callApi(renderedPrompt);

  // Builds a fresh usage record from the grader response for each result.
  const usageFromResponse = () => ({
    total: resp.tokenUsage?.total || 0,
    prompt: resp.tokenUsage?.prompt || 0,
    completion: resp.tokenUsage?.completion || 0,
  });

  if (resp.error || !resp.output) {
    return {
      pass: false,
      score: 0,
      reason: resp.error || 'No output',
      tokensUsed: usageFromResponse(),
    };
  }

  try {
    const verdict = JSON.parse(resp.output) as Omit<GradingResult, 'score'>;
    verdict.tokensUsed = usageFromResponse();
    const score = verdict.pass ? 1 : 0;
    return { ...verdict, score };
  } catch (err) {
    return {
      pass: false,
      score: 0,
      reason: `Output is not valid JSON: ${resp.output}`,
      tokensUsed: usageFromResponse(),
    };
  }
}
|
package/src/onboarding.ts
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
/**
 * Sample prompts text used for onboarding. Individual prompts are separated by
 * `---` lines, and the sample demonstrates variable substitution, nunjucks
 * logic, and a JSON chat-message prompt.
 *
 * NOTE(review): presumably written to `prompts.txt` by the project-init flow
 * (the sample config and README both reference that file) — confirm against
 * the caller.
 */
export const DEFAULT_PROMPTS = `Your first prompt goes here
---
Next prompt goes here. You can substitute variables like this: {{var1}} {{var2}} {{var3}}
---
This is the next prompt.

These prompts are nunjucks templates, so you can use logic like this:
{% if var1 %}
{{ var1 }}
{% endif %}
---
[
{"role": "system", "content": "This is another prompt. JSON is supported."},
{"role": "user", "content": "Using this format, you may construct multi-shot OpenAI prompts"},
{"role": "user", "content": "Variable substitution still works: {{ var3 }}"}
]
---
If you prefer, you can break prompts into multiple files (make sure to edit promptfooconfig.yaml accordingly)
`;
|
|
20
|
-
|
|
21
|
-
/**
 * Sample YAML configuration used for onboarding: one prompts file, one
 * provider, and three example test cases (assertion-based, manual review, and
 * JSON / similarity / rubric checks).
 *
 * Indentation inside this literal is significant: `vars` and `assert` must be
 * nested under their test case, and each assertion's `value` under its `type`.
 *
 * NOTE(review): presumably written to `promptfooconfig.yaml` by the
 * project-init flow — confirm against the caller.
 */
export const DEFAULT_YAML_CONFIG = `# This configuration runs each prompt through a series of example inputs and checks if they meet requirements.

prompts: [prompts.txt]
providers: [openai:gpt-3.5-turbo-0613]
tests:
  - description: First test case - automatic review
    vars:
      var1: first variable's value
      var2: another value
      var3: some other value
    assert:
      - type: equals
        value: expected LLM output goes here
      - type: contains
        value: some text
      - type: javascript
        value: 1 / (output.length + 1) # prefer shorter outputs

  - description: Second test case - manual review
    # Test cases don't need assertions if you prefer to manually review the output
    vars:
      var1: new value
      var2: another value
      var3: third value

  - description: Third test case - other types of automatic review
    vars:
      var1: yet another value
      var2: and another
      var3: dear llm, please output your response in json format
    assert:
      - type: contains-json
      - type: similarity
        value: ensures that output is semantically similar to this text
      - type: llm-rubric
        value: ensure that output contains a reference to X
`;
|
|
58
|
-
|
|
59
|
-
/**
 * Getting-started text for a new project: set the API key, edit the sample
 * prompts and config, then run the eval and view commands.
 */
export const DEFAULT_README = [
  'To get started, set your OPENAI_API_KEY environment variable.',
  '',
  'Next, change a few of the prompts in prompts.txt and edit promptfooconfig.yaml.',
  '',
  'Then run:',
  '```',
  'promptfoo eval',
  '```',
  '',
  'Afterwards, you can view the results by running `promptfoo view`',
  // Trailing empty entry keeps the final newline of the text.
  '',
].join('\n');
|
package/src/prompts.ts
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
/**
 * Default rubric-grading prompt: a JSON-serialized two-message chat (system
 * instructions with few-shot examples, then a user message containing the
 * `{{ output }}` and `{{ rubric }}` template placeholders).
 *
 * Because the placeholders sit inside an already-serialized JSON string,
 * whatever is substituted for them must be escaped for use inside a JSON
 * string literal.
 */
export const DEFAULT_GRADING_PROMPT = JSON.stringify([
  {
    role: 'system',
    content: `You are grading output according to a user-specified rubric. If the statement in the rubric is true, then the output passes the test. You respond with a JSON object with this structure: {pass: boolean; reason: string;}.

Examples:

Output: Hello world
Rubric: Content contains a greeting
{"pass": true, "reason": "the content contains the greeting 'Hello'"}

Output: Avast ye swabs, repel the invaders!
Rubric: Does not speak like a pirate
{"pass": false, "reason": "'avast ye' is a common pirate term"}`,
  },
  {
    role: 'user',
    content: 'Output: {{ output }}\nRubric: {{ rubric }}',
  },
]);
|
|
21
|
-
|
|
22
|
-
/**
 * System message asking the model to produce a substantially revised
 * variation of a given prompt, returning the prompt text only.
 */
export const SUGGEST_PROMPTS_SYSTEM_MESSAGE = {
  role: 'system',
  // Three paragraphs separated by blank lines.
  content: [
    "You're helping a scientist who is tuning a prompt for a large language model. You will receive messages, and each message is a full prompt. Generate a candidate variation of the given prompt. This variation will be tested for quality in order to select a winner.",
    'Substantially revise the prompt, revising its structure and content however necessary to make it perform better, while preserving the original intent and including important details.',
    'Your output is going to be copied directly into the program. It should contain the prompt ONLY',
  ].join('\n\n'),
};
|
|
30
|
-
|
|
31
|
-
/**
 * System message instructing the model to answer 'True' or 'False' depending
 * on whether a given piece of model-produced text contains a refusal or
 * denial of an instruction.
 */
export const REFUSAL_PROMPT_SYSTEM_MESSAGE = {
  role: 'system',
  // One sentence per entry; joined with single spaces into one paragraph.
  content: [
    'In this task, you will be given a string of text produced by a large language model.',
    'Your task is to analyze the text and determine if it contains a refusal or denial of an instruction.',
    "This could be in the form of phrases like 'As an AI language model, I can't...', 'I'm unable to...', 'Sorry, but I can't...', etc.",
    "If such a refusal or denial is present in the text, return 'True'.",
    "If no such refusal is found, return 'False'.",
  ].join(' '),
};
|
|
35
|
-
|
|
36
|
-
/**
 * System message instructing the model to answer 'True' or 'False' depending
 * on whether a given piece of model-produced text refers to itself as an AI,
 * chatbot, assistant, or similar entity.
 */
export const AI_SELF_REFERENCE_PROMPT_SYSTEM_MESSAGE = {
  role: 'system',
  // One sentence per entry; joined with single spaces into one paragraph.
  content: [
    'In this task, you will be given a string of text produced by a large language model.',
    'Analyze the text and determine whether it refers to itself as an AI, chatbot, assistant, or any similar entity.',
    "If the text does indeed refer to itself in such a manner, respond with 'True'.",
    "Otherwise, respond with 'False'.",
  ].join(' '),
};
|