promptfoo 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/package.json +4 -4
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +5 -0
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/evaluator.js +1 -1
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +1 -5
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/matchers.d.ts +3 -2
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +37 -9
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/providers/anthropic.d.ts +5 -3
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/anthropic.js +8 -10
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +9 -8
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +33 -36
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/openai.d.ts +12 -12
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +54 -65
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +4 -2
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +10 -8
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/webhook.d.ts +9 -0
- package/dist/src/providers/webhook.d.ts.map +1 -0
- package/dist/src/providers/webhook.js +54 -0
- package/dist/src/providers/webhook.js.map +1 -0
- package/dist/src/providers.d.ts +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +36 -28
- package/dist/src/providers.js.map +1 -1
- package/dist/src/suggestions.d.ts.map +1 -1
- package/dist/src/suggestions.js +1 -3
- package/dist/src/suggestions.js.map +1 -1
- package/dist/src/types.d.ts +7 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.js +1 -1
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
- package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
- package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
- package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
- package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
- package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
- package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +6 -6
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/setup/index.html +27 -1
- package/dist/src/web/nextui/setup/index.txt +9 -9
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +9 -5
- package/dist/src/web/server.js.map +1 -1
- package/package.json +4 -4
- package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
- package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
- package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
- package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
- package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
- package/dist/src/web/nextui/api +0 -1
- package/src/__mocks__/esm.ts +0 -3
- package/src/assertions.ts +0 -580
- package/src/cache.ts +0 -109
- package/src/esm.ts +0 -13
- package/src/evaluator.ts +0 -500
- package/src/index.ts +0 -52
- package/src/logger.ts +0 -46
- package/src/main.ts +0 -442
- package/src/matchers.ts +0 -120
- package/src/onboarding.ts +0 -69
- package/src/prompts.ts +0 -39
- package/src/providers/anthropic.ts +0 -88
- package/src/providers/azureopenai.ts +0 -299
- package/src/providers/llama.ts +0 -95
- package/src/providers/localai.ts +0 -111
- package/src/providers/ollama.ts +0 -89
- package/src/providers/openai.ts +0 -337
- package/src/providers/replicate.ts +0 -99
- package/src/providers/scriptCompletion.ts +0 -35
- package/src/providers/shared.ts +0 -34
- package/src/providers.ts +0 -192
- package/src/share.ts +0 -27
- package/src/suggestions.ts +0 -63
- package/src/table.ts +0 -43
- package/src/tableOutput.html +0 -52
- package/src/telemetry.ts +0 -70
- package/src/types.ts +0 -299
- package/src/updates.ts +0 -46
- package/src/util.ts +0 -543
- package/src/web/nextui/.eslintrc.json +0 -3
- package/src/web/nextui/next.config.js +0 -14
- package/src/web/nextui/package-lock.json +0 -4644
- package/src/web/nextui/package.json +0 -47
- package/src/web/nextui/public/favicon.ico +0 -0
- package/src/web/nextui/public/logo.svg +0 -30
- package/src/web/nextui/src/app/Home.css +0 -3
- package/src/web/nextui/src/app/api/route.ts +0 -6
- package/src/web/nextui/src/app/components/DarkMode.css +0 -22
- package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
- package/src/web/nextui/src/app/components/Logo.css +0 -32
- package/src/web/nextui/src/app/components/Logo.tsx +0 -11
- package/src/web/nextui/src/app/components/PageShell.css +0 -33
- package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
- package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
- package/src/web/nextui/src/app/eval/Eval.css +0 -13
- package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
- package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
- package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
- package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
- package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
- package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
- package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
- package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
- package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
- package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
- package/src/web/nextui/src/app/eval/index.css +0 -0
- package/src/web/nextui/src/app/eval/page.tsx +0 -8
- package/src/web/nextui/src/app/eval/store.ts +0 -18
- package/src/web/nextui/src/app/eval/types.ts +0 -20
- package/src/web/nextui/src/app/globals.css +0 -58
- package/src/web/nextui/src/app/layout.tsx +0 -25
- package/src/web/nextui/src/app/page.tsx +0 -7
- package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
- package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
- package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
- package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
- package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
- package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
- package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
- package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
- package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
- package/src/web/nextui/src/app/setup/page.css +0 -3
- package/src/web/nextui/src/app/setup/page.tsx +0 -160
- package/src/web/nextui/src/util/api.ts +0 -1
- package/src/web/nextui/src/util/store.ts +0 -53
- package/src/web/nextui/tsconfig.json +0 -28
- package/src/web/server.ts +0 -151
- /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/share.ts
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import fetch from 'node-fetch';
|
|
2
|
-
|
|
3
|
-
import type { EvaluateSummary, SharedResults, UnifiedConfig } from './types';
|
|
4
|
-
|
|
5
|
-
export async function createShareableUrl(
|
|
6
|
-
results: EvaluateSummary,
|
|
7
|
-
config: Partial<UnifiedConfig>,
|
|
8
|
-
): Promise<string> {
|
|
9
|
-
const sharedResults: SharedResults = {
|
|
10
|
-
data: {
|
|
11
|
-
version: 1,
|
|
12
|
-
results,
|
|
13
|
-
config,
|
|
14
|
-
},
|
|
15
|
-
};
|
|
16
|
-
|
|
17
|
-
const response = await fetch('https://api.promptfoo.dev/eval', {
|
|
18
|
-
method: 'POST',
|
|
19
|
-
headers: {
|
|
20
|
-
'Content-Type': 'application/json',
|
|
21
|
-
},
|
|
22
|
-
body: JSON.stringify(sharedResults),
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
const { id } = (await response.json()) as { id: string };
|
|
26
|
-
return `https://app.promptfoo.dev/eval/${id}`;
|
|
27
|
-
}
|
package/src/suggestions.ts
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import { SUGGEST_PROMPTS_SYSTEM_MESSAGE } from './prompts';
|
|
2
|
-
import { DefaultSuggestionsProvider } from './providers/openai';
|
|
3
|
-
|
|
4
|
-
import type { TokenUsage } from './types';
|
|
5
|
-
|
|
6
|
-
const DEFAULT_TEMPERATURE = 0.9;
|
|
7
|
-
|
|
8
|
-
interface GeneratePromptsOutput {
|
|
9
|
-
prompts?: string[];
|
|
10
|
-
error?: string;
|
|
11
|
-
tokensUsed: TokenUsage;
|
|
12
|
-
}
|
|
13
|
-
|
|
14
|
-
export async function generatePrompts(prompt: string, num: number): Promise<GeneratePromptsOutput> {
|
|
15
|
-
const provider = DefaultSuggestionsProvider;
|
|
16
|
-
|
|
17
|
-
const resp = await provider.callApi(
|
|
18
|
-
JSON.stringify([
|
|
19
|
-
SUGGEST_PROMPTS_SYSTEM_MESSAGE,
|
|
20
|
-
{
|
|
21
|
-
role: 'user',
|
|
22
|
-
content: 'Generate a variant for the following prompt:',
|
|
23
|
-
},
|
|
24
|
-
{
|
|
25
|
-
role: 'user',
|
|
26
|
-
content: prompt,
|
|
27
|
-
},
|
|
28
|
-
]),
|
|
29
|
-
{
|
|
30
|
-
temperature: DEFAULT_TEMPERATURE,
|
|
31
|
-
},
|
|
32
|
-
);
|
|
33
|
-
if (resp.error || !resp.output) {
|
|
34
|
-
return {
|
|
35
|
-
error: resp.error || 'Unknown error',
|
|
36
|
-
tokensUsed: {
|
|
37
|
-
total: resp.tokenUsage?.total || 0,
|
|
38
|
-
prompt: resp.tokenUsage?.prompt || 0,
|
|
39
|
-
completion: resp.tokenUsage?.completion || 0,
|
|
40
|
-
},
|
|
41
|
-
};
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
try {
|
|
45
|
-
return {
|
|
46
|
-
prompts: [resp.output],
|
|
47
|
-
tokensUsed: {
|
|
48
|
-
total: resp.tokenUsage?.total || 0,
|
|
49
|
-
prompt: resp.tokenUsage?.prompt || 0,
|
|
50
|
-
completion: resp.tokenUsage?.completion || 0,
|
|
51
|
-
},
|
|
52
|
-
};
|
|
53
|
-
} catch (err) {
|
|
54
|
-
return {
|
|
55
|
-
error: `Output is not valid JSON: ${resp.output}`,
|
|
56
|
-
tokensUsed: {
|
|
57
|
-
total: resp.tokenUsage?.total || 0,
|
|
58
|
-
prompt: resp.tokenUsage?.prompt || 0,
|
|
59
|
-
completion: resp.tokenUsage?.completion || 0,
|
|
60
|
-
},
|
|
61
|
-
};
|
|
62
|
-
}
|
|
63
|
-
}
|
package/src/table.ts
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
import Table from 'cli-table3';
|
|
2
|
-
import chalk from 'chalk';
|
|
3
|
-
import type { EvaluateSummary } from './types';
|
|
4
|
-
|
|
5
|
-
export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250, maxRows = 25) {
|
|
6
|
-
const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
|
|
7
|
-
const head = summary.table.head;
|
|
8
|
-
const headLength = head.prompts.length + head.vars.length;
|
|
9
|
-
const table = new Table({
|
|
10
|
-
head: [...head.vars, ...head.prompts.map((prompt) => prompt.display)],
|
|
11
|
-
colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
|
|
12
|
-
wordWrap: true,
|
|
13
|
-
wrapOnWordBoundary: false,
|
|
14
|
-
style: {
|
|
15
|
-
head: ['blue', 'bold'],
|
|
16
|
-
},
|
|
17
|
-
});
|
|
18
|
-
// Skip first row (header) and add the rest. Color PASS/FAIL
|
|
19
|
-
for (const row of summary.table.body.slice(0, maxRows)) {
|
|
20
|
-
table.push([
|
|
21
|
-
...row.vars,
|
|
22
|
-
...row.outputs.map(({ pass, score, text }) => {
|
|
23
|
-
if (text.length > tableCellMaxLength) {
|
|
24
|
-
text = text.slice(0, tableCellMaxLength) + '...';
|
|
25
|
-
}
|
|
26
|
-
if (pass) {
|
|
27
|
-
return chalk.green('[PASS] ') + text;
|
|
28
|
-
} else if (!pass) {
|
|
29
|
-
// color everything red up until '---'
|
|
30
|
-
return (
|
|
31
|
-
chalk.red('[FAIL] ') +
|
|
32
|
-
text
|
|
33
|
-
.split('---')
|
|
34
|
-
.map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
|
|
35
|
-
.join('---')
|
|
36
|
-
);
|
|
37
|
-
}
|
|
38
|
-
return text;
|
|
39
|
-
}),
|
|
40
|
-
]);
|
|
41
|
-
}
|
|
42
|
-
return table;
|
|
43
|
-
}
|
package/src/tableOutput.html
DELETED
|
@@ -1,52 +0,0 @@
|
|
|
1
|
-
<!DOCTYPE html>
|
|
2
|
-
<html>
|
|
3
|
-
<head>
|
|
4
|
-
<meta charset="utf-8" />
|
|
5
|
-
<meta name="viewport" content="width=device-width" />
|
|
6
|
-
<title>Table Output</title>
|
|
7
|
-
<style>
|
|
8
|
-
body {
|
|
9
|
-
font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica, Arial,
|
|
10
|
-
sans-serif;
|
|
11
|
-
}
|
|
12
|
-
table,
|
|
13
|
-
th,
|
|
14
|
-
td {
|
|
15
|
-
border: 1px solid black;
|
|
16
|
-
border-collapse: collapse;
|
|
17
|
-
text-align: left;
|
|
18
|
-
word-break: break-all;
|
|
19
|
-
}
|
|
20
|
-
th,
|
|
21
|
-
td {
|
|
22
|
-
padding: 5px;
|
|
23
|
-
min-width: 200px;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
tr > td[data-content^='[PASS]'] {
|
|
27
|
-
color: green;
|
|
28
|
-
}
|
|
29
|
-
tr > td[data-content^='[FAIL]'] {
|
|
30
|
-
color: #ad0000;
|
|
31
|
-
}
|
|
32
|
-
</style>
|
|
33
|
-
</head>
|
|
34
|
-
<body>
|
|
35
|
-
<table>
|
|
36
|
-
<thead>
|
|
37
|
-
{% for header in table[0] %}
|
|
38
|
-
<th>{{ header }}</th>
|
|
39
|
-
{% endfor %}
|
|
40
|
-
</thead>
|
|
41
|
-
<tbody>
|
|
42
|
-
{% for row in table.slice(1) %}
|
|
43
|
-
<tr>
|
|
44
|
-
{% for cell in row %}
|
|
45
|
-
<td data-content="{{cell}}">{{ cell }}</td>
|
|
46
|
-
{% endfor %}
|
|
47
|
-
</tr>
|
|
48
|
-
{% endfor %}
|
|
49
|
-
</tbody>
|
|
50
|
-
</table>
|
|
51
|
-
</body>
|
|
52
|
-
</html>
|
package/src/telemetry.ts
DELETED
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
import chalk from 'chalk';
|
|
2
|
-
|
|
3
|
-
import packageJson from '../package.json';
|
|
4
|
-
import logger from './logger';
|
|
5
|
-
import { fetchWithTimeout, maybeRecordFirstRun } from './util';
|
|
6
|
-
|
|
7
|
-
type TelemetryEvent = {
|
|
8
|
-
event: string;
|
|
9
|
-
packageVersion: string;
|
|
10
|
-
properties: Record<string, string | number>;
|
|
11
|
-
};
|
|
12
|
-
|
|
13
|
-
type TelemetryEventTypes = 'eval_ran' | 'assertion_used' | 'command_used';
|
|
14
|
-
|
|
15
|
-
const TELEMETRY_ENDPOINT = 'https://api.promptfoo.dev/telemetry';
|
|
16
|
-
|
|
17
|
-
const TELEMETRY_TIMEOUT_MS = 1000;
|
|
18
|
-
|
|
19
|
-
export class Telemetry {
|
|
20
|
-
private events: TelemetryEvent[] = [];
|
|
21
|
-
|
|
22
|
-
get disabled() {
|
|
23
|
-
return process.env.PROMPTFOO_DISABLE_TELEMETRY === '1';
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
record(eventName: TelemetryEventTypes, properties: Record<string, string | number>): void {
|
|
27
|
-
if (!this.disabled) {
|
|
28
|
-
this.events.push({
|
|
29
|
-
event: eventName,
|
|
30
|
-
packageVersion: packageJson.version,
|
|
31
|
-
properties,
|
|
32
|
-
});
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
maybeShowNotice(): void {
|
|
37
|
-
if (maybeRecordFirstRun()) {
|
|
38
|
-
logger.info(
|
|
39
|
-
chalk.gray(
|
|
40
|
-
'Anonymous telemetry is enabled. For more info, see https://www.promptfoo.dev/docs/configuration/telemetry',
|
|
41
|
-
),
|
|
42
|
-
);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
async send(): Promise<void> {
|
|
47
|
-
if (!this.disabled && this.events.length > 0) {
|
|
48
|
-
try {
|
|
49
|
-
const response = await fetchWithTimeout(
|
|
50
|
-
TELEMETRY_ENDPOINT,
|
|
51
|
-
{
|
|
52
|
-
method: 'POST',
|
|
53
|
-
headers: {
|
|
54
|
-
'Content-Type': 'application/json',
|
|
55
|
-
},
|
|
56
|
-
body: JSON.stringify(this.events),
|
|
57
|
-
},
|
|
58
|
-
TELEMETRY_TIMEOUT_MS,
|
|
59
|
-
);
|
|
60
|
-
|
|
61
|
-
if (response.ok) {
|
|
62
|
-
this.events = [];
|
|
63
|
-
}
|
|
64
|
-
} catch (err) {}
|
|
65
|
-
}
|
|
66
|
-
}
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
const telemetry = new Telemetry();
|
|
70
|
-
export default telemetry;
|
package/src/types.ts
DELETED
|
@@ -1,299 +0,0 @@
|
|
|
1
|
-
export interface CommandLineOptions {
|
|
2
|
-
// Shared with TestSuite
|
|
3
|
-
prompts: string[];
|
|
4
|
-
providers: string[];
|
|
5
|
-
output: string;
|
|
6
|
-
|
|
7
|
-
// Shared with EvaluateOptions
|
|
8
|
-
maxConcurrency: string;
|
|
9
|
-
repeat: string;
|
|
10
|
-
|
|
11
|
-
// Command line only
|
|
12
|
-
vars?: string;
|
|
13
|
-
tests?: string;
|
|
14
|
-
config?: string;
|
|
15
|
-
verbose?: boolean;
|
|
16
|
-
grader?: string;
|
|
17
|
-
view?: string;
|
|
18
|
-
tableCellMaxLength?: string;
|
|
19
|
-
write?: boolean;
|
|
20
|
-
cache?: boolean;
|
|
21
|
-
table?: boolean;
|
|
22
|
-
share?: boolean;
|
|
23
|
-
progressBar?: boolean;
|
|
24
|
-
|
|
25
|
-
generateSuggestions?: boolean;
|
|
26
|
-
promptPrefix?: string;
|
|
27
|
-
promptSuffix?: string;
|
|
28
|
-
}
|
|
29
|
-
|
|
30
|
-
export interface ProviderOptions {
|
|
31
|
-
id?: ProviderId;
|
|
32
|
-
config?: any;
|
|
33
|
-
prompts?: string[]; // List of prompt display strings
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
export interface ApiProvider {
|
|
37
|
-
id: () => string;
|
|
38
|
-
callApi: (prompt: string) => Promise<ProviderResponse>;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
export interface TokenUsage {
|
|
42
|
-
total: number;
|
|
43
|
-
prompt: number;
|
|
44
|
-
completion: number;
|
|
45
|
-
cached?: number;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
export interface ProviderResponse {
|
|
49
|
-
error?: string;
|
|
50
|
-
output?: string;
|
|
51
|
-
tokenUsage?: Partial<TokenUsage>;
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
export interface ProviderEmbeddingResponse {
|
|
55
|
-
error?: string;
|
|
56
|
-
embedding?: number[];
|
|
57
|
-
tokenUsage?: Partial<TokenUsage>;
|
|
58
|
-
}
|
|
59
|
-
|
|
60
|
-
export interface CsvRow {
|
|
61
|
-
[key: string]: string;
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
export type VarMapping = Record<string, string>;
|
|
65
|
-
|
|
66
|
-
export interface GradingConfig {
|
|
67
|
-
rubricPrompt?: string;
|
|
68
|
-
provider?: string | ApiProvider;
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
export interface PromptConfig {
|
|
72
|
-
prefix?: string;
|
|
73
|
-
suffix?: string;
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
export interface OutputConfig {
|
|
77
|
-
postprocess?: string;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
export interface EvaluateOptions {
|
|
81
|
-
maxConcurrency?: number;
|
|
82
|
-
showProgressBar?: boolean;
|
|
83
|
-
progressCallback?: (progress: number, total: number) => void;
|
|
84
|
-
generateSuggestions?: boolean;
|
|
85
|
-
repeat?: number;
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
export interface Prompt {
|
|
89
|
-
raw: string;
|
|
90
|
-
display: string;
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
export interface EvaluateResult {
|
|
94
|
-
prompt: Prompt;
|
|
95
|
-
vars: Record<string, string | object>;
|
|
96
|
-
response?: ProviderResponse;
|
|
97
|
-
error?: string;
|
|
98
|
-
success: boolean;
|
|
99
|
-
score: number;
|
|
100
|
-
latencyMs: number;
|
|
101
|
-
gradingResult?: GradingResult;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
export interface EvaluateTableOutput {
|
|
105
|
-
pass: boolean;
|
|
106
|
-
score: number;
|
|
107
|
-
text: string;
|
|
108
|
-
prompt: string;
|
|
109
|
-
latencyMs: number;
|
|
110
|
-
tokenUsage?: Partial<TokenUsage>;
|
|
111
|
-
gradingResult?: GradingResult;
|
|
112
|
-
}
|
|
113
|
-
|
|
114
|
-
export interface EvaluateTable {
|
|
115
|
-
head: {
|
|
116
|
-
prompts: Prompt[];
|
|
117
|
-
vars: string[];
|
|
118
|
-
};
|
|
119
|
-
|
|
120
|
-
body: {
|
|
121
|
-
outputs: EvaluateTableOutput[];
|
|
122
|
-
vars: string[];
|
|
123
|
-
}[];
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
export interface EvaluateStats {
|
|
127
|
-
successes: number;
|
|
128
|
-
failures: number;
|
|
129
|
-
tokenUsage: Required<TokenUsage>;
|
|
130
|
-
}
|
|
131
|
-
|
|
132
|
-
export interface EvaluateSummary {
|
|
133
|
-
version: number;
|
|
134
|
-
results: EvaluateResult[];
|
|
135
|
-
table: EvaluateTable;
|
|
136
|
-
stats: EvaluateStats;
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
export interface GradingResult {
|
|
140
|
-
pass: boolean;
|
|
141
|
-
score: number;
|
|
142
|
-
reason: string;
|
|
143
|
-
tokensUsed?: TokenUsage;
|
|
144
|
-
componentResults?: GradingResult[];
|
|
145
|
-
assertion: Assertion | null;
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
type BaseAssertionTypes =
|
|
149
|
-
| 'equals'
|
|
150
|
-
| 'contains'
|
|
151
|
-
| 'icontains'
|
|
152
|
-
| 'contains-all'
|
|
153
|
-
| 'contains-any'
|
|
154
|
-
| 'starts-with'
|
|
155
|
-
| 'regex'
|
|
156
|
-
| 'is-json'
|
|
157
|
-
| 'contains-json'
|
|
158
|
-
| 'javascript'
|
|
159
|
-
| 'python'
|
|
160
|
-
| 'similar'
|
|
161
|
-
| 'llm-rubric'
|
|
162
|
-
| 'webhook'
|
|
163
|
-
| 'rouge-n'
|
|
164
|
-
| 'rouge-s'
|
|
165
|
-
| 'rouge-l'
|
|
166
|
-
| 'levenshtein';
|
|
167
|
-
|
|
168
|
-
type NotPrefixed<T extends string> = `not-${T}`;
|
|
169
|
-
|
|
170
|
-
export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
|
|
171
|
-
|
|
172
|
-
// TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
|
|
173
|
-
export interface Assertion {
|
|
174
|
-
// Type of assertion
|
|
175
|
-
type: AssertionType;
|
|
176
|
-
|
|
177
|
-
// The expected value, if applicable
|
|
178
|
-
value?:
|
|
179
|
-
| string
|
|
180
|
-
| string[]
|
|
181
|
-
| object
|
|
182
|
-
| ((output: string, testCase: AtomicTestCase, assertion: Assertion) => Promise<GradingResult>);
|
|
183
|
-
|
|
184
|
-
// The threshold value, only applicable for similarity (cosine distance)
|
|
185
|
-
threshold?: number;
|
|
186
|
-
|
|
187
|
-
// The weight of this assertion compared to other assertions in the test case. Defaults to 1.
|
|
188
|
-
weight?: number;
|
|
189
|
-
|
|
190
|
-
// Some assertions (similarity, llm-rubric) require an LLM provider
|
|
191
|
-
provider?: GradingConfig['provider'];
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
// Each test case is graded pass/fail. A test case represents a unique input to the LLM after substituting `vars` in the prompt.
|
|
195
|
-
export interface TestCase {
|
|
196
|
-
// Optional description of what you're testing
|
|
197
|
-
description?: string;
|
|
198
|
-
|
|
199
|
-
// Key-value pairs to substitute in the prompt
|
|
200
|
-
vars?: Record<string, string | string[] | object>;
|
|
201
|
-
|
|
202
|
-
// Optional list of automatic checks to run on the LLM output
|
|
203
|
-
assert?: Assertion[];
|
|
204
|
-
|
|
205
|
-
// Additional configuration settings for the prompt
|
|
206
|
-
options?: PromptConfig & OutputConfig & GradingConfig;
|
|
207
|
-
}
|
|
208
|
-
|
|
209
|
-
export interface Scenario {
|
|
210
|
-
// Optional description of what you're testing
|
|
211
|
-
description?: string;
|
|
212
|
-
|
|
213
|
-
// Default test case config
|
|
214
|
-
config: Partial<TestCase>[];
|
|
215
|
-
|
|
216
|
-
// Optional list of automatic checks to run on the LLM output
|
|
217
|
-
tests: TestCase[];
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// Same as a TestCase, except the `vars` object has been flattened into its final form.
|
|
221
|
-
export interface AtomicTestCase extends TestCase {
|
|
222
|
-
vars?: Record<string, string | object>;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
// The test suite defines the "knobs" that we are tuning in prompt engineering: providers and prompts
|
|
226
|
-
export interface TestSuite {
|
|
227
|
-
// Optional description of what your LLM is trying to do
|
|
228
|
-
description?: string;
|
|
229
|
-
|
|
230
|
-
// One or more LLM APIs to use
|
|
231
|
-
providers: ApiProvider[];
|
|
232
|
-
|
|
233
|
-
// One or more prompt strings
|
|
234
|
-
prompts: Prompt[];
|
|
235
|
-
|
|
236
|
-
// Optional mapping of provider to prompt display strings. If not provided,
|
|
237
|
-
// all prompts are used for all providers.
|
|
238
|
-
providerPromptMap?: Record<string, string[]>;
|
|
239
|
-
|
|
240
|
-
// Test cases
|
|
241
|
-
tests?: TestCase[];
|
|
242
|
-
|
|
243
|
-
// scenarios
|
|
244
|
-
scenarios?: Scenario[];
|
|
245
|
-
|
|
246
|
-
// Default test case config
|
|
247
|
-
defaultTest?: Partial<TestCase>;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
export type ProviderId = string;
|
|
251
|
-
|
|
252
|
-
export type ProviderFunction = (prompt: string) => Promise<ProviderResponse>;
|
|
253
|
-
|
|
254
|
-
export type ProviderOptionsMap = Record<ProviderId, ProviderOptions>;
|
|
255
|
-
|
|
256
|
-
// TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
|
|
257
|
-
export interface TestSuiteConfig {
|
|
258
|
-
// Optional description of what your LLM is trying to do
|
|
259
|
-
description?: string;
|
|
260
|
-
|
|
261
|
-
// One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
|
|
262
|
-
providers:
|
|
263
|
-
| ProviderId
|
|
264
|
-
| ProviderId[]
|
|
265
|
-
| ProviderOptionsMap[]
|
|
266
|
-
| ProviderOptions[]
|
|
267
|
-
| ProviderFunction;
|
|
268
|
-
|
|
269
|
-
// One or more prompt files to load
|
|
270
|
-
prompts: string | string[];
|
|
271
|
-
|
|
272
|
-
// Path to a test file, OR list of LLM prompt variations (aka "test case")
|
|
273
|
-
tests: string | string[] | TestCase[];
|
|
274
|
-
|
|
275
|
-
// Scenarios, groupings of data and tests to be evaluated
|
|
276
|
-
scenarios?: Scenario[];
|
|
277
|
-
|
|
278
|
-
// Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
|
|
279
|
-
defaultTest?: Omit<TestCase, 'description'>;
|
|
280
|
-
|
|
281
|
-
// Path to write output. Writes to console/web viewer if not set.
|
|
282
|
-
outputPath?: string;
|
|
283
|
-
|
|
284
|
-
// Determines whether or not sharing is enabled.
|
|
285
|
-
sharing?: boolean;
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
export type UnifiedConfig = TestSuiteConfig & {
|
|
289
|
-
evaluateOptions: EvaluateOptions;
|
|
290
|
-
commandLineOptions: Partial<CommandLineOptions>;
|
|
291
|
-
};
|
|
292
|
-
|
|
293
|
-
export interface SharedResults {
|
|
294
|
-
data: {
|
|
295
|
-
version: number;
|
|
296
|
-
results: EvaluateSummary;
|
|
297
|
-
config: Partial<UnifiedConfig>;
|
|
298
|
-
};
|
|
299
|
-
}
|
package/src/updates.ts
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import chalk from 'chalk';
|
|
2
|
-
import semverGt from 'semver/functions/gt';
|
|
3
|
-
|
|
4
|
-
import logger from './logger';
|
|
5
|
-
import { fetchWithTimeout } from './util';
|
|
6
|
-
import packageJson from '../package.json';
|
|
7
|
-
|
|
8
|
-
const VERSION = packageJson.version;
|
|
9
|
-
|
|
10
|
-
export async function getLatestVersion(packageName: string) {
|
|
11
|
-
const response = await fetchWithTimeout(`https://registry.npmjs.org/${packageName}`, {}, 1000);
|
|
12
|
-
if (!response.ok) {
|
|
13
|
-
throw new Error(`Failed to fetch package information for ${packageName}`);
|
|
14
|
-
}
|
|
15
|
-
const data = await response.json();
|
|
16
|
-
return data['dist-tags'].latest;
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
export async function checkForUpdates(): Promise<boolean> {
|
|
20
|
-
if (process.env.PROMPTFOO_DISABLE_UPDATE) {
|
|
21
|
-
return false;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
let latestVersion: string;
|
|
25
|
-
try {
|
|
26
|
-
latestVersion = await getLatestVersion('promptfoo');
|
|
27
|
-
} catch {
|
|
28
|
-
return false;
|
|
29
|
-
}
|
|
30
|
-
if (semverGt(latestVersion, VERSION)) {
|
|
31
|
-
const border = '='.repeat(process.stdout.columns - 10);
|
|
32
|
-
logger.info(
|
|
33
|
-
`\n${border}
|
|
34
|
-
${chalk.yellow('⚠️')} The current version of promptfoo ${chalk.yellow(
|
|
35
|
-
VERSION,
|
|
36
|
-
)} is lower than the latest available version ${chalk.green(latestVersion)}.
|
|
37
|
-
|
|
38
|
-
Please run ${chalk.green('npx promptfoo@latest')} or ${chalk.green(
|
|
39
|
-
'npm install -g promptfoo@latest',
|
|
40
|
-
)} to update.
|
|
41
|
-
${border}\n`,
|
|
42
|
-
);
|
|
43
|
-
return true;
|
|
44
|
-
}
|
|
45
|
-
return false;
|
|
46
|
-
}
|