promptfoo 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/package.json +4 -4
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +5 -0
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/evaluator.js +1 -1
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +1 -5
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/matchers.d.ts +3 -2
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +37 -9
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/providers/anthropic.d.ts +5 -3
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/anthropic.js +8 -10
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +9 -8
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +33 -36
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/openai.d.ts +12 -12
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +54 -65
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +4 -2
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +10 -8
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/webhook.d.ts +9 -0
- package/dist/src/providers/webhook.d.ts.map +1 -0
- package/dist/src/providers/webhook.js +54 -0
- package/dist/src/providers/webhook.js.map +1 -0
- package/dist/src/providers.d.ts +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +36 -28
- package/dist/src/providers.js.map +1 -1
- package/dist/src/suggestions.d.ts.map +1 -1
- package/dist/src/suggestions.js +1 -3
- package/dist/src/suggestions.js.map +1 -1
- package/dist/src/types.d.ts +7 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.js +1 -1
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
- package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
- package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
- package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
- package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
- package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
- package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +6 -6
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/setup/index.html +27 -1
- package/dist/src/web/nextui/setup/index.txt +9 -9
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +9 -5
- package/dist/src/web/server.js.map +1 -1
- package/package.json +4 -4
- package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
- package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
- package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
- package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
- package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
- package/dist/src/web/nextui/api +0 -1
- package/src/__mocks__/esm.ts +0 -3
- package/src/assertions.ts +0 -580
- package/src/cache.ts +0 -109
- package/src/esm.ts +0 -13
- package/src/evaluator.ts +0 -500
- package/src/index.ts +0 -52
- package/src/logger.ts +0 -46
- package/src/main.ts +0 -442
- package/src/matchers.ts +0 -120
- package/src/onboarding.ts +0 -69
- package/src/prompts.ts +0 -39
- package/src/providers/anthropic.ts +0 -88
- package/src/providers/azureopenai.ts +0 -299
- package/src/providers/llama.ts +0 -95
- package/src/providers/localai.ts +0 -111
- package/src/providers/ollama.ts +0 -89
- package/src/providers/openai.ts +0 -337
- package/src/providers/replicate.ts +0 -99
- package/src/providers/scriptCompletion.ts +0 -35
- package/src/providers/shared.ts +0 -34
- package/src/providers.ts +0 -192
- package/src/share.ts +0 -27
- package/src/suggestions.ts +0 -63
- package/src/table.ts +0 -43
- package/src/tableOutput.html +0 -52
- package/src/telemetry.ts +0 -70
- package/src/types.ts +0 -299
- package/src/updates.ts +0 -46
- package/src/util.ts +0 -543
- package/src/web/nextui/.eslintrc.json +0 -3
- package/src/web/nextui/next.config.js +0 -14
- package/src/web/nextui/package-lock.json +0 -4644
- package/src/web/nextui/package.json +0 -47
- package/src/web/nextui/public/favicon.ico +0 -0
- package/src/web/nextui/public/logo.svg +0 -30
- package/src/web/nextui/src/app/Home.css +0 -3
- package/src/web/nextui/src/app/api/route.ts +0 -6
- package/src/web/nextui/src/app/components/DarkMode.css +0 -22
- package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
- package/src/web/nextui/src/app/components/Logo.css +0 -32
- package/src/web/nextui/src/app/components/Logo.tsx +0 -11
- package/src/web/nextui/src/app/components/PageShell.css +0 -33
- package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
- package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
- package/src/web/nextui/src/app/eval/Eval.css +0 -13
- package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
- package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
- package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
- package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
- package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
- package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
- package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
- package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
- package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
- package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
- package/src/web/nextui/src/app/eval/index.css +0 -0
- package/src/web/nextui/src/app/eval/page.tsx +0 -8
- package/src/web/nextui/src/app/eval/store.ts +0 -18
- package/src/web/nextui/src/app/eval/types.ts +0 -20
- package/src/web/nextui/src/app/globals.css +0 -58
- package/src/web/nextui/src/app/layout.tsx +0 -25
- package/src/web/nextui/src/app/page.tsx +0 -7
- package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
- package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
- package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
- package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
- package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
- package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
- package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
- package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
- package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
- package/src/web/nextui/src/app/setup/page.css +0 -3
- package/src/web/nextui/src/app/setup/page.tsx +0 -160
- package/src/web/nextui/src/util/api.ts +0 -1
- package/src/web/nextui/src/util/store.ts +0 -53
- package/src/web/nextui/tsconfig.json +0 -28
- package/src/web/server.ts +0 -151
- /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/assertions.ts
DELETED
|
@@ -1,580 +0,0 @@
|
|
|
1
|
-
import rouge from 'rouge';
|
|
2
|
-
import invariant from 'tiny-invariant';
|
|
3
|
-
import Ajv from 'ajv';
|
|
4
|
-
import { distance as levenshtein } from 'fastest-levenshtein';
|
|
5
|
-
|
|
6
|
-
import telemetry from './telemetry';
|
|
7
|
-
import { fetchWithRetries, getNunjucksEngine } from './util';
|
|
8
|
-
import { matchesSimilarity, matchesLlmRubric } from './matchers';
|
|
9
|
-
|
|
10
|
-
import type { Assertion, AssertionType, GradingResult, AtomicTestCase } from './types';
|
|
11
|
-
|
|
12
|
-
const DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD = 0.8;
|
|
13
|
-
|
|
14
|
-
const ajv = new Ajv();
|
|
15
|
-
const nunjucks = getNunjucksEngine();
|
|
16
|
-
|
|
17
|
-
function handleRougeScore(
|
|
18
|
-
baseType: 'rouge-n',
|
|
19
|
-
assertion: Assertion,
|
|
20
|
-
expected: string | string[],
|
|
21
|
-
output: string,
|
|
22
|
-
inverted: boolean,
|
|
23
|
-
): GradingResult {
|
|
24
|
-
const fnName = baseType[baseType.length - 1] as 'n' | 'l' | 's';
|
|
25
|
-
const rougeMethod = rouge[fnName];
|
|
26
|
-
const score = rougeMethod(output, expected);
|
|
27
|
-
const pass = score >= (assertion.threshold || 0.75) != inverted;
|
|
28
|
-
|
|
29
|
-
return {
|
|
30
|
-
pass,
|
|
31
|
-
score: inverted ? 1 - score : score,
|
|
32
|
-
reason: pass
|
|
33
|
-
? `${baseType.toUpperCase()} score ${score} is greater than or equal to threshold ${
|
|
34
|
-
assertion.threshold || 0.75
|
|
35
|
-
}`
|
|
36
|
-
: `${baseType.toUpperCase()} score ${score} is less than threshold ${
|
|
37
|
-
assertion.threshold || 0.75
|
|
38
|
-
}`,
|
|
39
|
-
assertion,
|
|
40
|
-
};
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
export async function runAssertions(test: AtomicTestCase, output: string): Promise<GradingResult> {
|
|
44
|
-
const tokensUsed = {
|
|
45
|
-
total: 0,
|
|
46
|
-
prompt: 0,
|
|
47
|
-
completion: 0,
|
|
48
|
-
};
|
|
49
|
-
|
|
50
|
-
if (!test.assert || test.assert.length < 1) {
|
|
51
|
-
return { pass: true, score: 1, reason: 'No assertions', tokensUsed, assertion: null };
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
let totalScore = 0;
|
|
55
|
-
let totalWeight = 0;
|
|
56
|
-
let allPass = true;
|
|
57
|
-
let failedReason = '';
|
|
58
|
-
const componentResults: GradingResult[] = [];
|
|
59
|
-
|
|
60
|
-
for (const assertion of test.assert) {
|
|
61
|
-
const weight = assertion.weight || 1;
|
|
62
|
-
totalWeight += weight;
|
|
63
|
-
|
|
64
|
-
const result = await runAssertion(assertion, test, output);
|
|
65
|
-
totalScore += result.score * weight;
|
|
66
|
-
componentResults.push(result);
|
|
67
|
-
|
|
68
|
-
if (result.tokensUsed) {
|
|
69
|
-
tokensUsed.total += result.tokensUsed.total;
|
|
70
|
-
tokensUsed.prompt += result.tokensUsed.prompt;
|
|
71
|
-
tokensUsed.completion += result.tokensUsed.completion;
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
if (!result.pass) {
|
|
75
|
-
allPass = false;
|
|
76
|
-
failedReason = result.reason;
|
|
77
|
-
if (process.env.PROMPTFOO_SHORT_CIRCUIT_TEST_FAILURES) {
|
|
78
|
-
return result;
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
|
|
83
|
-
return {
|
|
84
|
-
pass: allPass,
|
|
85
|
-
score: totalScore / totalWeight,
|
|
86
|
-
reason: allPass ? 'All assertions passed' : failedReason,
|
|
87
|
-
tokensUsed,
|
|
88
|
-
componentResults,
|
|
89
|
-
assertion: null,
|
|
90
|
-
};
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
export async function runAssertion(
|
|
94
|
-
assertion: Assertion,
|
|
95
|
-
test: AtomicTestCase,
|
|
96
|
-
output: string,
|
|
97
|
-
): Promise<GradingResult> {
|
|
98
|
-
let pass: boolean = false;
|
|
99
|
-
let score: number = 0.0;
|
|
100
|
-
|
|
101
|
-
invariant(assertion.type, `Assertion must have a type: ${JSON.stringify(assertion)}`);
|
|
102
|
-
|
|
103
|
-
const inverse = assertion.type.startsWith('not-');
|
|
104
|
-
const baseType = inverse ? assertion.type.slice(4) : assertion.type;
|
|
105
|
-
|
|
106
|
-
telemetry.record('assertion_used', {
|
|
107
|
-
type: baseType,
|
|
108
|
-
});
|
|
109
|
-
|
|
110
|
-
//render assertion values
|
|
111
|
-
let renderedValue = assertion.value;
|
|
112
|
-
// renderString for assertion values
|
|
113
|
-
if (renderedValue && typeof renderedValue === 'string') {
|
|
114
|
-
renderedValue = nunjucks.renderString(renderedValue, test.vars || {});
|
|
115
|
-
} else if (renderedValue && Array.isArray(renderedValue)) {
|
|
116
|
-
renderedValue = renderedValue.map((v) => nunjucks.renderString(v, test.vars || {}));
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
if (baseType === 'equals') {
|
|
120
|
-
pass = renderedValue === output;
|
|
121
|
-
return {
|
|
122
|
-
pass,
|
|
123
|
-
score: pass ? 1 : 0,
|
|
124
|
-
reason: pass ? 'Assertion passed' : `Expected output "${renderedValue}"`,
|
|
125
|
-
assertion,
|
|
126
|
-
};
|
|
127
|
-
}
|
|
128
|
-
|
|
129
|
-
if (baseType === 'is-json') {
|
|
130
|
-
let parsedJson;
|
|
131
|
-
try {
|
|
132
|
-
parsedJson = JSON.parse(output);
|
|
133
|
-
pass = !inverse;
|
|
134
|
-
} catch (err) {
|
|
135
|
-
pass = inverse;
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
if (pass && renderedValue) {
|
|
139
|
-
invariant(typeof renderedValue === 'object', 'is-json assertion must have an object value');
|
|
140
|
-
const validate = ajv.compile(renderedValue);
|
|
141
|
-
pass = validate(parsedJson);
|
|
142
|
-
if (!pass) {
|
|
143
|
-
return {
|
|
144
|
-
pass,
|
|
145
|
-
score: 0,
|
|
146
|
-
reason: `JSON does not conform to the provided schema. Errors: ${ajv.errorsText(
|
|
147
|
-
validate.errors,
|
|
148
|
-
)}`,
|
|
149
|
-
assertion,
|
|
150
|
-
};
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
|
|
154
|
-
return {
|
|
155
|
-
pass,
|
|
156
|
-
score: pass ? 1 : 0,
|
|
157
|
-
reason: pass ? 'Assertion passed' : 'Expected output to be valid JSON',
|
|
158
|
-
assertion,
|
|
159
|
-
};
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
if (baseType === 'contains') {
|
|
163
|
-
invariant(renderedValue, '"contains" assertion type must have a string or number value');
|
|
164
|
-
invariant(
|
|
165
|
-
typeof renderedValue === 'string' || typeof renderedValue === 'number',
|
|
166
|
-
'"contains" assertion type must have a string or number value',
|
|
167
|
-
);
|
|
168
|
-
pass = output.includes(String(renderedValue)) !== inverse;
|
|
169
|
-
return {
|
|
170
|
-
pass,
|
|
171
|
-
score: pass ? 1 : 0,
|
|
172
|
-
reason: pass
|
|
173
|
-
? 'Assertion passed'
|
|
174
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
|
|
175
|
-
assertion,
|
|
176
|
-
};
|
|
177
|
-
}
|
|
178
|
-
|
|
179
|
-
if (baseType === 'contains-any') {
|
|
180
|
-
invariant(renderedValue, '"contains-any" assertion type must have a value');
|
|
181
|
-
invariant(
|
|
182
|
-
Array.isArray(renderedValue),
|
|
183
|
-
'"contains-any" assertion type must have an array value',
|
|
184
|
-
);
|
|
185
|
-
pass = renderedValue.some((value) => output.includes(value)) !== inverse;
|
|
186
|
-
return {
|
|
187
|
-
pass,
|
|
188
|
-
score: pass ? 1 : 0,
|
|
189
|
-
reason: pass
|
|
190
|
-
? 'Assertion passed'
|
|
191
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain one of "${renderedValue.join(', ')}"`,
|
|
192
|
-
assertion,
|
|
193
|
-
};
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
if (baseType === 'contains-all') {
|
|
197
|
-
invariant(renderedValue, '"contains-all" assertion type must have a value');
|
|
198
|
-
invariant(
|
|
199
|
-
Array.isArray(renderedValue),
|
|
200
|
-
'"contains-all" assertion type must have an array value',
|
|
201
|
-
);
|
|
202
|
-
pass = renderedValue.every((value) => output.includes(value)) !== inverse;
|
|
203
|
-
return {
|
|
204
|
-
pass,
|
|
205
|
-
score: pass ? 1 : 0,
|
|
206
|
-
reason: pass
|
|
207
|
-
? 'Assertion passed'
|
|
208
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain all of "${renderedValue.join(', ')}"`,
|
|
209
|
-
assertion,
|
|
210
|
-
};
|
|
211
|
-
}
|
|
212
|
-
|
|
213
|
-
if (baseType === 'regex') {
|
|
214
|
-
invariant(renderedValue, '"regex" assertion type must have a string value');
|
|
215
|
-
invariant(
|
|
216
|
-
typeof renderedValue === 'string',
|
|
217
|
-
'"contains" assertion type must have a string value',
|
|
218
|
-
);
|
|
219
|
-
const regex = new RegExp(renderedValue);
|
|
220
|
-
pass = regex.test(output) !== inverse;
|
|
221
|
-
return {
|
|
222
|
-
pass,
|
|
223
|
-
score: pass ? 1 : 0,
|
|
224
|
-
reason: pass
|
|
225
|
-
? 'Assertion passed'
|
|
226
|
-
: `Expected output to ${inverse ? 'not ' : ''}match regex "${renderedValue}"`,
|
|
227
|
-
assertion,
|
|
228
|
-
};
|
|
229
|
-
}
|
|
230
|
-
|
|
231
|
-
if (baseType === 'icontains') {
|
|
232
|
-
invariant(renderedValue, '"icontains" assertion type must have a string or number value');
|
|
233
|
-
invariant(
|
|
234
|
-
typeof renderedValue === 'string' || typeof renderedValue === 'number',
|
|
235
|
-
'"icontains" assertion type must have a string or number value',
|
|
236
|
-
);
|
|
237
|
-
pass = output.toLowerCase().includes(String(renderedValue).toLowerCase()) !== inverse;
|
|
238
|
-
return {
|
|
239
|
-
pass,
|
|
240
|
-
score: pass ? 1 : 0,
|
|
241
|
-
reason: pass
|
|
242
|
-
? 'Assertion passed'
|
|
243
|
-
: `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
|
|
244
|
-
assertion,
|
|
245
|
-
};
|
|
246
|
-
}
|
|
247
|
-
|
|
248
|
-
if (baseType === 'starts-with') {
|
|
249
|
-
invariant(renderedValue, '"starts-with" assertion type must have a string value');
|
|
250
|
-
invariant(
|
|
251
|
-
typeof renderedValue === 'string',
|
|
252
|
-
'"starts-with" assertion type must have a string value',
|
|
253
|
-
);
|
|
254
|
-
pass = output.startsWith(String(renderedValue)) !== inverse;
|
|
255
|
-
return {
|
|
256
|
-
pass,
|
|
257
|
-
score: pass ? 1 : 0,
|
|
258
|
-
reason: pass
|
|
259
|
-
? 'Assertion passed'
|
|
260
|
-
: `Expected output to ${inverse ? 'not ' : ''}start with "${renderedValue}"`,
|
|
261
|
-
assertion,
|
|
262
|
-
};
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
if (baseType === 'contains-json') {
|
|
266
|
-
const jsonMatch = containsJSON(output);
|
|
267
|
-
pass = jsonMatch !== inverse;
|
|
268
|
-
|
|
269
|
-
if (pass && renderedValue) {
|
|
270
|
-
invariant(
|
|
271
|
-
typeof renderedValue === 'object',
|
|
272
|
-
'contains-json assertion must have an object value',
|
|
273
|
-
);
|
|
274
|
-
const validate = ajv.compile(renderedValue);
|
|
275
|
-
pass = validate(jsonMatch);
|
|
276
|
-
if (!pass) {
|
|
277
|
-
return {
|
|
278
|
-
pass,
|
|
279
|
-
score: 0,
|
|
280
|
-
reason: `JSON does not conform to the provided schema. Errors: ${ajv.errorsText(
|
|
281
|
-
validate.errors,
|
|
282
|
-
)}`,
|
|
283
|
-
assertion,
|
|
284
|
-
};
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
|
|
288
|
-
return {
|
|
289
|
-
pass,
|
|
290
|
-
score: pass ? 1 : 0,
|
|
291
|
-
reason: pass ? 'Assertion passed' : 'Expected output to contain valid JSON',
|
|
292
|
-
assertion,
|
|
293
|
-
};
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
const context = {
|
|
297
|
-
vars: test.vars || {},
|
|
298
|
-
};
|
|
299
|
-
|
|
300
|
-
if (baseType === 'javascript') {
|
|
301
|
-
try {
|
|
302
|
-
if (typeof assertion.value === 'function') {
|
|
303
|
-
return assertion.value(output, test, assertion);
|
|
304
|
-
}
|
|
305
|
-
invariant(typeof renderedValue === 'string', 'javascript assertion must have a string value');
|
|
306
|
-
const functionBody = renderedValue.includes('\n') ? renderedValue : `return ${renderedValue}`;
|
|
307
|
-
const customFunction = new Function('output', 'context', functionBody);
|
|
308
|
-
const result = customFunction(output, context) as any;
|
|
309
|
-
if (typeof result === 'boolean') {
|
|
310
|
-
pass = result !== inverse;
|
|
311
|
-
score = 1.0;
|
|
312
|
-
} else if (typeof result === 'number') {
|
|
313
|
-
pass = true;
|
|
314
|
-
score = result;
|
|
315
|
-
} else if (typeof result === 'object') {
|
|
316
|
-
return result;
|
|
317
|
-
} else {
|
|
318
|
-
throw new Error('Custom function must return a boolean or number');
|
|
319
|
-
}
|
|
320
|
-
} catch (err) {
|
|
321
|
-
return {
|
|
322
|
-
pass: false,
|
|
323
|
-
score: 0,
|
|
324
|
-
reason: `Custom function threw error: ${(err as Error).message}
|
|
325
|
-
${renderedValue}`,
|
|
326
|
-
assertion,
|
|
327
|
-
};
|
|
328
|
-
}
|
|
329
|
-
return {
|
|
330
|
-
pass,
|
|
331
|
-
score,
|
|
332
|
-
reason: pass
|
|
333
|
-
? 'Assertion passed'
|
|
334
|
-
: `Custom function returned ${inverse ? 'true' : 'false'}
|
|
335
|
-
${renderedValue}`,
|
|
336
|
-
assertion,
|
|
337
|
-
};
|
|
338
|
-
}
|
|
339
|
-
|
|
340
|
-
if (baseType === 'python') {
|
|
341
|
-
try {
|
|
342
|
-
const { execSync } = require('child_process');
|
|
343
|
-
const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
|
|
344
|
-
const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
|
|
345
|
-
const result = execSync(
|
|
346
|
-
`python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
|
|
347
|
-
)
|
|
348
|
-
.toString()
|
|
349
|
-
.trim();
|
|
350
|
-
if (result === 'true') {
|
|
351
|
-
pass = true;
|
|
352
|
-
score = 1.0;
|
|
353
|
-
} else if (result === 'false') {
|
|
354
|
-
pass = false;
|
|
355
|
-
score = 0.0;
|
|
356
|
-
} else if (result.startsWith('{')) {
|
|
357
|
-
return JSON.parse(result);
|
|
358
|
-
} else {
|
|
359
|
-
pass = true;
|
|
360
|
-
score = parseFloat(result);
|
|
361
|
-
if (isNaN(score)) {
|
|
362
|
-
throw new Error(
|
|
363
|
-
'Python code must return a boolean, number, or {pass, score, reason} object',
|
|
364
|
-
);
|
|
365
|
-
}
|
|
366
|
-
}
|
|
367
|
-
} catch (err) {
|
|
368
|
-
return {
|
|
369
|
-
pass: false,
|
|
370
|
-
score: 0,
|
|
371
|
-
reason: `Python code execution failed: ${(err as Error).message}`,
|
|
372
|
-
assertion,
|
|
373
|
-
};
|
|
374
|
-
}
|
|
375
|
-
return {
|
|
376
|
-
pass,
|
|
377
|
-
score,
|
|
378
|
-
reason: pass
|
|
379
|
-
? 'Assertion passed'
|
|
380
|
-
: `Python code returned ${pass ? 'true' : 'false'}
|
|
381
|
-
${assertion.value}`,
|
|
382
|
-
assertion,
|
|
383
|
-
};
|
|
384
|
-
}
|
|
385
|
-
|
|
386
|
-
if (baseType === 'similar') {
|
|
387
|
-
invariant(renderedValue, 'Similarity assertion must have a string value');
|
|
388
|
-
invariant(
|
|
389
|
-
typeof renderedValue === 'string',
|
|
390
|
-
'"contains" assertion type must have a string value',
|
|
391
|
-
);
|
|
392
|
-
return {
|
|
393
|
-
assertion,
|
|
394
|
-
...(await matchesSimilarity(renderedValue, output, assertion.threshold || 0.75, inverse)),
|
|
395
|
-
};
|
|
396
|
-
}
|
|
397
|
-
|
|
398
|
-
if (baseType === 'llm-rubric') {
|
|
399
|
-
invariant(renderedValue, 'Similarity assertion must have a string value');
|
|
400
|
-
invariant(
|
|
401
|
-
typeof renderedValue === 'string',
|
|
402
|
-
'"contains" assertion type must have a string value',
|
|
403
|
-
);
|
|
404
|
-
|
|
405
|
-
// Assertion provider overrides test provider
|
|
406
|
-
test.options = test.options || {};
|
|
407
|
-
test.options.provider = assertion.provider || test.options.provider;
|
|
408
|
-
|
|
409
|
-
return {
|
|
410
|
-
assertion,
|
|
411
|
-
...(await matchesLlmRubric(renderedValue, output, test.options)),
|
|
412
|
-
};
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
if (baseType === 'webhook') {
|
|
416
|
-
invariant(renderedValue, '"webhook" assertion type must have a URL value');
|
|
417
|
-
invariant(typeof renderedValue === 'string', '"webhook" assertion type must have a URL value');
|
|
418
|
-
|
|
419
|
-
try {
|
|
420
|
-
const context = {
|
|
421
|
-
vars: test.vars || {},
|
|
422
|
-
};
|
|
423
|
-
const response = await fetchWithRetries(
|
|
424
|
-
renderedValue,
|
|
425
|
-
{
|
|
426
|
-
method: 'POST',
|
|
427
|
-
headers: {
|
|
428
|
-
'Content-Type': 'application/json',
|
|
429
|
-
},
|
|
430
|
-
body: JSON.stringify({ output, context }),
|
|
431
|
-
},
|
|
432
|
-
process.env.WEBHOOK_TIMEOUT ? parseInt(process.env.WEBHOOK_TIMEOUT, 10) : 5000,
|
|
433
|
-
);
|
|
434
|
-
|
|
435
|
-
if (!response.ok) {
|
|
436
|
-
throw new Error(`Webhook response status: ${response.status}`);
|
|
437
|
-
}
|
|
438
|
-
|
|
439
|
-
const jsonResponse = await response.json();
|
|
440
|
-
pass = jsonResponse.pass !== inverse;
|
|
441
|
-
score =
|
|
442
|
-
typeof jsonResponse.score === 'undefined'
|
|
443
|
-
? pass
|
|
444
|
-
? 1
|
|
445
|
-
: 0
|
|
446
|
-
: inverse
|
|
447
|
-
? 1 - jsonResponse.score
|
|
448
|
-
: jsonResponse.score;
|
|
449
|
-
} catch (err) {
|
|
450
|
-
return {
|
|
451
|
-
pass: false,
|
|
452
|
-
score: 0,
|
|
453
|
-
reason: `Webhook error: ${(err as Error).message}`,
|
|
454
|
-
assertion,
|
|
455
|
-
};
|
|
456
|
-
}
|
|
457
|
-
|
|
458
|
-
return {
|
|
459
|
-
pass,
|
|
460
|
-
score,
|
|
461
|
-
reason: pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`,
|
|
462
|
-
assertion,
|
|
463
|
-
};
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
if (baseType === 'rouge-n') {
|
|
467
|
-
invariant(
|
|
468
|
-
typeof renderedValue === 'string' || Array.isArray(renderedValue),
|
|
469
|
-
'"rouge" assertion type must be a value (string or string array)',
|
|
470
|
-
);
|
|
471
|
-
return handleRougeScore(baseType, assertion, renderedValue, output, inverse);
|
|
472
|
-
}
|
|
473
|
-
|
|
474
|
-
if (baseType === 'levenshtein') {
|
|
475
|
-
invariant(
|
|
476
|
-
typeof renderedValue === 'string',
|
|
477
|
-
'"levenshtein" assertion type must have a string value',
|
|
478
|
-
);
|
|
479
|
-
const levDistance = levenshtein(output, renderedValue);
|
|
480
|
-
pass = levDistance <= (assertion.threshold || 5);
|
|
481
|
-
return {
|
|
482
|
-
pass,
|
|
483
|
-
score: pass ? 1 : 0,
|
|
484
|
-
reason: pass
|
|
485
|
-
? 'Assertion passed'
|
|
486
|
-
: `Levenshtein distance ${levDistance} is greater than threshold ${
|
|
487
|
-
assertion.threshold || 5
|
|
488
|
-
}`,
|
|
489
|
-
assertion,
|
|
490
|
-
};
|
|
491
|
-
}
|
|
492
|
-
|
|
493
|
-
throw new Error('Unknown assertion type: ' + assertion.type);
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
function containsJSON(str: string): boolean {
|
|
497
|
-
// Regular expression to check for JSON-like pattern
|
|
498
|
-
const jsonPattern = /({[\s\S]*}|\[[\s\S]*])/;
|
|
499
|
-
|
|
500
|
-
const match = str.match(jsonPattern);
|
|
501
|
-
|
|
502
|
-
if (!match) {
|
|
503
|
-
return false;
|
|
504
|
-
}
|
|
505
|
-
|
|
506
|
-
try {
|
|
507
|
-
return JSON.parse(match[0]);
|
|
508
|
-
} catch (error) {
|
|
509
|
-
return false;
|
|
510
|
-
}
|
|
511
|
-
}
|
|
512
|
-
|
|
513
|
-
export function assertionFromString(expected: string): Assertion {
|
|
514
|
-
// Legacy options
|
|
515
|
-
if (expected.startsWith('fn:') || expected.startsWith('eval:')) {
|
|
516
|
-
// TODO(1.0): delete eval: legacy option
|
|
517
|
-
const sliceLength = expected.startsWith('fn:') ? 'fn:'.length : 'eval:'.length;
|
|
518
|
-
const functionBody = expected.slice(sliceLength);
|
|
519
|
-
return {
|
|
520
|
-
type: 'javascript',
|
|
521
|
-
value: functionBody,
|
|
522
|
-
};
|
|
523
|
-
}
|
|
524
|
-
if (expected.startsWith('grade:')) {
|
|
525
|
-
return {
|
|
526
|
-
type: 'llm-rubric',
|
|
527
|
-
value: expected.slice(6),
|
|
528
|
-
};
|
|
529
|
-
}
|
|
530
|
-
|
|
531
|
-
// New options
|
|
532
|
-
const assertionRegex =
|
|
533
|
-
/^(not-)?(equals|contains-any|contains-all|contains-json|is-json|regex|icontains|contains|webhook|rouge-n|similar|starts-with|levenshtein)(?:\((\d+(?:\.\d+)?)\))?(?::(.*))?$/;
|
|
534
|
-
const regexMatch = expected.match(assertionRegex);
|
|
535
|
-
|
|
536
|
-
if (regexMatch) {
|
|
537
|
-
const [_, notPrefix, type, thresholdStr, value] = regexMatch;
|
|
538
|
-
const fullType = notPrefix ? `not-${type}` : type;
|
|
539
|
-
const threshold = parseFloat(thresholdStr);
|
|
540
|
-
|
|
541
|
-
if (type === 'contains-any' || type === 'contains-all') {
|
|
542
|
-
return {
|
|
543
|
-
type: fullType as AssertionType,
|
|
544
|
-
value: value.split(',').map((s) => s.trim()),
|
|
545
|
-
};
|
|
546
|
-
} else if (type === 'contains-json' || type === 'is-json') {
|
|
547
|
-
return {
|
|
548
|
-
type: fullType as AssertionType,
|
|
549
|
-
};
|
|
550
|
-
} else if (
|
|
551
|
-
type === 'rouge-n' ||
|
|
552
|
-
type === 'similar' ||
|
|
553
|
-
type === 'starts-with' ||
|
|
554
|
-
type === 'levenshtein'
|
|
555
|
-
) {
|
|
556
|
-
return {
|
|
557
|
-
type: fullType as AssertionType,
|
|
558
|
-
value,
|
|
559
|
-
threshold: threshold || (type === 'similar' ? DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD : 0.75),
|
|
560
|
-
};
|
|
561
|
-
} else {
|
|
562
|
-
return {
|
|
563
|
-
type: fullType as AssertionType,
|
|
564
|
-
value,
|
|
565
|
-
};
|
|
566
|
-
}
|
|
567
|
-
}
|
|
568
|
-
|
|
569
|
-
// Default to equality
|
|
570
|
-
return {
|
|
571
|
-
type: 'equals',
|
|
572
|
-
value: expected,
|
|
573
|
-
};
|
|
574
|
-
}
|
|
575
|
-
|
|
576
|
-
// These exports are used by the node.js package (index.ts)
|
|
577
|
-
export default {
|
|
578
|
-
matchesSimilarity,
|
|
579
|
-
matchesLlmRubric,
|
|
580
|
-
};
|
package/src/cache.ts
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
import fs from 'fs';
|
|
2
|
-
import path from 'path';
|
|
3
|
-
|
|
4
|
-
import cacheManager from 'cache-manager';
|
|
5
|
-
import fsStore from 'cache-manager-fs-hash';
|
|
6
|
-
|
|
7
|
-
import logger from './logger';
|
|
8
|
-
import { getConfigDirectoryPath, fetchWithRetries } from './util';
|
|
9
|
-
|
|
10
|
-
import type { Cache } from 'cache-manager';
|
|
11
|
-
import type { RequestInfo, RequestInit } from 'node-fetch';
|
|
12
|
-
|
|
13
|
-
let cacheInstance: Cache | undefined;
|
|
14
|
-
|
|
15
|
-
/**
 * Interprets the raw value of the PROMPTFOO_CACHE_ENABLED environment variable.
 *
 * Environment variables are strings, so a plain `Boolean(value)` conversion
 * treats "false" and "0" as enabled. This parser instead recognizes the common
 * "off" spellings explicitly.
 *
 * @param raw The raw environment value, or `undefined` when the variable is unset.
 * @returns `true` when the variable is unset or holds any value other than an
 *   empty string, "0", "false", "no" or "off" (case-insensitive, surrounding
 *   whitespace ignored); `false` otherwise.
 */
function parseCacheEnabledFlag(raw: string | undefined): boolean {
  if (raw === undefined) {
    // An unset variable means "use the default", and the default is enabled.
    return true;
  }
  const normalized = raw.trim().toLowerCase();
  return !['', '0', 'false', 'no', 'off'].includes(normalized);
}

// Whether caching is currently on; initialized from the environment here.
let enabled = parseCacheEnabledFlag(process.env.PROMPTFOO_CACHE_ENABLED);

// Cache backend selector: an explicit PROMPTFOO_CACHE_TYPE wins; otherwise
// 'memory' when NODE_ENV is 'test' and 'disk' in every other case.
const cacheType =
  process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
|
|
22
|
-
|
|
23
|
-
/**
 * Reads a numeric setting from the environment.
 *
 * @param name Name of the environment variable to read.
 * @param fallback Value used when the variable is unset, blank, or not a finite number.
 * @returns The parsed number, or `fallback`.
 */
function readNumericEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === '') {
    return fallback;
  }
  const parsed = Number(raw);
  return Number.isFinite(parsed) ? parsed : fallback;
}

/**
 * Returns the shared cache instance, creating it on first use.
 *
 * The store is the filesystem store when `cacheType` is 'disk' and the
 * in-memory store otherwise. The instance is memoized in `cacheInstance`, so
 * environment variables are only consulted on the first call.
 *
 * @returns The lazily created cache instance.
 */
export function getCache() {
  if (!cacheInstance) {
    const cachePath =
      process.env.PROMPTFOO_CACHE_PATH || path.join(getConfigDirectoryPath(), 'cache');
    const useDisk = cacheType === 'disk';

    // Only the disk store needs the folder, so the in-memory store no longer
    // creates a directory as a side effect.
    if (useDisk && !fs.existsSync(cachePath)) {
      logger.info(`Creating cache folder at ${cachePath}.`);
      fs.mkdirSync(cachePath, { recursive: true });
    }

    cacheInstance = cacheManager.caching({
      store: useDisk ? fsStore : 'memory',
      options: {
        // Environment values are strings; convert them so every option has the
        // same numeric type as its default.
        max: readNumericEnv('PROMPTFOO_CACHE_MAX_FILE_COUNT', 10_000), // number of files
        path: cachePath,
        ttl: readNumericEnv('PROMPTFOO_CACHE_TTL', 60 * 60 * 24 * 14), // in seconds, 14 days
        maxsize: readNumericEnv('PROMPTFOO_CACHE_MAX_SIZE', 1e7), // in bytes, 10mb
        //zip: true, // whether to use gzip compression
      },
    });
  }
  return cacheInstance;
}
|
|
44
|
-
|
|
45
|
-
/**
 * Fetches `url` (with retries) and caches successful responses.
 *
 * @param url Request target passed through to `fetchWithRetries`.
 * @param options Fetch options. Headers are sent with the request but are left
 *   out of the cache key.
 * @param timeout Timeout forwarded to `fetchWithRetries`.
 * @param format How to decode the response body: 'json' (default) or 'text'.
 * @returns The decoded body plus a flag telling whether it came from the cache.
 * @throws Error when the response body cannot be decoded on the caching path.
 */
export async function fetchWithCache(
  url: RequestInfo,
  options: RequestInit = {},
  timeout: number,
  format: 'json' | 'text' = 'json',
): Promise<{ data: any; cached: boolean }> {
  if (!enabled) {
    const resp = await fetchWithRetries(url, options, timeout);
    return {
      cached: false,
      // Honor the requested format here too; previously this path always
      // parsed JSON, which broke 'text' callers when caching was disabled.
      data: format === 'json' ? await resp.json() : await resp.text(),
    };
  }

  const cache = await getCache();

  // Build the key from the URL and the options minus headers.
  // NOTE(review): presumably headers are dropped so credentials neither end up
  // in the key nor fragment the cache; confirm.
  const copy = Object.assign({}, options);
  delete copy.headers;
  const cacheKey = `fetch:${url}:${JSON.stringify(copy)}`;

  // Try to get the cached response
  const cachedResponse = await cache.get(cacheKey);

  if (cachedResponse) {
    logger.debug(`Returning cached response for ${url}: ${cachedResponse}`);
    return {
      cached: true,
      // Entries are stored as JSON strings (see cache.set below), for both formats.
      data: JSON.parse(cachedResponse as string),
    };
  }

  // Fetch the actual data and store it in the cache
  const response = await fetchWithRetries(url, options, timeout);
  try {
    const data = format === 'json' ? await response.json() : await response.text();
    if (response.ok) {
      // Only successful responses are cached; error bodies are still returned.
      logger.debug(`Storing ${url} response in cache: ${JSON.stringify(data)}`);
      await cache.set(cacheKey, JSON.stringify(data));
    }
    return {
      cached: false,
      data,
    };
  } catch (err) {
    throw new Error(`Error parsing response from ${url}: ${err}`);
  }
}
|
|
92
|
-
|
|
93
|
-
/** Turns caching on by setting the module-level `enabled` flag. */
export function enableCache(): void {
  enabled = true;
}
|
|
96
|
-
|
|
97
|
-
/** Logs that the cache is disabled, then clears the module-level `enabled` flag. */
export function disableCache(): void {
  logger.info('Cache is disabled.');
  enabled = false;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Logs the operation and resets the shared cache.
 *
 * @returns Whatever the cache's `reset()` call resolves to.
 */
export async function clearCache() {
  logger.info('Clearing cache...');
  const cache = getCache();
  return cache.reset();
}
|
|
106
|
-
|
|
107
|
-
/** Reports the current value of the module-level `enabled` flag. */
export function isCacheEnabled(): boolean {
  return enabled;
}
|
package/src/esm.ts
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
// ESM-specific helpers that need to be mocked out in tests
|
|
2
|
-
|
|
3
|
-
//import path from 'path';
|
|
4
|
-
//import { fileURLToPath } from 'url';
|
|
5
|
-
|
|
6
|
-
/**
 * Returns the directory of the current module.
 *
 * This relies on `__dirname`. An ESM variant, which derived the directory from
 * `fileURLToPath(import.meta.url)` and `path.dirname`, is intentionally left
 * out because Jest chokes on `import.meta`.
 *
 * @returns The value of `__dirname`.
 */
export function getDirectory(): string {
  const currentDirectory: string = __dirname;
  return currentDirectory;
}
|