promptfoo 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/README.md +1 -1
  2. package/dist/package.json +4 -4
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +5 -0
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/evaluator.js +1 -1
  7. package/dist/src/evaluator.js.map +1 -1
  8. package/dist/src/index.d.ts +1 -5
  9. package/dist/src/index.d.ts.map +1 -1
  10. package/dist/src/index.js +1 -1
  11. package/dist/src/index.js.map +1 -1
  12. package/dist/src/matchers.d.ts +3 -2
  13. package/dist/src/matchers.d.ts.map +1 -1
  14. package/dist/src/matchers.js +37 -9
  15. package/dist/src/matchers.js.map +1 -1
  16. package/dist/src/providers/anthropic.d.ts +5 -3
  17. package/dist/src/providers/anthropic.d.ts.map +1 -1
  18. package/dist/src/providers/anthropic.js +8 -10
  19. package/dist/src/providers/anthropic.js.map +1 -1
  20. package/dist/src/providers/azureopenai.d.ts +9 -8
  21. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  22. package/dist/src/providers/azureopenai.js +33 -36
  23. package/dist/src/providers/azureopenai.js.map +1 -1
  24. package/dist/src/providers/openai.d.ts +12 -12
  25. package/dist/src/providers/openai.d.ts.map +1 -1
  26. package/dist/src/providers/openai.js +54 -65
  27. package/dist/src/providers/openai.js.map +1 -1
  28. package/dist/src/providers/replicate.d.ts +4 -2
  29. package/dist/src/providers/replicate.d.ts.map +1 -1
  30. package/dist/src/providers/replicate.js +10 -8
  31. package/dist/src/providers/replicate.js.map +1 -1
  32. package/dist/src/providers/webhook.d.ts +9 -0
  33. package/dist/src/providers/webhook.d.ts.map +1 -0
  34. package/dist/src/providers/webhook.js +54 -0
  35. package/dist/src/providers/webhook.js.map +1 -0
  36. package/dist/src/providers.d.ts +1 -1
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +36 -28
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/suggestions.d.ts.map +1 -1
  41. package/dist/src/suggestions.js +1 -3
  42. package/dist/src/suggestions.js.map +1 -1
  43. package/dist/src/types.d.ts +7 -1
  44. package/dist/src/types.d.ts.map +1 -1
  45. package/dist/src/util.js +1 -1
  46. package/dist/src/util.js.map +1 -1
  47. package/dist/src/web/nextui/404/index.html +1 -1
  48. package/dist/src/web/nextui/404.html +1 -1
  49. package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
  50. package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
  51. package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
  52. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
  53. package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
  54. package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
  55. package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
  56. package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
  57. package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
  58. package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
  59. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
  60. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
  61. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
  62. package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
  63. package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
  64. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
  65. package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
  66. package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
  67. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
  68. package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
  69. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
  70. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
  71. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
  72. package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
  73. package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
  74. package/dist/src/web/nextui/eval/index.html +1 -1
  75. package/dist/src/web/nextui/eval/index.txt +6 -6
  76. package/dist/src/web/nextui/index.html +1 -1
  77. package/dist/src/web/nextui/index.txt +5 -5
  78. package/dist/src/web/nextui/setup/index.html +27 -1
  79. package/dist/src/web/nextui/setup/index.txt +9 -9
  80. package/dist/src/web/server.d.ts.map +1 -1
  81. package/dist/src/web/server.js +9 -5
  82. package/dist/src/web/server.js.map +1 -1
  83. package/package.json +4 -4
  84. package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
  85. package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
  86. package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
  87. package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
  88. package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
  89. package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
  90. package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
  91. package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
  92. package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
  93. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
  94. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
  95. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
  96. package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
  97. package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
  98. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
  99. package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
  100. package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
  101. package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
  102. package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
  103. package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
  104. package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
  105. package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
  106. package/dist/src/web/nextui/api +0 -1
  107. package/src/__mocks__/esm.ts +0 -3
  108. package/src/assertions.ts +0 -580
  109. package/src/cache.ts +0 -109
  110. package/src/esm.ts +0 -13
  111. package/src/evaluator.ts +0 -500
  112. package/src/index.ts +0 -52
  113. package/src/logger.ts +0 -46
  114. package/src/main.ts +0 -442
  115. package/src/matchers.ts +0 -120
  116. package/src/onboarding.ts +0 -69
  117. package/src/prompts.ts +0 -39
  118. package/src/providers/anthropic.ts +0 -88
  119. package/src/providers/azureopenai.ts +0 -299
  120. package/src/providers/llama.ts +0 -95
  121. package/src/providers/localai.ts +0 -111
  122. package/src/providers/ollama.ts +0 -89
  123. package/src/providers/openai.ts +0 -337
  124. package/src/providers/replicate.ts +0 -99
  125. package/src/providers/scriptCompletion.ts +0 -35
  126. package/src/providers/shared.ts +0 -34
  127. package/src/providers.ts +0 -192
  128. package/src/share.ts +0 -27
  129. package/src/suggestions.ts +0 -63
  130. package/src/table.ts +0 -43
  131. package/src/tableOutput.html +0 -52
  132. package/src/telemetry.ts +0 -70
  133. package/src/types.ts +0 -299
  134. package/src/updates.ts +0 -46
  135. package/src/util.ts +0 -543
  136. package/src/web/nextui/.eslintrc.json +0 -3
  137. package/src/web/nextui/next.config.js +0 -14
  138. package/src/web/nextui/package-lock.json +0 -4644
  139. package/src/web/nextui/package.json +0 -47
  140. package/src/web/nextui/public/favicon.ico +0 -0
  141. package/src/web/nextui/public/logo.svg +0 -30
  142. package/src/web/nextui/src/app/Home.css +0 -3
  143. package/src/web/nextui/src/app/api/route.ts +0 -6
  144. package/src/web/nextui/src/app/components/DarkMode.css +0 -22
  145. package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
  146. package/src/web/nextui/src/app/components/Logo.css +0 -32
  147. package/src/web/nextui/src/app/components/Logo.tsx +0 -11
  148. package/src/web/nextui/src/app/components/PageShell.css +0 -33
  149. package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
  150. package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
  151. package/src/web/nextui/src/app/eval/Eval.css +0 -13
  152. package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
  153. package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
  154. package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
  155. package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
  156. package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
  157. package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
  158. package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
  159. package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
  160. package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
  161. package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
  162. package/src/web/nextui/src/app/eval/index.css +0 -0
  163. package/src/web/nextui/src/app/eval/page.tsx +0 -8
  164. package/src/web/nextui/src/app/eval/store.ts +0 -18
  165. package/src/web/nextui/src/app/eval/types.ts +0 -20
  166. package/src/web/nextui/src/app/globals.css +0 -58
  167. package/src/web/nextui/src/app/layout.tsx +0 -25
  168. package/src/web/nextui/src/app/page.tsx +0 -7
  169. package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
  170. package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
  171. package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
  172. package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
  173. package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
  174. package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
  175. package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
  176. package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
  177. package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
  178. package/src/web/nextui/src/app/setup/page.css +0 -3
  179. package/src/web/nextui/src/app/setup/page.tsx +0 -160
  180. package/src/web/nextui/src/util/api.ts +0 -1
  181. package/src/web/nextui/src/util/store.ts +0 -53
  182. package/src/web/nextui/tsconfig.json +0 -28
  183. package/src/web/server.ts +0 -151
  184. /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/assertions.ts DELETED
@@ -1,580 +0,0 @@
1
- import rouge from 'rouge';
2
- import invariant from 'tiny-invariant';
3
- import Ajv from 'ajv';
4
- import { distance as levenshtein } from 'fastest-levenshtein';
5
-
6
- import telemetry from './telemetry';
7
- import { fetchWithRetries, getNunjucksEngine } from './util';
8
- import { matchesSimilarity, matchesLlmRubric } from './matchers';
9
-
10
- import type { Assertion, AssertionType, GradingResult, AtomicTestCase } from './types';
11
-
12
- const DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD = 0.8;
13
-
14
- const ajv = new Ajv();
15
- const nunjucks = getNunjucksEngine();
16
-
17
- function handleRougeScore(
18
- baseType: 'rouge-n',
19
- assertion: Assertion,
20
- expected: string | string[],
21
- output: string,
22
- inverted: boolean,
23
- ): GradingResult {
24
- const fnName = baseType[baseType.length - 1] as 'n' | 'l' | 's';
25
- const rougeMethod = rouge[fnName];
26
- const score = rougeMethod(output, expected);
27
- const pass = score >= (assertion.threshold || 0.75) != inverted;
28
-
29
- return {
30
- pass,
31
- score: inverted ? 1 - score : score,
32
- reason: pass
33
- ? `${baseType.toUpperCase()} score ${score} is greater than or equal to threshold ${
34
- assertion.threshold || 0.75
35
- }`
36
- : `${baseType.toUpperCase()} score ${score} is less than threshold ${
37
- assertion.threshold || 0.75
38
- }`,
39
- assertion,
40
- };
41
- }
42
-
43
- export async function runAssertions(test: AtomicTestCase, output: string): Promise<GradingResult> {
44
- const tokensUsed = {
45
- total: 0,
46
- prompt: 0,
47
- completion: 0,
48
- };
49
-
50
- if (!test.assert || test.assert.length < 1) {
51
- return { pass: true, score: 1, reason: 'No assertions', tokensUsed, assertion: null };
52
- }
53
-
54
- let totalScore = 0;
55
- let totalWeight = 0;
56
- let allPass = true;
57
- let failedReason = '';
58
- const componentResults: GradingResult[] = [];
59
-
60
- for (const assertion of test.assert) {
61
- const weight = assertion.weight || 1;
62
- totalWeight += weight;
63
-
64
- const result = await runAssertion(assertion, test, output);
65
- totalScore += result.score * weight;
66
- componentResults.push(result);
67
-
68
- if (result.tokensUsed) {
69
- tokensUsed.total += result.tokensUsed.total;
70
- tokensUsed.prompt += result.tokensUsed.prompt;
71
- tokensUsed.completion += result.tokensUsed.completion;
72
- }
73
-
74
- if (!result.pass) {
75
- allPass = false;
76
- failedReason = result.reason;
77
- if (process.env.PROMPTFOO_SHORT_CIRCUIT_TEST_FAILURES) {
78
- return result;
79
- }
80
- }
81
- }
82
-
83
- return {
84
- pass: allPass,
85
- score: totalScore / totalWeight,
86
- reason: allPass ? 'All assertions passed' : failedReason,
87
- tokensUsed,
88
- componentResults,
89
- assertion: null,
90
- };
91
- }
92
-
93
- export async function runAssertion(
94
- assertion: Assertion,
95
- test: AtomicTestCase,
96
- output: string,
97
- ): Promise<GradingResult> {
98
- let pass: boolean = false;
99
- let score: number = 0.0;
100
-
101
- invariant(assertion.type, `Assertion must have a type: ${JSON.stringify(assertion)}`);
102
-
103
- const inverse = assertion.type.startsWith('not-');
104
- const baseType = inverse ? assertion.type.slice(4) : assertion.type;
105
-
106
- telemetry.record('assertion_used', {
107
- type: baseType,
108
- });
109
-
110
- //render assertion values
111
- let renderedValue = assertion.value;
112
- // renderString for assertion values
113
- if (renderedValue && typeof renderedValue === 'string') {
114
- renderedValue = nunjucks.renderString(renderedValue, test.vars || {});
115
- } else if (renderedValue && Array.isArray(renderedValue)) {
116
- renderedValue = renderedValue.map((v) => nunjucks.renderString(v, test.vars || {}));
117
- }
118
-
119
- if (baseType === 'equals') {
120
- pass = renderedValue === output;
121
- return {
122
- pass,
123
- score: pass ? 1 : 0,
124
- reason: pass ? 'Assertion passed' : `Expected output "${renderedValue}"`,
125
- assertion,
126
- };
127
- }
128
-
129
- if (baseType === 'is-json') {
130
- let parsedJson;
131
- try {
132
- parsedJson = JSON.parse(output);
133
- pass = !inverse;
134
- } catch (err) {
135
- pass = inverse;
136
- }
137
-
138
- if (pass && renderedValue) {
139
- invariant(typeof renderedValue === 'object', 'is-json assertion must have an object value');
140
- const validate = ajv.compile(renderedValue);
141
- pass = validate(parsedJson);
142
- if (!pass) {
143
- return {
144
- pass,
145
- score: 0,
146
- reason: `JSON does not conform to the provided schema. Errors: ${ajv.errorsText(
147
- validate.errors,
148
- )}`,
149
- assertion,
150
- };
151
- }
152
- }
153
-
154
- return {
155
- pass,
156
- score: pass ? 1 : 0,
157
- reason: pass ? 'Assertion passed' : 'Expected output to be valid JSON',
158
- assertion,
159
- };
160
- }
161
-
162
- if (baseType === 'contains') {
163
- invariant(renderedValue, '"contains" assertion type must have a string or number value');
164
- invariant(
165
- typeof renderedValue === 'string' || typeof renderedValue === 'number',
166
- '"contains" assertion type must have a string or number value',
167
- );
168
- pass = output.includes(String(renderedValue)) !== inverse;
169
- return {
170
- pass,
171
- score: pass ? 1 : 0,
172
- reason: pass
173
- ? 'Assertion passed'
174
- : `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
175
- assertion,
176
- };
177
- }
178
-
179
- if (baseType === 'contains-any') {
180
- invariant(renderedValue, '"contains-any" assertion type must have a value');
181
- invariant(
182
- Array.isArray(renderedValue),
183
- '"contains-any" assertion type must have an array value',
184
- );
185
- pass = renderedValue.some((value) => output.includes(value)) !== inverse;
186
- return {
187
- pass,
188
- score: pass ? 1 : 0,
189
- reason: pass
190
- ? 'Assertion passed'
191
- : `Expected output to ${inverse ? 'not ' : ''}contain one of "${renderedValue.join(', ')}"`,
192
- assertion,
193
- };
194
- }
195
-
196
- if (baseType === 'contains-all') {
197
- invariant(renderedValue, '"contains-all" assertion type must have a value');
198
- invariant(
199
- Array.isArray(renderedValue),
200
- '"contains-all" assertion type must have an array value',
201
- );
202
- pass = renderedValue.every((value) => output.includes(value)) !== inverse;
203
- return {
204
- pass,
205
- score: pass ? 1 : 0,
206
- reason: pass
207
- ? 'Assertion passed'
208
- : `Expected output to ${inverse ? 'not ' : ''}contain all of "${renderedValue.join(', ')}"`,
209
- assertion,
210
- };
211
- }
212
-
213
- if (baseType === 'regex') {
214
- invariant(renderedValue, '"regex" assertion type must have a string value');
215
- invariant(
216
- typeof renderedValue === 'string',
217
- '"contains" assertion type must have a string value',
218
- );
219
- const regex = new RegExp(renderedValue);
220
- pass = regex.test(output) !== inverse;
221
- return {
222
- pass,
223
- score: pass ? 1 : 0,
224
- reason: pass
225
- ? 'Assertion passed'
226
- : `Expected output to ${inverse ? 'not ' : ''}match regex "${renderedValue}"`,
227
- assertion,
228
- };
229
- }
230
-
231
- if (baseType === 'icontains') {
232
- invariant(renderedValue, '"icontains" assertion type must have a string or number value');
233
- invariant(
234
- typeof renderedValue === 'string' || typeof renderedValue === 'number',
235
- '"icontains" assertion type must have a string or number value',
236
- );
237
- pass = output.toLowerCase().includes(String(renderedValue).toLowerCase()) !== inverse;
238
- return {
239
- pass,
240
- score: pass ? 1 : 0,
241
- reason: pass
242
- ? 'Assertion passed'
243
- : `Expected output to ${inverse ? 'not ' : ''}contain "${renderedValue}"`,
244
- assertion,
245
- };
246
- }
247
-
248
- if (baseType === 'starts-with') {
249
- invariant(renderedValue, '"starts-with" assertion type must have a string value');
250
- invariant(
251
- typeof renderedValue === 'string',
252
- '"starts-with" assertion type must have a string value',
253
- );
254
- pass = output.startsWith(String(renderedValue)) !== inverse;
255
- return {
256
- pass,
257
- score: pass ? 1 : 0,
258
- reason: pass
259
- ? 'Assertion passed'
260
- : `Expected output to ${inverse ? 'not ' : ''}start with "${renderedValue}"`,
261
- assertion,
262
- };
263
- }
264
-
265
- if (baseType === 'contains-json') {
266
- const jsonMatch = containsJSON(output);
267
- pass = jsonMatch !== inverse;
268
-
269
- if (pass && renderedValue) {
270
- invariant(
271
- typeof renderedValue === 'object',
272
- 'contains-json assertion must have an object value',
273
- );
274
- const validate = ajv.compile(renderedValue);
275
- pass = validate(jsonMatch);
276
- if (!pass) {
277
- return {
278
- pass,
279
- score: 0,
280
- reason: `JSON does not conform to the provided schema. Errors: ${ajv.errorsText(
281
- validate.errors,
282
- )}`,
283
- assertion,
284
- };
285
- }
286
- }
287
-
288
- return {
289
- pass,
290
- score: pass ? 1 : 0,
291
- reason: pass ? 'Assertion passed' : 'Expected output to contain valid JSON',
292
- assertion,
293
- };
294
- }
295
-
296
- const context = {
297
- vars: test.vars || {},
298
- };
299
-
300
- if (baseType === 'javascript') {
301
- try {
302
- if (typeof assertion.value === 'function') {
303
- return assertion.value(output, test, assertion);
304
- }
305
- invariant(typeof renderedValue === 'string', 'javascript assertion must have a string value');
306
- const functionBody = renderedValue.includes('\n') ? renderedValue : `return ${renderedValue}`;
307
- const customFunction = new Function('output', 'context', functionBody);
308
- const result = customFunction(output, context) as any;
309
- if (typeof result === 'boolean') {
310
- pass = result !== inverse;
311
- score = 1.0;
312
- } else if (typeof result === 'number') {
313
- pass = true;
314
- score = result;
315
- } else if (typeof result === 'object') {
316
- return result;
317
- } else {
318
- throw new Error('Custom function must return a boolean or number');
319
- }
320
- } catch (err) {
321
- return {
322
- pass: false,
323
- score: 0,
324
- reason: `Custom function threw error: ${(err as Error).message}
325
- ${renderedValue}`,
326
- assertion,
327
- };
328
- }
329
- return {
330
- pass,
331
- score,
332
- reason: pass
333
- ? 'Assertion passed'
334
- : `Custom function returned ${inverse ? 'true' : 'false'}
335
- ${renderedValue}`,
336
- assertion,
337
- };
338
- }
339
-
340
- if (baseType === 'python') {
341
- try {
342
- const { execSync } = require('child_process');
343
- const escapedOutput = output.replace(/'/g, "\\'").replace(/"/g, '\\"');
344
- const escapedContext = JSON.stringify(context).replace(/'/g, "\\'").replace(/"/g, '\\"');
345
- const result = execSync(
346
- `python -c "import json; import math; import os; import sys; import re; import datetime; import random; import collections; output='${escapedOutput}'; context='${escapedContext}'; print(json.dumps(${assertion.value}))"`,
347
- )
348
- .toString()
349
- .trim();
350
- if (result === 'true') {
351
- pass = true;
352
- score = 1.0;
353
- } else if (result === 'false') {
354
- pass = false;
355
- score = 0.0;
356
- } else if (result.startsWith('{')) {
357
- return JSON.parse(result);
358
- } else {
359
- pass = true;
360
- score = parseFloat(result);
361
- if (isNaN(score)) {
362
- throw new Error(
363
- 'Python code must return a boolean, number, or {pass, score, reason} object',
364
- );
365
- }
366
- }
367
- } catch (err) {
368
- return {
369
- pass: false,
370
- score: 0,
371
- reason: `Python code execution failed: ${(err as Error).message}`,
372
- assertion,
373
- };
374
- }
375
- return {
376
- pass,
377
- score,
378
- reason: pass
379
- ? 'Assertion passed'
380
- : `Python code returned ${pass ? 'true' : 'false'}
381
- ${assertion.value}`,
382
- assertion,
383
- };
384
- }
385
-
386
- if (baseType === 'similar') {
387
- invariant(renderedValue, 'Similarity assertion must have a string value');
388
- invariant(
389
- typeof renderedValue === 'string',
390
- '"contains" assertion type must have a string value',
391
- );
392
- return {
393
- assertion,
394
- ...(await matchesSimilarity(renderedValue, output, assertion.threshold || 0.75, inverse)),
395
- };
396
- }
397
-
398
- if (baseType === 'llm-rubric') {
399
- invariant(renderedValue, 'Similarity assertion must have a string value');
400
- invariant(
401
- typeof renderedValue === 'string',
402
- '"contains" assertion type must have a string value',
403
- );
404
-
405
- // Assertion provider overrides test provider
406
- test.options = test.options || {};
407
- test.options.provider = assertion.provider || test.options.provider;
408
-
409
- return {
410
- assertion,
411
- ...(await matchesLlmRubric(renderedValue, output, test.options)),
412
- };
413
- }
414
-
415
- if (baseType === 'webhook') {
416
- invariant(renderedValue, '"webhook" assertion type must have a URL value');
417
- invariant(typeof renderedValue === 'string', '"webhook" assertion type must have a URL value');
418
-
419
- try {
420
- const context = {
421
- vars: test.vars || {},
422
- };
423
- const response = await fetchWithRetries(
424
- renderedValue,
425
- {
426
- method: 'POST',
427
- headers: {
428
- 'Content-Type': 'application/json',
429
- },
430
- body: JSON.stringify({ output, context }),
431
- },
432
- process.env.WEBHOOK_TIMEOUT ? parseInt(process.env.WEBHOOK_TIMEOUT, 10) : 5000,
433
- );
434
-
435
- if (!response.ok) {
436
- throw new Error(`Webhook response status: ${response.status}`);
437
- }
438
-
439
- const jsonResponse = await response.json();
440
- pass = jsonResponse.pass !== inverse;
441
- score =
442
- typeof jsonResponse.score === 'undefined'
443
- ? pass
444
- ? 1
445
- : 0
446
- : inverse
447
- ? 1 - jsonResponse.score
448
- : jsonResponse.score;
449
- } catch (err) {
450
- return {
451
- pass: false,
452
- score: 0,
453
- reason: `Webhook error: ${(err as Error).message}`,
454
- assertion,
455
- };
456
- }
457
-
458
- return {
459
- pass,
460
- score,
461
- reason: pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`,
462
- assertion,
463
- };
464
- }
465
-
466
- if (baseType === 'rouge-n') {
467
- invariant(
468
- typeof renderedValue === 'string' || Array.isArray(renderedValue),
469
- '"rouge" assertion type must be a value (string or string array)',
470
- );
471
- return handleRougeScore(baseType, assertion, renderedValue, output, inverse);
472
- }
473
-
474
- if (baseType === 'levenshtein') {
475
- invariant(
476
- typeof renderedValue === 'string',
477
- '"levenshtein" assertion type must have a string value',
478
- );
479
- const levDistance = levenshtein(output, renderedValue);
480
- pass = levDistance <= (assertion.threshold || 5);
481
- return {
482
- pass,
483
- score: pass ? 1 : 0,
484
- reason: pass
485
- ? 'Assertion passed'
486
- : `Levenshtein distance ${levDistance} is greater than threshold ${
487
- assertion.threshold || 5
488
- }`,
489
- assertion,
490
- };
491
- }
492
-
493
- throw new Error('Unknown assertion type: ' + assertion.type);
494
- }
495
-
496
- function containsJSON(str: string): boolean {
497
- // Regular expression to check for JSON-like pattern
498
- const jsonPattern = /({[\s\S]*}|\[[\s\S]*])/;
499
-
500
- const match = str.match(jsonPattern);
501
-
502
- if (!match) {
503
- return false;
504
- }
505
-
506
- try {
507
- return JSON.parse(match[0]);
508
- } catch (error) {
509
- return false;
510
- }
511
- }
512
-
513
- export function assertionFromString(expected: string): Assertion {
514
- // Legacy options
515
- if (expected.startsWith('fn:') || expected.startsWith('eval:')) {
516
- // TODO(1.0): delete eval: legacy option
517
- const sliceLength = expected.startsWith('fn:') ? 'fn:'.length : 'eval:'.length;
518
- const functionBody = expected.slice(sliceLength);
519
- return {
520
- type: 'javascript',
521
- value: functionBody,
522
- };
523
- }
524
- if (expected.startsWith('grade:')) {
525
- return {
526
- type: 'llm-rubric',
527
- value: expected.slice(6),
528
- };
529
- }
530
-
531
- // New options
532
- const assertionRegex =
533
- /^(not-)?(equals|contains-any|contains-all|contains-json|is-json|regex|icontains|contains|webhook|rouge-n|similar|starts-with|levenshtein)(?:\((\d+(?:\.\d+)?)\))?(?::(.*))?$/;
534
- const regexMatch = expected.match(assertionRegex);
535
-
536
- if (regexMatch) {
537
- const [_, notPrefix, type, thresholdStr, value] = regexMatch;
538
- const fullType = notPrefix ? `not-${type}` : type;
539
- const threshold = parseFloat(thresholdStr);
540
-
541
- if (type === 'contains-any' || type === 'contains-all') {
542
- return {
543
- type: fullType as AssertionType,
544
- value: value.split(',').map((s) => s.trim()),
545
- };
546
- } else if (type === 'contains-json' || type === 'is-json') {
547
- return {
548
- type: fullType as AssertionType,
549
- };
550
- } else if (
551
- type === 'rouge-n' ||
552
- type === 'similar' ||
553
- type === 'starts-with' ||
554
- type === 'levenshtein'
555
- ) {
556
- return {
557
- type: fullType as AssertionType,
558
- value,
559
- threshold: threshold || (type === 'similar' ? DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD : 0.75),
560
- };
561
- } else {
562
- return {
563
- type: fullType as AssertionType,
564
- value,
565
- };
566
- }
567
- }
568
-
569
- // Default to equality
570
- return {
571
- type: 'equals',
572
- value: expected,
573
- };
574
- }
575
-
576
- // These exports are used by the node.js package (index.ts)
577
- export default {
578
- matchesSimilarity,
579
- matchesLlmRubric,
580
- };
package/src/cache.ts DELETED
@@ -1,109 +0,0 @@
1
- import fs from 'fs';
2
- import path from 'path';
3
-
4
- import cacheManager from 'cache-manager';
5
- import fsStore from 'cache-manager-fs-hash';
6
-
7
- import logger from './logger';
8
- import { getConfigDirectoryPath, fetchWithRetries } from './util';
9
-
10
- import type { Cache } from 'cache-manager';
11
- import type { RequestInfo, RequestInit } from 'node-fetch';
12
-
13
- let cacheInstance: Cache | undefined;
14
-
15
- let enabled =
16
- typeof process.env.PROMPTFOO_CACHE_ENABLED === 'undefined'
17
- ? true
18
- : Boolean(process.env.PROMPTFOO_CACHE_ENABLED);
19
-
20
- const cacheType =
21
- process.env.PROMPTFOO_CACHE_TYPE || (process.env.NODE_ENV === 'test' ? 'memory' : 'disk');
22
-
23
- export function getCache() {
24
- if (!cacheInstance) {
25
- const cachePath =
26
- process.env.PROMPTFOO_CACHE_PATH || path.join(getConfigDirectoryPath(), 'cache');
27
- if (!fs.existsSync(cachePath)) {
28
- logger.info(`Creating cache folder at ${cachePath}.`);
29
- fs.mkdirSync(cachePath, { recursive: true });
30
- }
31
- cacheInstance = cacheManager.caching({
32
- store: cacheType === 'disk' ? fsStore : 'memory',
33
- options: {
34
- max: process.env.PROMPTFOO_CACHE_MAX_FILE_COUNT || 10_000, // number of files
35
- path: cachePath,
36
- ttl: process.env.PROMPTFOO_CACHE_TTL || 60 * 60 * 24 * 14, // in seconds, 14 days
37
- maxsize: process.env.PROMPTFOO_CACHE_MAX_SIZE || 1e7, // in bytes, 10mb
38
- //zip: true, // whether to use gzip compression
39
- },
40
- });
41
- }
42
- return cacheInstance;
43
- }
44
-
45
- export async function fetchWithCache(
46
- url: RequestInfo,
47
- options: RequestInit = {},
48
- timeout: number,
49
- format: 'json' | 'text' = 'json',
50
- ): Promise<{ data: any; cached: boolean }> {
51
- if (!enabled) {
52
- const resp = await fetchWithRetries(url, options, timeout);
53
- return {
54
- cached: false,
55
- data: await resp.json(),
56
- };
57
- }
58
-
59
- const cache = await getCache();
60
-
61
- const copy = Object.assign({}, options);
62
- delete copy.headers;
63
- const cacheKey = `fetch:${url}:${JSON.stringify(copy)}`;
64
-
65
- // Try to get the cached response
66
- const cachedResponse = await cache.get(cacheKey);
67
-
68
- if (cachedResponse) {
69
- logger.debug(`Returning cached response for ${url}: ${cachedResponse}`);
70
- return {
71
- cached: true,
72
- data: JSON.parse(cachedResponse as string),
73
- };
74
- }
75
-
76
- // Fetch the actual data and store it in the cache
77
- const response = await fetchWithRetries(url, options, timeout);
78
- try {
79
- const data = format === 'json' ? await response.json() : await response.text();
80
- if (response.ok) {
81
- logger.debug(`Storing ${url} response in cache: ${JSON.stringify(data)}`);
82
- await cache.set(cacheKey, JSON.stringify(data));
83
- }
84
- return {
85
- cached: false,
86
- data,
87
- };
88
- } catch (err) {
89
- throw new Error(`Error parsing response from ${url}: ${err}`);
90
- }
91
- }
92
-
93
- export function enableCache() {
94
- enabled = true;
95
- }
96
-
97
- export function disableCache() {
98
- logger.info('Cache is disabled.');
99
- enabled = false;
100
- }
101
-
102
- export async function clearCache() {
103
- logger.info('Clearing cache...');
104
- return getCache().reset();
105
- }
106
-
107
- export function isCacheEnabled() {
108
- return enabled;
109
- }
package/src/esm.ts DELETED
@@ -1,13 +0,0 @@
1
- // esm-specific crap that needs to get mocked out in tests
2
-
3
- //import path from 'path';
4
- //import { fileURLToPath } from 'url';
5
-
6
- export function getDirectory(): string {
7
- /*
8
- // @ts-ignore: Jest chokes on this
9
- const __filename = fileURLToPath(import.meta.url);
10
- return path.dirname(__filename);
11
- */
12
- return __dirname;
13
- }