promptfoo 0.15.0 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -7
- package/dist/package.json +1 -1
- package/dist/src/assertions.js +1 -1
- package/dist/src/cache.js +2 -2
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +18 -4
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/prompts.js +2 -2
- package/dist/src/prompts.js.map +1 -1
- package/dist/src/types.d.ts +3 -3
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +1 -0
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +30 -22
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/{index-9d27a707.js → index-eb1e9052.js} +1 -1
- package/dist/src/web/client/index.html +1 -1
- package/package.json +1 -1
- package/src/assertions.ts +2 -2
- package/src/cache.ts +3 -3
- package/src/evaluator.ts +27 -11
- package/src/prompts.ts +2 -2
- package/src/types.ts +3 -3
- package/src/util.ts +32 -19
- package/src/web/client/package-lock.json +5726 -0
- package/src/web/client/src/ResultsTable.tsx +24 -19
- package/src/web/client/src/types.ts +1 -1
package/src/cache.ts
CHANGED
|
@@ -5,7 +5,7 @@ import cacheManager from 'cache-manager';
|
|
|
5
5
|
import fsStore from 'cache-manager-fs-hash';
|
|
6
6
|
|
|
7
7
|
import logger from './logger';
|
|
8
|
-
import { getConfigDirectoryPath,
|
|
8
|
+
import { getConfigDirectoryPath, fetchWithRetries } from './util';
|
|
9
9
|
|
|
10
10
|
import type { Cache } from 'cache-manager';
|
|
11
11
|
import type { RequestInfo, RequestInit } from 'node-fetch';
|
|
@@ -48,7 +48,7 @@ export async function fetchJsonWithCache(
|
|
|
48
48
|
timeout: number,
|
|
49
49
|
): Promise<{ data: any; cached: boolean }> {
|
|
50
50
|
if (!enabled) {
|
|
51
|
-
const resp = await
|
|
51
|
+
const resp = await fetchWithRetries(url, options, timeout);
|
|
52
52
|
return {
|
|
53
53
|
cached: false,
|
|
54
54
|
data: await resp.json(),
|
|
@@ -73,7 +73,7 @@ export async function fetchJsonWithCache(
|
|
|
73
73
|
}
|
|
74
74
|
|
|
75
75
|
// Fetch the actual data and store it in the cache
|
|
76
|
-
const response = await
|
|
76
|
+
const response = await fetchWithRetries(url, options, timeout);
|
|
77
77
|
try {
|
|
78
78
|
const data = await response.json();
|
|
79
79
|
if (response.ok) {
|
package/src/evaluator.ts
CHANGED
|
@@ -38,18 +38,24 @@ interface RunEvalOptions {
|
|
|
38
38
|
const DEFAULT_MAX_CONCURRENCY = 4;
|
|
39
39
|
|
|
40
40
|
function generateVarCombinations(
|
|
41
|
-
vars: Record<string, string | string[]>,
|
|
42
|
-
): Record<string, string>[] {
|
|
41
|
+
vars: Record<string, string | string[] | any>,
|
|
42
|
+
): Record<string, string | any[]>[] {
|
|
43
43
|
const keys = Object.keys(vars);
|
|
44
|
-
const combinations: Record<string, string>[] = [{}];
|
|
44
|
+
const combinations: Record<string, string | any[]>[] = [{}];
|
|
45
45
|
|
|
46
46
|
for (const key of keys) {
|
|
47
|
-
|
|
48
|
-
|
|
47
|
+
let values: any[] = Array.isArray(vars[key]) ? vars[key] : [vars[key]];
|
|
48
|
+
|
|
49
|
+
// Check if it's an array but not a string array
|
|
50
|
+
if (Array.isArray(vars[key]) && typeof vars[key][0] !== 'string') {
|
|
51
|
+
values = [vars[key]];
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
const newCombinations: Record<string, any>[] = [];
|
|
49
55
|
|
|
50
56
|
for (const combination of combinations) {
|
|
51
57
|
for (const value of values) {
|
|
52
|
-
newCombinations.push({ ...combination, [key]: value
|
|
58
|
+
newCombinations.push({ ...combination, [key]: value });
|
|
53
59
|
}
|
|
54
60
|
}
|
|
55
61
|
|
|
@@ -229,10 +235,10 @@ class Evaluator {
|
|
|
229
235
|
});
|
|
230
236
|
|
|
231
237
|
const varNames: Set<string> = new Set();
|
|
232
|
-
const varsWithSpecialColsRemoved: Record<string, string | string[]>[] = [];
|
|
238
|
+
const varsWithSpecialColsRemoved: Record<string, string | string[] | object>[] = [];
|
|
233
239
|
for (const testCase of tests) {
|
|
234
240
|
if (testCase.vars) {
|
|
235
|
-
const varWithSpecialColsRemoved: Record<string, string | string[]> = {};
|
|
241
|
+
const varWithSpecialColsRemoved: Record<string, string | string[] | object> = {};
|
|
236
242
|
for (const varName of Object.keys(testCase.vars)) {
|
|
237
243
|
varNames.add(varName);
|
|
238
244
|
varWithSpecialColsRemoved[varName] = testCase.vars[varName];
|
|
@@ -354,13 +360,23 @@ class Evaluator {
|
|
|
354
360
|
resultText = row.response?.output || row.error || '';
|
|
355
361
|
}
|
|
356
362
|
|
|
357
|
-
// TODO(ian): Provide full context in table cells, and have the caller
|
|
358
|
-
// construct the table contents itself.
|
|
359
363
|
const { rowIndex, colIndex } = options;
|
|
360
364
|
if (!table.body[rowIndex]) {
|
|
361
365
|
table.body[rowIndex] = {
|
|
362
366
|
outputs: [],
|
|
363
|
-
vars: table.head.vars
|
|
367
|
+
vars: table.head.vars
|
|
368
|
+
.map((varName) => {
|
|
369
|
+
const varValue = options.test.vars?.[varName] || '';
|
|
370
|
+
if (typeof varValue === 'string') {
|
|
371
|
+
return varValue;
|
|
372
|
+
}
|
|
373
|
+
if (Array.isArray(varValue)) {
|
|
374
|
+
// Only flatten string arrays
|
|
375
|
+
return typeof varValue[0] === 'string' ? varValue : JSON.stringify(varValue);
|
|
376
|
+
}
|
|
377
|
+
return JSON.stringify(varValue);
|
|
378
|
+
})
|
|
379
|
+
.flat(),
|
|
364
380
|
};
|
|
365
381
|
}
|
|
366
382
|
table.body[rowIndex].outputs[colIndex] = {
|
package/src/prompts.ts
CHANGED
|
@@ -30,10 +30,10 @@ Your output is going to be copied directly into the program. It should contain t
|
|
|
30
30
|
|
|
31
31
|
export const REFUSAL_PROMPT_SYSTEM_MESSAGE = {
|
|
32
32
|
role: 'system',
|
|
33
|
-
content: `In this task, you will be given a string of text. Your task is to analyze the text and determine if it contains a refusal or denial
|
|
33
|
+
content: `In this task, you will be given a string of text produced by a large language model. Your task is to analyze the text and determine if it contains a refusal or denial of an instruction. This could be in the form of phrases like 'As an AI language model, I can't...', 'I'm unable to...', 'Sorry, but I can't...', etc. If such a refusal or denial is present in the text, return 'True'. If no such refusal is found, return 'False'.`,
|
|
34
34
|
};
|
|
35
35
|
|
|
36
36
|
export const AI_SELF_REFERENCE_PROMPT_SYSTEM_MESSAGE = {
|
|
37
37
|
role: 'system',
|
|
38
|
-
content: `
|
|
38
|
+
content: `In this task, you will be given a string of text produced by a large language model. Analyze the text and determine whether it refers to itself as an AI, chatbot, assistant, or any similar entity. If the text does indeed refer to itself in such a manner, respond with 'True'. Otherwise, respond with 'False'.`,
|
|
39
39
|
};
|
package/src/types.ts
CHANGED
|
@@ -84,7 +84,7 @@ export interface Prompt {
|
|
|
84
84
|
|
|
85
85
|
export interface EvaluateResult {
|
|
86
86
|
prompt: Prompt;
|
|
87
|
-
vars: Record<string, string>;
|
|
87
|
+
vars: Record<string, string | object>;
|
|
88
88
|
response?: ProviderResponse;
|
|
89
89
|
error?: string;
|
|
90
90
|
success: boolean;
|
|
@@ -174,7 +174,7 @@ export interface TestCase {
|
|
|
174
174
|
description?: string;
|
|
175
175
|
|
|
176
176
|
// Key-value pairs to substitute in the prompt
|
|
177
|
-
vars?: Record<string, string | string[]>;
|
|
177
|
+
vars?: Record<string, string | string[] | object>;
|
|
178
178
|
|
|
179
179
|
// Optional list of automatic checks to run on the LLM output
|
|
180
180
|
assert?: Assertion[];
|
|
@@ -185,7 +185,7 @@ export interface TestCase {
|
|
|
185
185
|
|
|
186
186
|
// Same as a TestCase, except the `vars` object has been flattened into its final form.
|
|
187
187
|
export interface AtomicTestCase extends TestCase {
|
|
188
|
-
vars?: Record<string, string>;
|
|
188
|
+
vars?: Record<string, string | object>;
|
|
189
189
|
}
|
|
190
190
|
|
|
191
191
|
// The test suite defines the "knobs" that we are tuning in prompt engineering: providers and prompts
|
package/src/util.ts
CHANGED
|
@@ -248,34 +248,47 @@ export function writeOutput(
|
|
|
248
248
|
}
|
|
249
249
|
}
|
|
250
250
|
|
|
251
|
-
export function fetchWithTimeout(
|
|
251
|
+
export async function fetchWithTimeout(
|
|
252
252
|
url: RequestInfo,
|
|
253
253
|
options: RequestInit = {},
|
|
254
254
|
timeout: number,
|
|
255
255
|
): Promise<Response> {
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
options.signal = signal;
|
|
256
|
+
const controller = new AbortController();
|
|
257
|
+
const { signal } = controller;
|
|
258
|
+
options.signal = signal;
|
|
260
259
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
260
|
+
const timeoutId = setTimeout(() => {
|
|
261
|
+
controller.abort();
|
|
262
|
+
throw new Error(`Request timed out after ${timeout} ms`);
|
|
263
|
+
}, timeout);
|
|
265
264
|
|
|
265
|
+
try {
|
|
266
|
+
const response = await fetch(url, options);
|
|
267
|
+
clearTimeout(timeoutId);
|
|
268
|
+
return response;
|
|
269
|
+
} catch (error) {
|
|
270
|
+
clearTimeout(timeoutId);
|
|
271
|
+
throw error;
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
|
|
275
|
+
export async function fetchWithRetries(
|
|
276
|
+
url: RequestInfo,
|
|
277
|
+
options: RequestInit = {},
|
|
278
|
+
timeout: number,
|
|
279
|
+
retries: number = 3,
|
|
280
|
+
): Promise<Response> {
|
|
281
|
+
let lastError;
|
|
282
|
+
for (let i = 0; i < retries; i++) {
|
|
266
283
|
try {
|
|
267
|
-
|
|
268
|
-
clearTimeout(timeoutId);
|
|
269
|
-
resolve(response);
|
|
284
|
+
return await fetchWithTimeout(url, options, timeout);
|
|
270
285
|
} catch (error) {
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
clearTimeout(timeoutId);
|
|
275
|
-
reject(error);
|
|
276
|
-
}
|
|
286
|
+
lastError = error;
|
|
287
|
+
const waitTime = Math.pow(2, i) * 1000; // Exponential backoff
|
|
288
|
+
await new Promise((resolve) => setTimeout(resolve, waitTime));
|
|
277
289
|
}
|
|
278
|
-
}
|
|
290
|
+
}
|
|
291
|
+
throw new Error(`Request failed after ${retries} retries: ${(lastError as Error).message}`);
|
|
279
292
|
}
|
|
280
293
|
|
|
281
294
|
export function getConfigDirectoryPath(): string {
|