promptfoo 0.15.0 → 0.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/cache.ts CHANGED
@@ -5,7 +5,7 @@ import cacheManager from 'cache-manager';
5
5
  import fsStore from 'cache-manager-fs-hash';
6
6
 
7
7
  import logger from './logger';
8
- import { getConfigDirectoryPath, fetchWithTimeout } from './util';
8
+ import { getConfigDirectoryPath, fetchWithRetries } from './util';
9
9
 
10
10
  import type { Cache } from 'cache-manager';
11
11
  import type { RequestInfo, RequestInit } from 'node-fetch';
@@ -48,7 +48,7 @@ export async function fetchJsonWithCache(
48
48
  timeout: number,
49
49
  ): Promise<{ data: any; cached: boolean }> {
50
50
  if (!enabled) {
51
- const resp = await fetchWithTimeout(url, options, timeout);
51
+ const resp = await fetchWithRetries(url, options, timeout);
52
52
  return {
53
53
  cached: false,
54
54
  data: await resp.json(),
@@ -73,7 +73,7 @@ export async function fetchJsonWithCache(
73
73
  }
74
74
 
75
75
  // Fetch the actual data and store it in the cache
76
- const response = await fetchWithTimeout(url, options, timeout);
76
+ const response = await fetchWithRetries(url, options, timeout);
77
77
  try {
78
78
  const data = await response.json();
79
79
  if (response.ok) {
package/src/evaluator.ts CHANGED
@@ -38,18 +38,24 @@ interface RunEvalOptions {
38
38
  const DEFAULT_MAX_CONCURRENCY = 4;
39
39
 
40
40
  function generateVarCombinations(
41
- vars: Record<string, string | string[]>,
42
- ): Record<string, string>[] {
41
+ vars: Record<string, string | string[] | any>,
42
+ ): Record<string, string | any[]>[] {
43
43
  const keys = Object.keys(vars);
44
- const combinations: Record<string, string>[] = [{}];
44
+ const combinations: Record<string, string | any[]>[] = [{}];
45
45
 
46
46
  for (const key of keys) {
47
- const values = Array.isArray(vars[key]) ? vars[key] : [vars[key]];
48
- const newCombinations: Record<string, string>[] = [];
47
+ let values: any[] = Array.isArray(vars[key]) ? vars[key] : [vars[key]];
48
+
49
+ // Check if it's an array but not a string array
50
+ if (Array.isArray(vars[key]) && typeof vars[key][0] !== 'string') {
51
+ values = [vars[key]];
52
+ }
53
+
54
+ const newCombinations: Record<string, any>[] = [];
49
55
 
50
56
  for (const combination of combinations) {
51
57
  for (const value of values) {
52
- newCombinations.push({ ...combination, [key]: value as string });
58
+ newCombinations.push({ ...combination, [key]: value });
53
59
  }
54
60
  }
55
61
 
@@ -229,10 +235,10 @@ class Evaluator {
229
235
  });
230
236
 
231
237
  const varNames: Set<string> = new Set();
232
- const varsWithSpecialColsRemoved: Record<string, string | string[]>[] = [];
238
+ const varsWithSpecialColsRemoved: Record<string, string | string[] | object>[] = [];
233
239
  for (const testCase of tests) {
234
240
  if (testCase.vars) {
235
- const varWithSpecialColsRemoved: Record<string, string | string[]> = {};
241
+ const varWithSpecialColsRemoved: Record<string, string | string[] | object> = {};
236
242
  for (const varName of Object.keys(testCase.vars)) {
237
243
  varNames.add(varName);
238
244
  varWithSpecialColsRemoved[varName] = testCase.vars[varName];
@@ -354,13 +360,23 @@ class Evaluator {
354
360
  resultText = row.response?.output || row.error || '';
355
361
  }
356
362
 
357
- // TODO(ian): Provide full context in table cells, and have the caller
358
- // construct the table contents itself.
359
363
  const { rowIndex, colIndex } = options;
360
364
  if (!table.body[rowIndex]) {
361
365
  table.body[rowIndex] = {
362
366
  outputs: [],
363
- vars: table.head.vars.map((varName) => options.test.vars?.[varName] || '').flat(),
367
+ vars: table.head.vars
368
+ .map((varName) => {
369
+ const varValue = options.test.vars?.[varName] || '';
370
+ if (typeof varValue === 'string') {
371
+ return varValue;
372
+ }
373
+ if (Array.isArray(varValue)) {
374
+ // Only flatten string arrays
375
+ return typeof varValue[0] === 'string' ? varValue : JSON.stringify(varValue);
376
+ }
377
+ return JSON.stringify(varValue);
378
+ })
379
+ .flat(),
364
380
  };
365
381
  }
366
382
  table.body[rowIndex].outputs[colIndex] = {
package/src/prompts.ts CHANGED
@@ -30,10 +30,10 @@ Your output is going to be copied directly into the program. It should contain t
30
30
 
31
31
  export const REFUSAL_PROMPT_SYSTEM_MESSAGE = {
32
32
  role: 'system',
33
- content: `In this task, you will be given a string of text. Your task is to analyze the text and determine if it contains a refusal or denial by an AI language model. This could be in the form of phrases like 'As an AI language model, I can't...', 'I'm unable to...', 'Sorry, but I can't...', etc. If such a refusal or denial is present in the text, return 'True'. If no such refusal is found, return 'False'.`,
33
+ content: `In this task, you will be given a string of text produced by a large language model. Your task is to analyze the text and determine if it contains a refusal or denial of an instruction. This could be in the form of phrases like 'As an AI language model, I can't...', 'I'm unable to...', 'Sorry, but I can't...', etc. If such a refusal or denial is present in the text, return 'True'. If no such refusal is found, return 'False'.`,
34
34
  };
35
35
 
36
36
  export const AI_SELF_REFERENCE_PROMPT_SYSTEM_MESSAGE = {
37
37
  role: 'system',
38
- content: `Analyze the given text and determine whether it refers to itself as an AI, chatbot, assistant, or any similar entity. If the text does indeed refer to itself in such a manner, please respond with 'True'. Otherwise, respond with 'False'.`,
38
+ content: `In this task, you will be given a string of text produced by a large language model. Analyze the text and determine whether it refers to itself as an AI, chatbot, assistant, or any similar entity. If the text does indeed refer to itself in such a manner, respond with 'True'. Otherwise, respond with 'False'.`,
39
39
  };
package/src/types.ts CHANGED
@@ -84,7 +84,7 @@ export interface Prompt {
84
84
 
85
85
  export interface EvaluateResult {
86
86
  prompt: Prompt;
87
- vars: Record<string, string>;
87
+ vars: Record<string, string | object>;
88
88
  response?: ProviderResponse;
89
89
  error?: string;
90
90
  success: boolean;
@@ -174,7 +174,7 @@ export interface TestCase {
174
174
  description?: string;
175
175
 
176
176
  // Key-value pairs to substitute in the prompt
177
- vars?: Record<string, string | string[]>;
177
+ vars?: Record<string, string | string[] | object>;
178
178
 
179
179
  // Optional list of automatic checks to run on the LLM output
180
180
  assert?: Assertion[];
@@ -185,7 +185,7 @@ export interface TestCase {
185
185
 
186
186
  // Same as a TestCase, except the `vars` object has been flattened into its final form.
187
187
  export interface AtomicTestCase extends TestCase {
188
- vars?: Record<string, string>;
188
+ vars?: Record<string, string | object>;
189
189
  }
190
190
 
191
191
  // The test suite defines the "knobs" that we are tuning in prompt engineering: providers and prompts
package/src/util.ts CHANGED
@@ -248,34 +248,47 @@ export function writeOutput(
248
248
  }
249
249
  }
250
250
 
251
- export function fetchWithTimeout(
251
+ export async function fetchWithTimeout(
252
252
  url: RequestInfo,
253
253
  options: RequestInit = {},
254
254
  timeout: number,
255
255
  ): Promise<Response> {
256
- return new Promise(async (resolve, reject) => {
257
- const controller = new AbortController();
258
- const { signal } = controller;
259
- options.signal = signal;
256
+ const controller = new AbortController();
257
+ const { signal } = controller;
258
+ options.signal = signal;
260
259
 
261
- const timeoutId = setTimeout(() => {
262
- controller.abort();
263
- reject(new Error(`Request timed out after ${timeout} ms`));
264
- }, timeout);
260
+ const timeoutId = setTimeout(() => {
261
+ controller.abort();
262
+ throw new Error(`Request timed out after ${timeout} ms`);
263
+ }, timeout);
265
264
 
265
+ try {
266
+ const response = await fetch(url, options);
267
+ clearTimeout(timeoutId);
268
+ return response;
269
+ } catch (error) {
270
+ clearTimeout(timeoutId);
271
+ throw error;
272
+ }
273
+ }
274
+
275
+ export async function fetchWithRetries(
276
+ url: RequestInfo,
277
+ options: RequestInit = {},
278
+ timeout: number,
279
+ retries: number = 3,
280
+ ): Promise<Response> {
281
+ let lastError;
282
+ for (let i = 0; i < retries; i++) {
266
283
  try {
267
- const response = await fetch(url, options);
268
- clearTimeout(timeoutId);
269
- resolve(response);
284
+ return await fetchWithTimeout(url, options, timeout);
270
285
  } catch (error) {
271
- if (error instanceof Error && error.name === 'AbortError') {
272
- // Fetch request was aborted, no need to reject again
273
- } else {
274
- clearTimeout(timeoutId);
275
- reject(error);
276
- }
286
+ lastError = error;
287
+ const waitTime = Math.pow(2, i) * 1000; // Exponential backoff
288
+ await new Promise((resolve) => setTimeout(resolve, waitTime));
277
289
  }
278
- });
290
+ }
291
+ throw new Error(`Request failed after ${retries} retries: ${(lastError as Error).message}`);
279
292
  }
280
293
 
281
294
  export function getConfigDirectoryPath(): string {