promptfoo 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/README.md +5 -5
  2. package/dist/assertions.d.ts +1 -1
  3. package/dist/assertions.d.ts.map +1 -1
  4. package/dist/assertions.js +10 -10
  5. package/dist/assertions.js.map +1 -1
  6. package/dist/cache.js +9 -9
  7. package/dist/cache.js.map +1 -1
  8. package/dist/evaluator.d.ts +1 -1
  9. package/dist/evaluator.d.ts.map +1 -1
  10. package/dist/evaluator.js +29 -22
  11. package/dist/evaluator.js.map +1 -1
  12. package/dist/index.d.ts +10 -10
  13. package/dist/index.d.ts.map +1 -1
  14. package/dist/index.js +18 -14
  15. package/dist/index.js.map +1 -1
  16. package/dist/main.js +41 -40
  17. package/dist/main.js.map +1 -1
  18. package/dist/providers/localai.js +11 -11
  19. package/dist/providers/localai.js.map +1 -1
  20. package/dist/providers/openai.d.ts.map +1 -1
  21. package/dist/providers/openai.js +30 -21
  22. package/dist/providers/openai.js.map +1 -1
  23. package/dist/providers.d.ts +3 -3
  24. package/dist/providers.d.ts.map +1 -1
  25. package/dist/providers.js +15 -15
  26. package/dist/providers.js.map +1 -1
  27. package/dist/types.d.ts +1 -1
  28. package/dist/util.d.ts +2 -2
  29. package/dist/util.d.ts.map +1 -1
  30. package/dist/util.js +43 -15
  31. package/dist/util.js.map +1 -1
  32. package/dist/web/client/assets/index-9a9ba400.css +1 -0
  33. package/dist/web/client/assets/{index-8751749f.js → index-b72d3ca9.js} +12 -12
  34. package/dist/web/client/index.html +2 -2
  35. package/dist/web/server.js +9 -9
  36. package/dist/web/server.js.map +1 -1
  37. package/package.json +1 -1
  38. package/src/assertions.ts +5 -5
  39. package/src/cache.ts +2 -2
  40. package/src/evaluator.ts +24 -17
  41. package/src/index.ts +13 -8
  42. package/src/main.ts +10 -9
  43. package/src/providers/localai.ts +3 -3
  44. package/src/providers/openai.ts +16 -8
  45. package/src/providers.ts +3 -3
  46. package/src/types.ts +1 -1
  47. package/src/util.ts +42 -14
  48. package/src/web/client/package-lock.json +5729 -0
  49. package/src/web/client/src/ResultsTable.css +19 -0
  50. package/src/web/client/src/ResultsTable.tsx +51 -37
  51. package/src/web/client/src/ResultsView.tsx +7 -7
  52. package/src/web/server.ts +3 -3
  53. package/dist/web/client/assets/index-207192fc.css +0 -1
package/dist/web/client/index.html CHANGED
@@ -5,8 +5,8 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-8751749f.js"></script>
9
- <link rel="stylesheet" href="/assets/index-207192fc.css">
8
+ <script type="module" crossorigin src="/assets/index-b72d3ca9.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-9a9ba400.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
package/dist/web/server.js CHANGED
@@ -13,12 +13,12 @@ const express_1 = __importDefault(require("express"));
13
13
  const cors_1 = __importDefault(require("cors"));
14
14
  const opener_1 = __importDefault(require("opener"));
15
15
  const socket_io_1 = require("socket.io");
16
- const logger_js_1 = __importDefault(require("../logger.js"));
17
- const esm_js_1 = require("../esm.js");
18
- const util_js_1 = require("../util.js");
16
+ const logger_1 = __importDefault(require("../logger"));
17
+ const esm_1 = require("../esm");
18
+ const util_1 = require("../util");
19
19
  function init(port = 15500) {
20
20
  const app = (0, express_1.default)();
21
- const staticDir = node_path_1.default.join((0, esm_js_1.getDirectory)(), 'web', 'client');
21
+ const staticDir = node_path_1.default.join((0, esm_1.getDirectory)(), 'web', 'client');
22
22
  app.use((0, cors_1.default)());
23
23
  app.use(express_1.default.json());
24
24
  app.use(express_1.default.static(staticDir));
@@ -28,7 +28,7 @@ function init(port = 15500) {
28
28
  origin: '*',
29
29
  },
30
30
  });
31
- const latestJsonPath = (0, util_js_1.getLatestResultsPath)();
31
+ const latestJsonPath = (0, util_1.getLatestResultsPath)();
32
32
  const readLatestJson = () => {
33
33
  const data = fs_1.default.readFileSync(latestJsonPath, 'utf8');
34
34
  const jsonData = JSON.parse(data);
@@ -46,7 +46,7 @@ function init(port = 15500) {
46
46
  });
47
47
  httpServer.listen(port, () => {
48
48
  const url = `http://localhost:${port}`;
49
- logger_js_1.default.info(`Server listening at ${url}`);
49
+ logger_1.default.info(`Server listening at ${url}`);
50
50
  const rl = node_readline_1.default.createInterface({
51
51
  input: process.stdin,
52
52
  output: process.stdout,
@@ -55,14 +55,14 @@ function init(port = 15500) {
55
55
  if (answer.toLowerCase().startsWith('y')) {
56
56
  try {
57
57
  await (0, opener_1.default)(url);
58
- logger_js_1.default.info(`Opening browser to: ${url}`);
58
+ logger_1.default.info(`Opening browser to: ${url}`);
59
59
  }
60
60
  catch (err) {
61
- logger_js_1.default.error(`Failed to open browser: ${String(err)}`);
61
+ logger_1.default.error(`Failed to open browser: ${String(err)}`);
62
62
  }
63
63
  }
64
64
  rl.close();
65
- logger_js_1.default.info('Press Ctrl+C to stop the server');
65
+ logger_1.default.info('Press Ctrl+C to stop the server');
66
66
  });
67
67
  });
68
68
  }
package/dist/web/server.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"server.js","sourceRoot":"","sources":["../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAGrD,6DAAkC;AAClC,sCAAyC;AACzC,wCAAkD;AAIlD,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,qBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,8BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,QAAQ,CAAC,KAAK,CAAC;IACxB,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,CAAC,CAAC;QAEjD,6DAA6D;QAC7D,YAAE,CAAC,KAAK,CACN,cAAc,EACd,IAAA,kBAAQ,EAAC,CAAC,KAAa,EAAE,EAAE;YACzB,IAAI,KAAK,KAAK,QAAQ,EAAE;gBACtB,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,CAAC,CAAC;aACpD;QACH,CAAC,EAAE,GAAG,CAAC,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,mBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,mBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,mBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,
CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,mBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AA3DD,oBA2DC"}
1
+ {"version":3,"file":"server.js","sourceRoot":"","sources":["../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAGrD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAA+C;AAI/C,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,QAAQ,CAAC,KAAK,CAAC;IACxB,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,CAAC,CAAC;QAEjD,6DAA6D;QAC7D,YAAE,CAAC,KAAK,CACN,cAAc,EACd,IAAA,kBAAQ,EAAC,CAAC,KAAa,EAAE,EAAE;YACzB,IAAI,KAAK,KAAK,QAAQ,EAAE;gBACtB,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,CAAC,CAAC;aACpD;QACH,CAAC,EAAE,GAAG,CAAC,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,
CAAC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AA3DD,oBA2DC"}
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "Prompt engineering toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.9.0",
5
+ "version": "0.10.0",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/index.js",
package/src/assertions.ts CHANGED
@@ -1,12 +1,12 @@
1
1
  import invariant from 'tiny-invariant';
2
2
  import nunjucks from 'nunjucks';
3
3
 
4
- import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai.js';
5
- import { cosineSimilarity } from './util.js';
6
- import { loadApiProvider } from './providers.js';
7
- import { DEFAULT_GRADING_PROMPT } from './prompts.js';
4
+ import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
5
+ import { cosineSimilarity } from './util';
6
+ import { loadApiProvider } from './providers';
7
+ import { DEFAULT_GRADING_PROMPT } from './prompts';
8
8
 
9
- import type { Assertion, GradingConfig, TestCase, GradingResult, AtomicTestCase } from './types.js';
9
+ import type { Assertion, GradingConfig, TestCase, GradingResult, AtomicTestCase } from './types';
10
10
 
11
11
  const SIMILAR_REGEX = /similar(?::|\((\d+(\.\d+)?)\):)/;
12
12
 
package/src/cache.ts CHANGED
@@ -4,8 +4,8 @@ import path from 'node:path';
4
4
  import cacheManager from 'cache-manager';
5
5
  import fsStore from 'cache-manager-fs-hash';
6
6
 
7
- import logger from './logger.js';
8
- import { getConfigDirectoryPath, fetchWithTimeout } from './util.js';
7
+ import logger from './logger';
8
+ import { getConfigDirectoryPath, fetchWithTimeout } from './util';
9
9
 
10
10
  import type { Cache } from 'cache-manager';
11
11
  import type { RequestInfo, RequestInit } from 'node-fetch';
package/src/evaluator.ts CHANGED
@@ -4,8 +4,8 @@ import async from 'async';
4
4
  import chalk from 'chalk';
5
5
  import nunjucks from 'nunjucks';
6
6
 
7
- import logger from './logger.js';
8
- import { runAssertions } from './assertions.js';
7
+ import logger from './logger';
8
+ import { runAssertions } from './assertions';
9
9
 
10
10
  import type { SingleBar } from 'cli-progress';
11
11
  import type {
@@ -19,12 +19,12 @@ import type {
19
19
  Prompt,
20
20
  TestCase,
21
21
  AtomicTestCase,
22
- } from './types.js';
23
- import { generatePrompts } from './suggestions.js';
22
+ } from './types';
23
+ import { generatePrompts } from './suggestions';
24
24
 
25
25
  interface RunEvalOptions {
26
26
  provider: ApiProvider;
27
- prompt: string;
27
+ prompt: Prompt;
28
28
 
29
29
  test: AtomicTestCase;
30
30
 
@@ -86,10 +86,13 @@ class Evaluator {
86
86
  includeProviderId,
87
87
  }: RunEvalOptions): Promise<EvaluateResult> {
88
88
  const vars = test.vars || {};
89
- const renderedPrompt = nunjucks.renderString(prompt, vars);
89
+ const renderedPrompt = nunjucks.renderString(prompt.raw, vars);
90
90
 
91
91
  // Note that we're using original prompt, not renderedPrompt
92
- const promptDisplay = includeProviderId ? `[${provider.id()}] ${prompt}` : prompt;
92
+ let promptDisplay = prompt.display;
93
+ if (includeProviderId) {
94
+ promptDisplay = `[${provider.id()}] ${promptDisplay}`;
95
+ }
93
96
 
94
97
  const setup = {
95
98
  prompt: {
@@ -155,7 +158,7 @@ class Evaluator {
155
158
  if (options.generateSuggestions) {
156
159
  // TODO(ian): Move this into its own command/file
157
160
  logger.info(`Generating prompt variations...`);
158
- const { prompts: newPrompts, error } = await generatePrompts(testSuite.prompts[0], 1);
161
+ const { prompts: newPrompts, error } = await generatePrompts(testSuite.prompts[0].raw, 1);
159
162
  if (error || !newPrompts) {
160
163
  throw new Error(`Failed to generate prompts: ${error}`);
161
164
  }
@@ -178,7 +181,7 @@ class Evaluator {
178
181
  async (answer) => {
179
182
  rl.close();
180
183
  if (answer.toLowerCase().startsWith('y')) {
181
- testSuite.prompts.push(prompt);
184
+ testSuite.prompts.push({ raw: prompt, display: prompt });
182
185
  numAdded++;
183
186
  } else {
184
187
  logger.info('Skipping this prompt.');
@@ -196,13 +199,13 @@ class Evaluator {
196
199
  }
197
200
 
198
201
  // Split prompts by provider
199
- for (const promptContent of testSuite.prompts) {
202
+ for (const prompt of testSuite.prompts) {
200
203
  for (const provider of testSuite.providers) {
201
- const display =
202
- testSuite.providers.length > 1 ? `[${provider.id()}] ${promptContent}` : promptContent;
204
+ const updatedDisplay =
205
+ testSuite.providers.length > 1 ? `[${provider.id()}] ${prompt.display}` : prompt.display;
203
206
  prompts.push({
204
- raw: promptContent,
205
- display,
207
+ ...prompt,
208
+ display: updatedDisplay,
206
209
  });
207
210
  }
208
211
  }
@@ -248,6 +251,7 @@ class Evaluator {
248
251
  // And progress bar...
249
252
  let progressbar: SingleBar | undefined;
250
253
  if (options.showProgressBar) {
254
+ // FIXME(ian): Add var combinations too
251
255
  const totalNumRuns =
252
256
  testSuite.prompts.length * testSuite.providers.length * (tests.length || 1);
253
257
  const cliProgress = await import('cli-progress');
@@ -284,11 +288,14 @@ class Evaluator {
284
288
  const varCombinations = generateVarCombinations(testCase.vars || {});
285
289
  for (const vars of varCombinations) {
286
290
  let colIndex = 0;
287
- for (const promptContent of testSuite.prompts) {
291
+ for (const prompt of testSuite.prompts) {
288
292
  for (const provider of testSuite.providers) {
289
293
  runEvalOptions.push({
290
294
  provider,
291
- prompt: prependToPrompt + promptContent + appendToPrompt,
295
+ prompt: {
296
+ ...prompt,
297
+ raw: prependToPrompt + prompt.raw + appendToPrompt,
298
+ },
292
299
  test: { ...testCase, vars },
293
300
  includeProviderId: testSuite.providers.length > 1,
294
301
  rowIndex,
@@ -314,7 +321,7 @@ class Evaluator {
314
321
  if (progressbar) {
315
322
  progressbar.increment({
316
323
  provider: options.provider.id(),
317
- prompt: options.prompt.slice(0, 10),
324
+ prompt: options.prompt.raw.slice(0, 10),
318
325
  vars: Object.entries(options.test.vars || {})
319
326
  .map(([k, v]) => `${k}=${v}`)
320
327
  .join(' ')
package/src/index.ts CHANGED
@@ -1,12 +1,12 @@
1
- import { evaluate as doEvaluate } from './evaluator.js';
2
- import { loadApiProviders } from './providers.js';
3
- import assertions from './assertions.js';
4
- import providers from './providers.js';
1
+ import { evaluate as doEvaluate } from './evaluator';
2
+ import { loadApiProviders } from './providers';
3
+ import assertions from './assertions';
4
+ import providers from './providers';
5
5
 
6
- import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types.js';
7
- import { readTests } from './util.js';
6
+ import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
7
+ import { readTests } from './util';
8
8
 
9
- export * from './types.js';
9
+ export * from './types';
10
10
 
11
11
  interface EvaluateTestSuite extends TestSuiteConfig {
12
12
  prompts: string[];
@@ -15,9 +15,14 @@ interface EvaluateTestSuite extends TestSuiteConfig {
15
15
  async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions = {}) {
16
16
  const constructedTestSuite: TestSuite = {
17
17
  ...testSuite,
18
- prompts: testSuite.prompts, // raw prompts expected
19
18
  providers: await loadApiProviders(testSuite.providers),
20
19
  tests: await readTests(testSuite.tests),
20
+
21
+ // Full prompts expected (not filepaths)
22
+ prompts: testSuite.prompts.map((promptContent) => ({
23
+ raw: promptContent,
24
+ display: promptContent,
25
+ })),
21
26
  };
22
27
  return doEvaluate(constructedTestSuite, options);
23
28
  }
package/src/main.ts CHANGED
@@ -6,9 +6,9 @@ import Table from 'cli-table3';
6
6
  import chalk from 'chalk';
7
7
  import { Command } from 'commander';
8
8
 
9
- import logger, { setLogLevel } from './logger.js';
10
- import { loadApiProvider, loadApiProviders } from './providers.js';
11
- import { evaluate } from './evaluator.js';
9
+ import logger, { setLogLevel } from './logger';
10
+ import { loadApiProvider, loadApiProviders } from './providers';
11
+ import { evaluate } from './evaluator';
12
12
  import {
13
13
  maybeReadConfig,
14
14
  readConfig,
@@ -16,10 +16,10 @@ import {
16
16
  readTests,
17
17
  writeLatestResults,
18
18
  writeOutput,
19
- } from './util.js';
20
- import { getDirectory } from './esm.js';
21
- import { init } from './web/server.js';
22
- import { disableCache } from './cache.js';
19
+ } from './util';
20
+ import { getDirectory } from './esm';
21
+ import { init } from './web/server';
22
+ import { disableCache } from './cache';
23
23
 
24
24
  import type {
25
25
  CommandLineOptions,
@@ -27,8 +27,8 @@ import type {
27
27
  TestCase,
28
28
  TestSuite,
29
29
  UnifiedConfig,
30
- } from './types.js';
31
- import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding.js';
30
+ } from './types';
31
+ import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
32
32
 
33
33
  function createDummyFiles(directory: string | null) {
34
34
  if (directory) {
@@ -178,6 +178,7 @@ async function main() {
178
178
  prompts: cmdObj.prompts || config.prompts,
179
179
  providers: cmdObj.providers || config.providers,
180
180
  tests: cmdObj.tests || cmdObj.vars || config.tests,
181
+ defaultTest: config.defaultTest,
181
182
  };
182
183
  }
183
184
 
package/src/providers/localai.ts CHANGED
@@ -1,6 +1,6 @@
1
- import logger from '../logger.js';
2
- import { fetchJsonWithCache } from '../cache.js';
3
- import { REQUEST_TIMEOUT_MS } from './shared.js';
1
+ import logger from '../logger';
2
+ import { fetchJsonWithCache } from '../cache';
3
+ import { REQUEST_TIMEOUT_MS } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderResponse } from '../types.js';
6
6
 
package/src/providers/openai.ts CHANGED
@@ -1,6 +1,6 @@
1
- import logger from '../logger.js';
2
- import { fetchJsonWithCache } from '../cache.js';
3
- import { REQUEST_TIMEOUT_MS } from './shared.js';
1
+ import logger from '../logger';
2
+ import { fetchJsonWithCache } from '../cache';
3
+ import { REQUEST_TIMEOUT_MS } from './shared';
4
4
 
5
5
  import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
6
6
 
@@ -126,12 +126,20 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
126
126
  );
127
127
  }
128
128
 
129
+ let stop: string;
130
+ try {
131
+ stop = process.env.OPENAI_STOP
132
+ ? JSON.parse(process.env.OPENAI_STOP)
133
+ : ['<|im_end|>', '<|endoftext|>'];
134
+ } catch (err) {
135
+ throw new Error(`OPENAI_STOP is not a valid JSON string: ${err}`);
136
+ }
129
137
  const body = {
130
138
  model: this.modelName,
131
139
  prompt,
132
- max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
133
- temperature: options?.temperature ?? (process.env.OPENAI_MAX_TEMPERATURE || 0),
134
- stop: process.env.OPENAI_STOP ? JSON.parse(process.env.OPENAI_STOP) : undefined,
140
+ max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
141
+ temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
142
+ stop,
135
143
  };
136
144
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
137
145
  let data,
@@ -210,8 +218,8 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
210
218
  const body = {
211
219
  model: this.modelName,
212
220
  messages: messages,
213
- max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
214
- temperature: options?.temperature ?? (process.env.OPENAI_MAX_TEMPERATURE || 0),
221
+ max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
222
+ temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
215
223
  };
216
224
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
217
225
 
package/src/providers.ts CHANGED
@@ -1,9 +1,9 @@
1
1
  import path from 'node:path';
2
2
 
3
- import { ApiProvider } from './types.js';
3
+ import { ApiProvider } from './types';
4
4
 
5
- import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai.js';
6
- import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai.js';
5
+ import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
6
+ import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
7
7
 
8
8
  export async function loadApiProviders(providerPaths: string | string[]): Promise<ApiProvider[]> {
9
9
  if (typeof providerPaths === 'string') {
package/src/types.ts CHANGED
@@ -157,7 +157,7 @@ export interface TestSuite {
157
157
  providers: ApiProvider[];
158
158
 
159
159
  // One or more prompt strings
160
- prompts: string[];
160
+ prompts: Prompt[];
161
161
 
162
162
  // Test cases
163
163
  tests?: TestCase[];
package/src/util.ts CHANGED
@@ -10,13 +10,13 @@ import { parse as parsePath } from 'path';
10
10
  import { parse as parseCsv } from 'csv-parse/sync';
11
11
  import { stringify } from 'csv-stringify/sync';
12
12
 
13
- import logger from './logger.js';
14
- import { getDirectory } from './esm.js';
13
+ import logger from './logger';
14
+ import { getDirectory } from './esm';
15
15
 
16
16
  import type { RequestInfo, RequestInit, Response } from 'node-fetch';
17
17
 
18
- import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase } from './types.js';
19
- import { assertionFromString } from './assertions.js';
18
+ import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase, Prompt } from './types';
19
+ import { assertionFromString } from './assertions';
20
20
 
21
21
  const PROMPT_DELIMITER = '---';
22
22
 
@@ -44,17 +44,36 @@ export function readConfig(configPath: string): UnifiedConfig {
44
44
  case '.js':
45
45
  return require(configPath) as UnifiedConfig;
46
46
  case '.yaml':
47
+ case '.yml':
47
48
  return yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
48
49
  default:
49
50
  throw new Error(`Unsupported configuration file format: ${ext}`);
50
51
  }
51
52
  }
52
53
 
53
- export function readPrompts(promptPathsOrGlobs: string | string[]): string[] {
54
- promptPathsOrGlobs =
55
- typeof promptPathsOrGlobs === 'string' ? [promptPathsOrGlobs] : promptPathsOrGlobs;
56
- const promptPaths = promptPathsOrGlobs.flatMap((pathOrGlob) => globSync(pathOrGlob));
57
- let promptContents: string[] = [];
54
+ enum PromptInputType {
55
+ STRING = 1,
56
+ ARRAY = 2,
57
+ NAMED = 3,
58
+ }
59
+
60
+ export function readPrompts(
61
+ promptPathOrGlobs: string | string[] | Record<string, string>,
62
+ ): Prompt[] {
63
+ let promptPaths: string[] = [];
64
+ let promptContents: Prompt[] = [];
65
+
66
+ let inputType: PromptInputType | undefined;
67
+ if (typeof promptPathOrGlobs === 'string') {
68
+ promptPaths = [promptPathOrGlobs];
69
+ inputType = PromptInputType.STRING;
70
+ } else if (Array.isArray(promptPathOrGlobs)) {
71
+ promptPaths = promptPathOrGlobs.flatMap((pathOrGlob) => globSync(pathOrGlob));
72
+ inputType = PromptInputType.ARRAY;
73
+ } else if (typeof promptPathOrGlobs === 'object') {
74
+ promptPaths = Object.keys(promptPathOrGlobs);
75
+ inputType = PromptInputType.NAMED;
76
+ }
58
77
 
59
78
  for (const promptPath of promptPaths) {
60
79
  const stat = fs.statSync(promptPath);
@@ -63,18 +82,27 @@ export function readPrompts(promptPathsOrGlobs: string | string[]): string[] {
63
82
  const fileContents = filesInDirectory.map((fileName) =>
64
83
  fs.readFileSync(path.join(promptPath, fileName), 'utf-8'),
65
84
  );
66
- promptContents.push(...fileContents);
85
+ promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
67
86
  } else {
68
87
  const fileContent = fs.readFileSync(promptPath, 'utf-8');
69
- promptContents.push(fileContent);
88
+ let display;
89
+ if (inputType === PromptInputType.NAMED) {
90
+ display = (promptPathOrGlobs as Record<string, string>)[promptPath];
91
+ } else {
92
+ display = fileContent.length > 200 ? promptPath : fileContent;
93
+ }
94
+ promptContents.push({ raw: fileContent, display });
70
95
  }
71
96
  }
72
97
 
73
- if (promptContents.length === 1) {
74
- promptContents = promptContents[0].split(PROMPT_DELIMITER).map((p) => p.trim());
98
+ if (promptContents.length === 1 && inputType !== PromptInputType.NAMED) {
99
+ const content = promptContents[0].raw;
100
+ promptContents = content
101
+ .split(PROMPT_DELIMITER)
102
+ .map((p) => ({ raw: p.trim(), display: p.trim() }));
75
103
  }
76
104
  if (promptContents.length === 0) {
77
- throw new Error(`There are no prompts in ${promptPathsOrGlobs.join(', ')}`);
105
+ throw new Error(`There are no prompts in ${JSON.stringify(promptPathOrGlobs)}`);
78
106
  }
79
107
  return promptContents;
80
108
  }