promptfoo 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154)
  1. package/dist/package.json +87 -0
  2. package/dist/src/__mocks__/esm.d.ts.map +1 -0
  3. package/dist/src/__mocks__/esm.js.map +1 -0
  4. package/dist/src/assertions.d.ts.map +1 -0
  5. package/dist/{assertions.js → src/assertions.js} +19 -20
  6. package/dist/src/assertions.js.map +1 -0
  7. package/dist/src/cache.d.ts.map +1 -0
  8. package/dist/{cache.js → src/cache.js} +1 -1
  9. package/dist/src/cache.js.map +1 -0
  10. package/dist/src/esm.d.ts.map +1 -0
  11. package/dist/src/esm.js.map +1 -0
  12. package/dist/src/evaluator.d.ts.map +1 -0
  13. package/dist/{evaluator.js → src/evaluator.js} +28 -25
  14. package/dist/src/evaluator.js.map +1 -0
  15. package/dist/{index.d.ts → src/index.d.ts} +4 -0
  16. package/dist/src/index.d.ts.map +1 -0
  17. package/dist/{index.js → src/index.js} +15 -11
  18. package/dist/src/index.js.map +1 -0
  19. package/dist/src/logger.d.ts +12 -0
  20. package/dist/src/logger.d.ts.map +1 -0
  21. package/dist/{logger.js → src/logger.js} +13 -5
  22. package/dist/src/logger.js.map +1 -0
  23. package/dist/src/main.d.ts.map +1 -0
  24. package/dist/{main.js → src/main.js} +30 -47
  25. package/dist/src/main.js.map +1 -0
  26. package/dist/{onboarding.d.ts → src/onboarding.d.ts} +1 -1
  27. package/dist/src/onboarding.d.ts.map +1 -0
  28. package/dist/{onboarding.js → src/onboarding.js} +6 -0
  29. package/dist/src/onboarding.js.map +1 -0
  30. package/dist/src/prompts.d.ts.map +1 -0
  31. package/dist/src/prompts.js.map +1 -0
  32. package/dist/src/providers/localai.d.ts.map +1 -0
  33. package/dist/src/providers/localai.js.map +1 -0
  34. package/dist/{providers → src/providers}/openai.d.ts +11 -3
  35. package/dist/src/providers/openai.d.ts.map +1 -0
  36. package/dist/{providers → src/providers}/openai.js +17 -5
  37. package/dist/src/providers/openai.js.map +1 -0
  38. package/dist/src/providers/shared.d.ts.map +1 -0
  39. package/dist/src/providers/shared.js.map +1 -0
  40. package/dist/{providers.d.ts → src/providers.d.ts} +3 -3
  41. package/dist/src/providers.d.ts.map +1 -0
  42. package/dist/{providers.js → src/providers.js} +16 -7
  43. package/dist/src/providers.js.map +1 -0
  44. package/dist/src/suggestions.d.ts.map +1 -0
  45. package/dist/src/suggestions.js.map +1 -0
  46. package/dist/src/table.d.ts +4 -0
  47. package/dist/src/table.d.ts.map +1 -0
  48. package/dist/src/table.js +48 -0
  49. package/dist/src/table.js.map +1 -0
  50. package/dist/src/telemetry.d.ts +10 -0
  51. package/dist/src/telemetry.d.ts.map +1 -0
  52. package/dist/src/telemetry.js +48 -0
  53. package/dist/src/telemetry.js.map +1 -0
  54. package/dist/{types.d.ts → src/types.d.ts} +7 -1
  55. package/dist/src/types.d.ts.map +1 -0
  56. package/dist/src/types.js.map +1 -0
  57. package/dist/src/updates.d.ts +3 -0
  58. package/dist/src/updates.d.ts.map +1 -0
  59. package/dist/src/updates.js +36 -0
  60. package/dist/src/updates.js.map +1 -0
  61. package/dist/{util.d.ts → src/util.d.ts} +1 -1
  62. package/dist/src/util.d.ts.map +1 -0
  63. package/dist/{util.js → src/util.js} +15 -2
  64. package/dist/src/util.js.map +1 -0
  65. package/dist/src/web/client/assets/index-0f6d6b29.js +199 -0
  66. package/dist/src/web/client/assets/js-yaml-8bbf9398.js +32 -0
  67. package/dist/{web → src/web}/client/index.html +1 -1
  68. package/dist/src/web/server.d.ts.map +1 -0
  69. package/dist/{web → src/web}/server.js +3 -4
  70. package/dist/src/web/server.js.map +1 -0
  71. package/package.json +11 -9
  72. package/src/assertions.ts +18 -23
  73. package/src/cache.ts +1 -1
  74. package/src/evaluator.ts +38 -34
  75. package/src/index.ts +10 -10
  76. package/src/logger.ts +13 -5
  77. package/src/main.ts +38 -48
  78. package/src/onboarding.ts +6 -0
  79. package/src/providers/openai.ts +32 -6
  80. package/src/providers.ts +32 -9
  81. package/src/table.ts +41 -0
  82. package/src/telemetry.ts +57 -0
  83. package/src/types.ts +10 -1
  84. package/src/updates.ts +37 -0
  85. package/src/util.ts +34 -4
  86. package/src/web/client/package.json +1 -0
  87. package/src/web/client/src/App.tsx +36 -12
  88. package/src/web/client/src/ConfigModal.tsx +81 -0
  89. package/src/web/client/src/ResultsView.tsx +76 -3
  90. package/src/web/client/src/ShareModal.tsx +70 -0
  91. package/src/web/client/src/store.ts +6 -1
  92. package/src/web/client/src/types.ts +2 -0
  93. package/src/web/server.ts +3 -7
  94. package/dist/__mocks__/esm.d.ts.map +0 -1
  95. package/dist/__mocks__/esm.js.map +0 -1
  96. package/dist/assertions.d.ts.map +0 -1
  97. package/dist/assertions.js.map +0 -1
  98. package/dist/cache.d.ts.map +0 -1
  99. package/dist/cache.js.map +0 -1
  100. package/dist/esm.d.ts.map +0 -1
  101. package/dist/esm.js.map +0 -1
  102. package/dist/evaluator.d.ts.map +0 -1
  103. package/dist/evaluator.js.map +0 -1
  104. package/dist/index.d.ts.map +0 -1
  105. package/dist/index.js.map +0 -1
  106. package/dist/logger.d.ts +0 -11
  107. package/dist/logger.d.ts.map +0 -1
  108. package/dist/logger.js.map +0 -1
  109. package/dist/main.d.ts.map +0 -1
  110. package/dist/main.js.map +0 -1
  111. package/dist/onboarding.d.ts.map +0 -1
  112. package/dist/onboarding.js.map +0 -1
  113. package/dist/prompts.d.ts.map +0 -1
  114. package/dist/prompts.js.map +0 -1
  115. package/dist/providers/localai.d.ts.map +0 -1
  116. package/dist/providers/localai.js.map +0 -1
  117. package/dist/providers/openai.d.ts.map +0 -1
  118. package/dist/providers/openai.js.map +0 -1
  119. package/dist/providers/shared.d.ts.map +0 -1
  120. package/dist/providers/shared.js.map +0 -1
  121. package/dist/providers.d.ts.map +0 -1
  122. package/dist/providers.js.map +0 -1
  123. package/dist/suggestions.d.ts.map +0 -1
  124. package/dist/suggestions.js.map +0 -1
  125. package/dist/types.d.ts.map +0 -1
  126. package/dist/types.js.map +0 -1
  127. package/dist/util.d.ts.map +0 -1
  128. package/dist/util.js.map +0 -1
  129. package/dist/web/client/assets/index-15dfcd18.js +0 -172
  130. package/dist/web/server.d.ts.map +0 -1
  131. package/dist/web/server.js.map +0 -1
  132. package/src/web/client/package-lock.json +0 -5729
  133. /package/dist/{__mocks__ → src/__mocks__}/esm.d.ts +0 -0
  134. /package/dist/{__mocks__ → src/__mocks__}/esm.js +0 -0
  135. /package/dist/{assertions.d.ts → src/assertions.d.ts} +0 -0
  136. /package/dist/{cache.d.ts → src/cache.d.ts} +0 -0
  137. /package/dist/{esm.d.ts → src/esm.d.ts} +0 -0
  138. /package/dist/{esm.js → src/esm.js} +0 -0
  139. /package/dist/{evaluator.d.ts → src/evaluator.d.ts} +0 -0
  140. /package/dist/{main.d.ts → src/main.d.ts} +0 -0
  141. /package/dist/{prompts.d.ts → src/prompts.d.ts} +0 -0
  142. /package/dist/{prompts.js → src/prompts.js} +0 -0
  143. /package/dist/{providers → src/providers}/localai.d.ts +0 -0
  144. /package/dist/{providers → src/providers}/localai.js +0 -0
  145. /package/dist/{providers → src/providers}/shared.d.ts +0 -0
  146. /package/dist/{providers → src/providers}/shared.js +0 -0
  147. /package/dist/{suggestions.d.ts → src/suggestions.d.ts} +0 -0
  148. /package/dist/{suggestions.js → src/suggestions.js} +0 -0
  149. /package/dist/{tableOutput.html → src/tableOutput.html} +0 -0
  150. /package/dist/{types.js → src/types.js} +0 -0
  151. /package/dist/{web → src/web}/client/assets/index-87905193.css +0 -0
  152. /package/dist/{web → src/web}/client/favicon.ico +0 -0
  153. /package/dist/{web → src/web}/client/logo.svg +0 -0
  154. /package/dist/{web → src/web}/server.d.ts +0 -0
package/src/main.ts CHANGED
@@ -2,11 +2,11 @@
2
2
  import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'fs';
3
3
  import { join as pathJoin } from 'path';
4
4
 
5
- import Table from 'cli-table3';
6
5
  import chalk from 'chalk';
7
6
  import { Command } from 'commander';
8
7
 
9
- import logger, { setLogLevel } from './logger';
8
+ import telemetry from './telemetry';
9
+ import logger, { getLogLevel, setLogLevel } from './logger';
10
10
  import { loadApiProvider, loadApiProviders } from './providers';
11
11
  import { evaluate } from './evaluator';
12
12
  import {
@@ -17,9 +17,11 @@ import {
17
17
  writeLatestResults,
18
18
  writeOutput,
19
19
  } from './util';
20
+ import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
21
+ import { disableCache } from './cache';
20
22
  import { getDirectory } from './esm';
21
23
  import { init } from './web/server';
22
- import { disableCache } from './cache';
24
+ import { checkForUpdates } from './updates';
23
25
 
24
26
  import type {
25
27
  CommandLineOptions,
@@ -28,7 +30,7 @@ import type {
28
30
  TestSuite,
29
31
  UnifiedConfig,
30
32
  } from './types';
31
- import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
33
+ import { generateTable } from './table';
32
34
 
33
35
  function createDummyFiles(directory: string | null) {
34
36
  if (directory) {
@@ -60,6 +62,8 @@ function createDummyFiles(directory: string | null) {
60
62
  }
61
63
 
62
64
  async function main() {
65
+ await checkForUpdates();
66
+
63
67
  const pwd = process.cwd();
64
68
  const potentialPaths = [
65
69
  pathJoin(pwd, 'promptfooconfig.js'),
@@ -95,15 +99,23 @@ async function main() {
95
99
  program
96
100
  .command('init [directory]')
97
101
  .description('Initialize project with dummy files')
98
- .action((directory: string | null) => {
102
+ .action(async (directory: string | null) => {
99
103
  createDummyFiles(directory);
104
+ telemetry.record('command_used', {
105
+ name: 'init',
106
+ });
107
+ await telemetry.send();
100
108
  });
101
109
 
102
110
  program
103
111
  .command('view')
104
112
  .description('Start browser ui')
105
113
  .option('-p, --port <number>', 'Port number', '15500')
106
- .action((cmdObj: { port: number } & Command) => {
114
+ .action(async (cmdObj: { port: number } & Command) => {
115
+ telemetry.record('command_used', {
116
+ name: 'view',
117
+ });
118
+ await telemetry.send();
107
119
  init(cmdObj.port);
108
120
  });
109
121
 
@@ -111,10 +123,9 @@ async function main() {
111
123
  .command('eval')
112
124
  .description('Evaluate prompts')
113
125
  .requiredOption('-p, --prompts <paths...>', 'Paths to prompt files (.txt)', config.prompts)
114
- .requiredOption(
126
+ .option(
115
127
  '-r, --providers <name or path...>',
116
128
  'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module',
117
- config?.providers,
118
129
  )
119
130
  .option(
120
131
  '-c, --config <path>',
@@ -231,7 +242,7 @@ async function main() {
231
242
  };
232
243
 
233
244
  const options: EvaluateOptions = {
234
- showProgressBar: true,
245
+ showProgressBar: getLogLevel() !== 'debug',
235
246
  maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
236
247
  ...evaluateOptions,
237
248
  };
@@ -249,43 +260,9 @@ async function main() {
249
260
  if (cmdObj.output) {
250
261
  logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
251
262
  writeOutput(cmdObj.output, summary);
252
- } else {
263
+ } else if (getLogLevel() !== 'debug') {
253
264
  // Output table by default
254
- const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
255
- const head = summary.table.head;
256
- const headLength = head.prompts.length + head.vars.length;
257
- const table = new Table({
258
- head: [...head.prompts, ...head.vars],
259
- colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
260
- wordWrap: true,
261
- wrapOnWordBoundary: false,
262
- style: {
263
- head: ['blue', 'bold'],
264
- },
265
- });
266
- // Skip first row (header) and add the rest. Color PASS/FAIL
267
- for (const row of summary.table.body.slice(0, 25)) {
268
- table.push([
269
- ...row.vars,
270
- ...row.outputs.map((col) => {
271
- const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
272
- if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
273
- col = col.slice(0, tableCellMaxLength) + '...';
274
- }
275
- if (col.startsWith('[PASS]')) {
276
- // color '[PASS]' green
277
- return chalk.green.bold(col.slice(0, 6)) + col.slice(6);
278
- } else if (col.startsWith('[FAIL]')) {
279
- // color everything red up until '---'
280
- return col
281
- .split('---')
282
- .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
283
- .join('---');
284
- }
285
- return col;
286
- }),
287
- ]);
288
- }
265
+ const table = generateTable(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));
289
266
 
290
267
  logger.info('\n' + table.toString());
291
268
  if (summary.table.body.length > 25) {
@@ -293,12 +270,20 @@ async function main() {
293
270
  logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
294
271
  }
295
272
  }
273
+
274
+ const border = '='.repeat(process.stdout.columns - 10);
275
+ logger.info(border);
296
276
  if (cmdObj.view || !cmdObj.write) {
297
- logger.info('Evaluation complete');
277
+ logger.info(`${chalk.green('✔')} Evaluation complete`);
298
278
  } else {
299
- writeLatestResults(summary);
300
- logger.info(`Evaluation complete. To use web viewer, run ${chalk.green('promptfoo view')}`);
279
+ writeLatestResults(summary, config);
280
+ logger.info(
281
+ `${chalk.green('✔')} Evaluation complete. To use web viewer, run ${chalk.green(
282
+ 'promptfoo view',
283
+ )}`,
284
+ );
301
285
  }
286
+ logger.info(border);
302
287
  logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
303
288
  logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
304
289
  logger.info(
@@ -306,6 +291,11 @@ async function main() {
306
291
  );
307
292
  logger.info('Done.');
308
293
 
294
+ telemetry.record('command_used', {
295
+ name: 'eval',
296
+ });
297
+ await telemetry.send();
298
+
309
299
  if (cmdObj.view) {
310
300
  init(parseInt(cmdObj.view, 10) || 15500);
311
301
  }
package/src/onboarding.ts CHANGED
@@ -9,6 +9,12 @@ These prompts are nunjucks templates, so you can use logic like this:
9
9
  {{ var1 }}
10
10
  {% endif %}
11
11
  ---
12
+ [
13
+ {"role": "system", "content": "Use JSON too for more complex payloads"},
14
+ {"role": "user", "content": "Such as multi-shot prompts"}
15
+ {"role": "user", "content": "Variable substitution still works: {{ var3 }}"}
16
+ ]
17
+ ---
12
18
  If you prefer, you can break prompts into multiple files (make sure to edit promptfooconfig.yaml accordingly)
13
19
  `;
14
20
 
package/src/providers/openai.ts CHANGED
@@ -7,7 +7,13 @@ import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '.
7
7
  const DEFAULT_OPENAI_HOST = 'api.openai.com';
8
8
 
9
9
  interface OpenAiCompletionOptions {
10
- temperature: number;
10
+ temperature?: number;
11
+ functions?: {
12
+ name: string;
13
+ description?: string;
14
+ parameters: any;
15
+ }[];
16
+ function_call?: 'none' | 'auto';
11
17
  }
12
18
 
13
19
  class OpenAiGenericProvider implements ApiProvider {
@@ -112,11 +118,14 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
112
118
  'text-ada-001',
113
119
  ];
114
120
 
115
- constructor(modelName: string, apiKey?: string) {
121
+ options: OpenAiCompletionOptions;
122
+
123
+ constructor(modelName: string, apiKey?: string, context?: OpenAiCompletionOptions) {
116
124
  if (!OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelName)) {
117
125
  logger.warn(`Using unknown OpenAI completion model: ${modelName}`);
118
126
  }
119
127
  super(modelName, apiKey);
128
+ this.options = context || {};
120
129
  }
121
130
 
122
131
  async callApi(prompt: string, options?: OpenAiCompletionOptions): Promise<ProviderResponse> {
@@ -138,7 +147,10 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
138
147
  model: this.modelName,
139
148
  prompt,
140
149
  max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
141
- temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
150
+ temperature:
151
+ options?.temperature ??
152
+ this.options.temperature ??
153
+ parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
142
154
  stop,
143
155
  };
144
156
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
@@ -186,17 +198,22 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
186
198
  static OPENAI_CHAT_MODELS = [
187
199
  'gpt-4',
188
200
  'gpt-4-0314',
201
+ 'gpt-4-0613',
189
202
  'gpt-4-32k',
190
203
  'gpt-4-32k-0314',
191
204
  'gpt-3.5-turbo',
192
205
  'gpt-3.5-turbo-0301',
206
+ 'gpt-3.5-turbo-0613',
193
207
  ];
194
208
 
195
- constructor(modelName: string, apiKey?: string) {
209
+ options: OpenAiCompletionOptions;
210
+
211
+ constructor(modelName: string, apiKey?: string, context?: OpenAiCompletionOptions) {
196
212
  if (!OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelName)) {
197
213
  logger.warn(`Using unknown OpenAI chat model: ${modelName}`);
198
214
  }
199
215
  super(modelName, apiKey);
216
+ this.options = context || {};
200
217
  }
201
218
 
202
219
  // TODO(ian): support passing in `messages` directly
@@ -215,11 +232,17 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
215
232
  } catch (err) {
216
233
  messages = [{ role: 'user', content: prompt }];
217
234
  }
235
+
218
236
  const body = {
219
237
  model: this.modelName,
220
238
  messages: messages,
221
239
  max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
222
- temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
240
+ temperature:
241
+ options?.temperature ??
242
+ this.options.temperature ??
243
+ parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
244
+ functions: options?.functions || this.options.functions || undefined,
245
+ function_call: options?.function_call || this.options.function_call || undefined,
223
246
  };
224
247
  logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
225
248
 
@@ -246,8 +269,11 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
246
269
 
247
270
  logger.debug(`\tOpenAI API response: ${JSON.stringify(data)}`);
248
271
  try {
272
+ const message = data.choices[0].message;
273
+ const output =
274
+ message.content === null ? JSON.stringify(message.function_call) : message.content;
249
275
  return {
250
- output: data.choices[0].message.content,
276
+ output,
251
277
  tokenUsage: cached
252
278
  ? { cached: data.usage.total_tokens }
253
279
  : {
package/src/providers.ts CHANGED
@@ -1,20 +1,35 @@
1
1
  import path from 'node:path';
2
2
 
3
- import { ApiProvider } from './types';
3
+ import { ApiProvider, ProviderConfig, ProviderId, RawProviderConfig } from './types';
4
4
 
5
5
  import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
6
6
  import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
7
7
 
8
- export async function loadApiProviders(providerPaths: string | string[]): Promise<ApiProvider[]> {
8
+ export async function loadApiProviders(
9
+ providerPaths: ProviderId | ProviderId[] | RawProviderConfig[],
10
+ ): Promise<ApiProvider[]> {
9
11
  if (typeof providerPaths === 'string') {
10
12
  return [await loadApiProvider(providerPaths)];
11
13
  } else if (Array.isArray(providerPaths)) {
12
- return Promise.all(providerPaths.map((provider) => loadApiProvider(provider)));
14
+ return Promise.all(
15
+ providerPaths.map((provider) => {
16
+ if (typeof provider === 'string') {
17
+ return loadApiProvider(provider);
18
+ } else {
19
+ const id = Object.keys(provider)[0];
20
+ const context = { ...provider[id], id };
21
+ return loadApiProvider(id, context);
22
+ }
23
+ }),
24
+ );
13
25
  }
14
26
  throw new Error('Invalid providers list');
15
27
  }
16
28
 
17
- export async function loadApiProvider(providerPath: string): Promise<ApiProvider> {
29
+ export async function loadApiProvider(
30
+ providerPath: string,
31
+ context: ProviderConfig | undefined = undefined,
32
+ ): Promise<ApiProvider> {
18
33
  if (providerPath?.startsWith('openai:')) {
19
34
  // Load OpenAI module
20
35
  const options = providerPath.split(':');
@@ -22,13 +37,21 @@ export async function loadApiProvider(providerPath: string): Promise<ApiProvider
22
37
  const modelName = options[2];
23
38
 
24
39
  if (modelType === 'chat') {
25
- return new OpenAiChatCompletionProvider(modelName || 'gpt-3.5-turbo');
40
+ return new OpenAiChatCompletionProvider(
41
+ modelName || 'gpt-3.5-turbo',
42
+ undefined,
43
+ context?.config,
44
+ );
26
45
  } else if (modelType === 'completion') {
27
- return new OpenAiCompletionProvider(modelName || 'text-davinci-003');
46
+ return new OpenAiCompletionProvider(
47
+ modelName || 'text-davinci-003',
48
+ undefined,
49
+ context?.config,
50
+ );
28
51
  } else if (OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelType)) {
29
- return new OpenAiChatCompletionProvider(modelType);
52
+ return new OpenAiChatCompletionProvider(modelType, undefined, context?.config);
30
53
  } else if (OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelType)) {
31
- return new OpenAiCompletionProvider(modelType);
54
+ return new OpenAiCompletionProvider(modelType, undefined, context?.config);
32
55
  } else {
33
56
  throw new Error(
34
57
  `Unknown OpenAI model type: ${modelType}. Use one of the following providers: openai:chat:<model name>, openai:completion:<model name>`,
@@ -52,7 +75,7 @@ export async function loadApiProvider(providerPath: string): Promise<ApiProvider
52
75
 
53
76
  // Load custom module
54
77
  const CustomApiProvider = (await import(path.join(process.cwd(), providerPath))).default;
55
- return new CustomApiProvider();
78
+ return new CustomApiProvider(context);
56
79
  }
57
80
 
58
81
  export default {
package/src/table.ts ADDED
@@ -0,0 +1,41 @@
1
+ import Table from 'cli-table3';
2
+ import chalk from 'chalk';
3
+ import type { EvaluateSummary } from './types';
4
+
5
+ export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250, maxRows = 25) {
6
+ const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
7
+ const head = summary.table.head;
8
+ const headLength = head.prompts.length + head.vars.length;
9
+ const table = new Table({
10
+ head: [...head.prompts, ...head.vars],
11
+ colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
12
+ wordWrap: true,
13
+ wrapOnWordBoundary: false,
14
+ style: {
15
+ head: ['blue', 'bold'],
16
+ },
17
+ });
18
+ // Skip first row (header) and add the rest. Color PASS/FAIL
19
+ for (const row of summary.table.body.slice(0, maxRows)) {
20
+ table.push([
21
+ ...row.vars,
22
+ ...row.outputs.map((col) => {
23
+ if (col.length > tableCellMaxLength) {
24
+ col = col.slice(0, tableCellMaxLength) + '...';
25
+ }
26
+ if (col.startsWith('[PASS]')) {
27
+ // color '[PASS]' green
28
+ return chalk.green.bold(col.slice(0, 6)) + col.slice(6);
29
+ } else if (col.startsWith('[FAIL]')) {
30
+ // color everything red up until '---'
31
+ return col
32
+ .split('---')
33
+ .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
34
+ .join('---');
35
+ }
36
+ return col;
37
+ }),
38
+ ]);
39
+ }
40
+ return table;
41
+ }
package/src/telemetry.ts ADDED
@@ -0,0 +1,57 @@
1
+ import packageJson from '../package.json';
2
+ import { fetchWithTimeout } from './util';
3
+
4
+ type TelemetryEvent = {
5
+ event: string;
6
+ packageVersion: string;
7
+ properties: Record<string, string | number>;
8
+ };
9
+
10
+ type TelemetryEventTypes = 'eval_ran' | 'assertion_used' | 'command_used';
11
+
12
+ const TELEMETRY_ENDPOINT = 'https://api.promptfoo.dev/telemetry';
13
+
14
+ const TELEMETRY_TIMEOUT_MS = 1000;
15
+
16
+ export class Telemetry {
17
+ private events: TelemetryEvent[] = [];
18
+
19
+ get disabled() {
20
+ return process.env.PROMPTFOO_DISABLE_TELEMETRY === '1';
21
+ }
22
+
23
+ record(eventName: TelemetryEventTypes, properties: Record<string, string | number>): void {
24
+ if (!this.disabled) {
25
+ this.events.push({
26
+ event: eventName,
27
+ packageVersion: packageJson.version,
28
+ properties,
29
+ });
30
+ }
31
+ }
32
+
33
+ async send(): Promise<void> {
34
+ if (!this.disabled && this.events.length > 0) {
35
+ try {
36
+ const response = await fetchWithTimeout(
37
+ TELEMETRY_ENDPOINT,
38
+ {
39
+ method: 'POST',
40
+ headers: {
41
+ 'Content-Type': 'application/json',
42
+ },
43
+ body: JSON.stringify(this.events),
44
+ },
45
+ TELEMETRY_TIMEOUT_MS,
46
+ );
47
+
48
+ if (response.ok) {
49
+ this.events = [];
50
+ }
51
+ } catch (err) {}
52
+ }
53
+ }
54
+ }
55
+
56
+ const telemetry = new Telemetry();
57
+ export default telemetry;
package/src/types.ts CHANGED
@@ -23,6 +23,11 @@ export interface CommandLineOptions {
23
23
  promptSuffix?: string;
24
24
  }
25
25
 
26
+ export interface ProviderConfig {
27
+ id: ProviderId;
28
+ config?: any;
29
+ }
30
+
26
31
  export interface ApiProvider {
27
32
  id: () => string;
28
33
  callApi: (prompt: string) => Promise<ProviderResponse>;
@@ -187,13 +192,17 @@ export interface TestSuite {
187
192
  defaultTest?: Partial<TestCase>;
188
193
  }
189
194
 
195
+ export type ProviderId = string;
196
+
197
+ export type RawProviderConfig = Record<ProviderId, Omit<ProviderConfig, 'id'>>;
198
+
190
199
  // TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
191
200
  export interface TestSuiteConfig {
192
201
  // Optional description of what your LLM is trying to do
193
202
  description?: string;
194
203
 
195
204
  // One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
196
- providers: string | string[];
205
+ providers: ProviderId | ProviderId[] | RawProviderConfig[];
197
206
 
198
207
  // One or more prompt files to load
199
208
  prompts: string | string[];
package/src/updates.ts ADDED
@@ -0,0 +1,37 @@
1
+ import chalk from 'chalk';
2
+ import semverGt from 'semver/functions/gt';
3
+
4
+ import logger from './logger';
5
+ import { fetchWithTimeout } from './util';
6
+ import packageJson from '../package.json';
7
+
8
+ const VERSION = packageJson.version;
9
+
10
+ export async function getLatestVersion(packageName: string) {
11
+ const response = await fetchWithTimeout(`https://registry.npmjs.org/${packageName}`, {}, 1000);
12
+ if (!response.ok) {
13
+ throw new Error(`Failed to fetch package information for ${packageName}`);
14
+ }
15
+ const data = await response.json();
16
+ return data['dist-tags'].latest;
17
+ }
18
+
19
+ export async function checkForUpdates(): Promise<boolean> {
20
+ const latestVersion = await getLatestVersion('promptfoo');
21
+ if (semverGt(latestVersion, VERSION)) {
22
+ const border = '='.repeat(process.stdout.columns - 10);
23
+ logger.info(
24
+ `\n${border}
25
+ ${chalk.yellow('⚠️')} The current version of promptfoo ${chalk.yellow(
26
+ VERSION,
27
+ )} is lower than the latest available version ${chalk.green(latestVersion)}.
28
+
29
+ Please run ${chalk.green('npx promptfoo@latest')} or ${chalk.green(
30
+ 'npm install -g promptfoo@latest',
31
+ )} to update.
32
+ ${border}\n`,
33
+ );
34
+ return true;
35
+ }
36
+ return false;
37
+ }
package/src/util.ts CHANGED
@@ -16,7 +16,15 @@ import { getDirectory } from './esm';
16
16
 
17
17
  import type { RequestInfo, RequestInit, Response } from 'node-fetch';
18
18
 
19
- import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase, Prompt } from './types';
19
+ import type {
20
+ Assertion,
21
+ CsvRow,
22
+ EvaluateSummary,
23
+ UnifiedConfig,
24
+ TestCase,
25
+ Prompt,
26
+ TestSuite,
27
+ } from './types';
20
28
  import { assertionFromString } from './assertions';
21
29
 
22
30
  const PROMPT_DELIMITER = '---';
@@ -88,11 +96,22 @@ export function readPrompts(
88
96
  promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
89
97
  } else {
90
98
  const fileContent = fs.readFileSync(promptPath, 'utf-8');
91
- let display;
99
+
100
+ let display: string | undefined;
92
101
  if (inputType === PromptInputType.NAMED) {
93
102
  display = (promptPathOrGlobs as Record<string, string>)[promptPath];
94
103
  } else {
95
104
  display = fileContent.length > 200 ? promptPath : fileContent;
105
+
106
+ const ext = path.parse(promptPath).ext;
107
+ if (ext === '.jsonl') {
108
+ // Special case for JSONL file
109
+ const jsonLines = fileContent.split(/\r?\n/).filter((line) => line.length > 0);
110
+ for (const json of jsonLines) {
111
+ promptContents.push({ raw: json, display: json });
112
+ }
113
+ continue;
114
+ }
96
115
  }
97
116
  promptContents.push({ raw: fileContent, display });
98
117
  }
@@ -238,11 +257,22 @@ export function getLatestResultsPath(): string {
238
257
  return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
239
258
  }
240
259
 
241
- export function writeLatestResults(results: EvaluateSummary) {
260
+ export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
242
261
  const latestResultsPath = getLatestResultsPath();
243
262
  try {
244
263
  fs.mkdirSync(path.dirname(latestResultsPath), { recursive: true });
245
- fs.writeFileSync(latestResultsPath, JSON.stringify(results, null, 2));
264
+ fs.writeFileSync(
265
+ latestResultsPath,
266
+ JSON.stringify(
267
+ {
268
+ version: 1,
269
+ config,
270
+ results,
271
+ },
272
+ null,
273
+ 2,
274
+ ),
275
+ );
246
276
  } catch (err) {
247
277
  logger.error(`Failed to write latest results to ${latestResultsPath}:\n${err}`);
248
278
  }
package/src/web/client/package.json CHANGED
@@ -15,6 +15,7 @@
15
15
  "@mui/icons-material": "^5.11.16",
16
16
  "@mui/material": "^5.13.0",
17
17
  "@tanstack/react-table": "^8.9.1",
18
+ "js-yaml": "^4.1.0",
18
19
  "react": "^18.2.0",
19
20
  "react-dnd": "^16.0.1",
20
21
  "react-dnd-html5-backend": "^16.0.1",
package/src/web/client/src/App.tsx CHANGED
@@ -11,8 +11,9 @@ import { useStore } from './store.js';
11
11
  import './App.css';
12
12
 
13
13
  function App() {
14
- const { table, setTable } = useStore();
14
+ const { table, setTable, setConfig } = useStore();
15
15
  const [loaded, setLoaded] = React.useState<boolean>(false);
16
+ const loadedFromApi = React.useRef(false);
16
17
 
17
18
  const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
18
19
  const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
@@ -37,24 +38,47 @@ function App() {
37
38
  };
38
39
 
39
40
  React.useEffect(() => {
40
- //const socket = SocketIOClient(`http://${window.location.host}`);
41
+ const fetchEvalData = async (id: string) => {
42
+ if (loadedFromApi.current) {
43
+ return;
44
+ }
45
+ loadedFromApi.current = true;
46
+ const response = await fetch(`https://api.promptfoo.dev/eval/${id}`);
47
+ const body = await response.json();
48
+ setTable(
49
+ body.data.results?.table ||
50
+ // Backwards compatibility with <= 0.12.0
51
+ body.data.table,
52
+ );
53
+ setConfig(body.data.config);
54
+ setLoaded(true);
55
+ };
56
+
41
57
  const socket = SocketIOClient(`http://localhost:15500`);
42
58
 
43
- socket.on('init', (data) => {
44
- console.log('Initialized socket connection');
45
- setLoaded(true);
46
- setTable(data.table);
47
- });
59
+ const pathMatch = window.location.pathname.match(/\/eval\/([\w:-]+)/);
60
+ if (pathMatch) {
61
+ const id = pathMatch[1];
62
+ fetchEvalData(id);
63
+ } else {
64
+ socket.on('init', (data) => {
65
+ console.log('Initialized socket connection', data);
66
+ setLoaded(true);
67
+ setTable(data.results.table);
68
+ setConfig(data.config);
69
+ });
48
70
 
49
- socket.on('update', (data) => {
50
- console.log('Received data update');
51
- setTable(data.table);
52
- });
71
+ socket.on('update', (data) => {
72
+ console.log('Received data update', data);
73
+ setTable(data.results.table);
74
+ setConfig(data.config);
75
+ });
76
+ }
53
77
 
54
78
  return () => {
55
79
  socket.disconnect();
56
80
  };
57
- }, [loaded, setTable]);
81
+ }, [setTable, setConfig]);
58
82
 
59
83
  return (
60
84
  <ThemeProvider theme={theme}>