promptfoo 0.10.0 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. package/README.md +45 -35
  2. package/dist/package.json +87 -0
  3. package/dist/src/__mocks__/esm.d.ts.map +1 -0
  4. package/dist/src/__mocks__/esm.js.map +1 -0
  5. package/dist/{assertions.d.ts → src/assertions.d.ts} +1 -1
  6. package/dist/src/assertions.d.ts.map +1 -0
  7. package/dist/src/assertions.js +374 -0
  8. package/dist/src/assertions.js.map +1 -0
  9. package/dist/src/cache.d.ts.map +1 -0
  10. package/dist/src/cache.js.map +1 -0
  11. package/dist/src/esm.d.ts.map +1 -0
  12. package/dist/src/esm.js.map +1 -0
  13. package/dist/src/evaluator.d.ts.map +1 -0
  14. package/dist/{evaluator.js → src/evaluator.js} +3 -1
  15. package/dist/src/evaluator.js.map +1 -0
  16. package/dist/src/index.d.ts.map +1 -0
  17. package/dist/{index.js → src/index.js} +10 -7
  18. package/dist/src/index.js.map +1 -0
  19. package/dist/src/logger.d.ts.map +1 -0
  20. package/dist/src/logger.js.map +1 -0
  21. package/dist/src/main.d.ts.map +1 -0
  22. package/dist/{main.js → src/main.js} +35 -13
  23. package/dist/src/main.js.map +1 -0
  24. package/dist/src/onboarding.d.ts.map +1 -0
  25. package/dist/src/onboarding.js.map +1 -0
  26. package/dist/src/prompts.d.ts.map +1 -0
  27. package/dist/src/prompts.js.map +1 -0
  28. package/dist/src/providers/localai.d.ts.map +1 -0
  29. package/dist/src/providers/localai.js.map +1 -0
  30. package/dist/src/providers/openai.d.ts.map +1 -0
  31. package/dist/src/providers/openai.js.map +1 -0
  32. package/dist/src/providers/shared.d.ts.map +1 -0
  33. package/dist/src/providers/shared.js.map +1 -0
  34. package/dist/src/providers.d.ts.map +1 -0
  35. package/dist/src/providers.js.map +1 -0
  36. package/dist/src/suggestions.d.ts.map +1 -0
  37. package/dist/src/suggestions.js.map +1 -0
  38. package/dist/src/telemetry.d.ts +10 -0
  39. package/dist/src/telemetry.d.ts.map +1 -0
  40. package/dist/src/telemetry.js +48 -0
  41. package/dist/src/telemetry.js.map +1 -0
  42. package/dist/{types.d.ts → src/types.d.ts} +6 -2
  43. package/dist/src/types.d.ts.map +1 -0
  44. package/dist/src/types.js.map +1 -0
  45. package/dist/src/updates.d.ts +3 -0
  46. package/dist/src/updates.d.ts.map +1 -0
  47. package/dist/src/updates.js +36 -0
  48. package/dist/src/updates.js.map +1 -0
  49. package/dist/{util.d.ts → src/util.d.ts} +3 -3
  50. package/dist/src/util.d.ts.map +1 -0
  51. package/dist/{util.js → src/util.js} +12 -5
  52. package/dist/src/util.js.map +1 -0
  53. package/dist/src/web/client/assets/index-87905193.css +1 -0
  54. package/dist/src/web/client/assets/index-eb6d3769.js +199 -0
  55. package/dist/src/web/client/assets/js-yaml-8bbf9398.js +32 -0
  56. package/dist/{web → src/web}/client/index.html +2 -2
  57. package/dist/src/web/server.d.ts.map +1 -0
  58. package/dist/{web → src/web}/server.js +3 -4
  59. package/dist/src/web/server.js.map +1 -0
  60. package/package.json +13 -9
  61. package/src/assertions.ts +247 -41
  62. package/src/evaluator.ts +5 -2
  63. package/src/index.ts +7 -4
  64. package/src/main.ts +50 -13
  65. package/src/telemetry.ts +57 -0
  66. package/src/types.ts +23 -2
  67. package/src/updates.ts +37 -0
  68. package/src/util.ts +28 -6
  69. package/src/web/client/package-lock.json +3 -6
  70. package/src/web/client/package.json +1 -0
  71. package/src/web/client/src/App.tsx +32 -12
  72. package/src/web/client/src/ConfigModal.tsx +81 -0
  73. package/src/web/client/src/ResultsTable.css +18 -6
  74. package/src/web/client/src/ResultsTable.tsx +101 -35
  75. package/src/web/client/src/ResultsView.tsx +148 -12
  76. package/src/web/client/src/ShareModal.tsx +70 -0
  77. package/src/web/client/src/index.css +6 -0
  78. package/src/web/client/src/store.ts +6 -1
  79. package/src/web/client/src/types.ts +4 -0
  80. package/src/web/server.ts +3 -7
  81. package/dist/__mocks__/esm.d.ts.map +0 -1
  82. package/dist/__mocks__/esm.js.map +0 -1
  83. package/dist/assertions.d.ts.map +0 -1
  84. package/dist/assertions.js +0 -233
  85. package/dist/assertions.js.map +0 -1
  86. package/dist/cache.d.ts.map +0 -1
  87. package/dist/cache.js.map +0 -1
  88. package/dist/esm.d.ts.map +0 -1
  89. package/dist/esm.js.map +0 -1
  90. package/dist/evaluator.d.ts.map +0 -1
  91. package/dist/evaluator.js.map +0 -1
  92. package/dist/index.d.ts.map +0 -1
  93. package/dist/index.js.map +0 -1
  94. package/dist/logger.d.ts.map +0 -1
  95. package/dist/logger.js.map +0 -1
  96. package/dist/main.d.ts.map +0 -1
  97. package/dist/main.js.map +0 -1
  98. package/dist/onboarding.d.ts.map +0 -1
  99. package/dist/onboarding.js.map +0 -1
  100. package/dist/prompts.d.ts.map +0 -1
  101. package/dist/prompts.js.map +0 -1
  102. package/dist/providers/localai.d.ts.map +0 -1
  103. package/dist/providers/localai.js.map +0 -1
  104. package/dist/providers/openai.d.ts.map +0 -1
  105. package/dist/providers/openai.js.map +0 -1
  106. package/dist/providers/shared.d.ts.map +0 -1
  107. package/dist/providers/shared.js.map +0 -1
  108. package/dist/providers.d.ts.map +0 -1
  109. package/dist/providers.js.map +0 -1
  110. package/dist/suggestions.d.ts.map +0 -1
  111. package/dist/suggestions.js.map +0 -1
  112. package/dist/types.d.ts.map +0 -1
  113. package/dist/types.js.map +0 -1
  114. package/dist/util.d.ts.map +0 -1
  115. package/dist/util.js.map +0 -1
  116. package/dist/web/client/assets/index-9a9ba400.css +0 -1
  117. package/dist/web/client/assets/index-b72d3ca9.js +0 -172
  118. package/dist/web/server.d.ts.map +0 -1
  119. package/dist/web/server.js.map +0 -1
  120. /package/dist/{__mocks__ → src/__mocks__}/esm.d.ts +0 -0
  121. /package/dist/{__mocks__ → src/__mocks__}/esm.js +0 -0
  122. /package/dist/{cache.d.ts → src/cache.d.ts} +0 -0
  123. /package/dist/{cache.js → src/cache.js} +0 -0
  124. /package/dist/{esm.d.ts → src/esm.d.ts} +0 -0
  125. /package/dist/{esm.js → src/esm.js} +0 -0
  126. /package/dist/{evaluator.d.ts → src/evaluator.d.ts} +0 -0
  127. /package/dist/{index.d.ts → src/index.d.ts} +0 -0
  128. /package/dist/{logger.d.ts → src/logger.d.ts} +0 -0
  129. /package/dist/{logger.js → src/logger.js} +0 -0
  130. /package/dist/{main.d.ts → src/main.d.ts} +0 -0
  131. /package/dist/{onboarding.d.ts → src/onboarding.d.ts} +0 -0
  132. /package/dist/{onboarding.js → src/onboarding.js} +0 -0
  133. /package/dist/{prompts.d.ts → src/prompts.d.ts} +0 -0
  134. /package/dist/{prompts.js → src/prompts.js} +0 -0
  135. /package/dist/{providers → src/providers}/localai.d.ts +0 -0
  136. /package/dist/{providers → src/providers}/localai.js +0 -0
  137. /package/dist/{providers → src/providers}/openai.d.ts +0 -0
  138. /package/dist/{providers → src/providers}/openai.js +0 -0
  139. /package/dist/{providers → src/providers}/shared.d.ts +0 -0
  140. /package/dist/{providers → src/providers}/shared.js +0 -0
  141. /package/dist/{providers.d.ts → src/providers.d.ts} +0 -0
  142. /package/dist/{providers.js → src/providers.js} +0 -0
  143. /package/dist/{suggestions.d.ts → src/suggestions.d.ts} +0 -0
  144. /package/dist/{suggestions.js → src/suggestions.js} +0 -0
  145. /package/dist/{types.js → src/types.js} +0 -0
  146. /package/dist/{web → src/web}/client/favicon.ico +0 -0
  147. /package/dist/{web → src/web}/client/logo.svg +0 -0
  148. /package/dist/{web → src/web}/server.d.ts +0 -0
package/src/index.ts CHANGED
@@ -1,10 +1,11 @@
1
- import { evaluate as doEvaluate } from './evaluator';
2
- import { loadApiProviders } from './providers';
3
1
  import assertions from './assertions';
4
2
  import providers from './providers';
3
+ import telemetry from './telemetry';
4
+ import { evaluate as doEvaluate } from './evaluator';
5
+ import { loadApiProviders } from './providers';
6
+ import { readTests } from './util';
5
7
 
6
8
  import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
7
- import { readTests } from './util';
8
9
 
9
10
  export * from './types';
10
11
 
@@ -24,7 +25,9 @@ async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions =
24
25
  display: promptContent,
25
26
  })),
26
27
  };
27
- return doEvaluate(constructedTestSuite, options);
28
+ const ret = await doEvaluate(constructedTestSuite, options);
29
+ await telemetry.send();
30
+ return ret;
28
31
  }
29
32
 
30
33
  module.exports = {
package/src/main.ts CHANGED
@@ -6,6 +6,7 @@ import Table from 'cli-table3';
6
6
  import chalk from 'chalk';
7
7
  import { Command } from 'commander';
8
8
 
9
+ import telemetry from './telemetry';
9
10
  import logger, { setLogLevel } from './logger';
10
11
  import { loadApiProvider, loadApiProviders } from './providers';
11
12
  import { evaluate } from './evaluator';
@@ -17,9 +18,11 @@ import {
17
18
  writeLatestResults,
18
19
  writeOutput,
19
20
  } from './util';
21
+ import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
22
+ import { disableCache } from './cache';
20
23
  import { getDirectory } from './esm';
21
24
  import { init } from './web/server';
22
- import { disableCache } from './cache';
25
+ import { checkForUpdates } from './updates';
23
26
 
24
27
  import type {
25
28
  CommandLineOptions,
@@ -28,7 +31,6 @@ import type {
28
31
  TestSuite,
29
32
  UnifiedConfig,
30
33
  } from './types';
31
- import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
32
34
 
33
35
  function createDummyFiles(directory: string | null) {
34
36
  if (directory) {
@@ -60,6 +62,8 @@ function createDummyFiles(directory: string | null) {
60
62
  }
61
63
 
62
64
  async function main() {
65
+ await checkForUpdates();
66
+
63
67
  const pwd = process.cwd();
64
68
  const potentialPaths = [
65
69
  pathJoin(pwd, 'promptfooconfig.js'),
@@ -68,7 +72,7 @@ async function main() {
68
72
  ];
69
73
  let config: Partial<UnifiedConfig> = {};
70
74
  for (const path of potentialPaths) {
71
- const maybeConfig = maybeReadConfig(path);
75
+ const maybeConfig = await maybeReadConfig(path);
72
76
  if (maybeConfig) {
73
77
  config = maybeConfig;
74
78
  break;
@@ -95,15 +99,23 @@ async function main() {
95
99
  program
96
100
  .command('init [directory]')
97
101
  .description('Initialize project with dummy files')
98
- .action((directory: string | null) => {
102
+ .action(async (directory: string | null) => {
99
103
  createDummyFiles(directory);
104
+ telemetry.record('command_used', {
105
+ name: 'init',
106
+ });
107
+ await telemetry.send();
100
108
  });
101
109
 
102
110
  program
103
111
  .command('view')
104
112
  .description('Start browser ui')
105
113
  .option('-p, --port <number>', 'Port number', '15500')
106
- .action((cmdObj: { port: number } & Command) => {
114
+ .action(async (cmdObj: { port: number } & Command) => {
115
+ telemetry.record('command_used', {
116
+ name: 'view',
117
+ });
118
+ await telemetry.send();
107
119
  init(cmdObj.port);
108
120
  });
109
121
 
@@ -154,8 +166,16 @@ async function main() {
154
166
  'This suffix is append to every prompt',
155
167
  config.defaultTest?.options?.suffix,
156
168
  )
157
- .option('--no-write', 'Do not write results to promptfoo directory')
158
- .option('--no-cache', 'Do not read or write results to disk cache')
169
+ .option(
170
+ '--no-write',
171
+ 'Do not write results to promptfoo directory',
172
+ config?.commandLineOptions?.write,
173
+ )
174
+ .option(
175
+ '--no-cache',
176
+ 'Do not read or write results to disk cache',
177
+ config?.commandLineOptions?.cache,
178
+ )
159
179
  .option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
160
180
  .option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
161
181
  .option('--view [port]', 'View in browser ui')
@@ -172,7 +192,7 @@ async function main() {
172
192
  const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
173
193
  const configPath = cmdObj.config;
174
194
  if (configPath) {
175
- config = readConfig(configPath);
195
+ config = await readConfig(configPath);
176
196
  } else {
177
197
  config = {
178
198
  prompts: cmdObj.prompts || config.prompts,
@@ -256,8 +276,9 @@ async function main() {
256
276
  },
257
277
  });
258
278
  // Skip first row (header) and add the rest. Color PASS/FAIL
259
- for (const row of summary.table.body) {
279
+ for (const row of summary.table.body.slice(0, 25)) {
260
280
  table.push([
281
+ ...row.vars,
261
282
  ...row.outputs.map((col) => {
262
283
  const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
263
284
  if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
@@ -275,18 +296,29 @@ async function main() {
275
296
  }
276
297
  return col;
277
298
  }),
278
- ...row.vars,
279
299
  ]);
280
300
  }
281
301
 
282
302
  logger.info('\n' + table.toString());
303
+ if (summary.table.body.length > 25) {
304
+ const rowsLeft = summary.table.body.length - 25;
305
+ logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
306
+ }
283
307
  }
308
+
309
+ const border = '='.repeat(process.stdout.columns - 10);
310
+ logger.info(border);
284
311
  if (cmdObj.view || !cmdObj.write) {
285
- logger.info('Evaluation complete');
312
+ logger.info(`${chalk.green('✔')} Evaluation complete`);
286
313
  } else {
287
- writeLatestResults(summary);
288
- logger.info(`Evaluation complete. To use web viewer, run ${chalk.green('promptfoo view')}`);
314
+ writeLatestResults(summary, config);
315
+ logger.info(
316
+ `${chalk.green('✔')} Evaluation complete. To use web viewer, run ${chalk.green(
317
+ 'promptfoo view',
318
+ )}`,
319
+ );
289
320
  }
321
+ logger.info(border);
290
322
  logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
291
323
  logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
292
324
  logger.info(
@@ -294,6 +326,11 @@ async function main() {
294
326
  );
295
327
  logger.info('Done.');
296
328
 
329
+ telemetry.record('command_used', {
330
+ name: 'eval',
331
+ });
332
+ await telemetry.send();
333
+
297
334
  if (cmdObj.view) {
298
335
  init(parseInt(cmdObj.view, 10) || 15500);
299
336
  }
package/src/telemetry.ts ADDED
@@ -0,0 +1,57 @@
1
+ import packageJson from '../package.json';
2
+ import { fetchWithTimeout } from './util';
3
+
4
+ type TelemetryEvent = {
5
+ event: string;
6
+ packageVersion: string;
7
+ properties: Record<string, string | number>;
8
+ };
9
+
10
+ type TelemetryEventTypes = 'eval_ran' | 'assertion_used' | 'command_used';
11
+
12
+ const TELEMETRY_ENDPOINT = 'https://api.promptfoo.dev/telemetry';
13
+
14
+ const TELEMETRY_TIMEOUT_MS = 1000;
15
+
16
+ export class Telemetry {
17
+ private events: TelemetryEvent[] = [];
18
+
19
+ get disabled() {
20
+ return process.env.PROMPTFOO_DISABLE_TELEMETRY === '1';
21
+ }
22
+
23
+ record(eventName: TelemetryEventTypes, properties: Record<string, string | number>): void {
24
+ if (!this.disabled) {
25
+ this.events.push({
26
+ event: eventName,
27
+ packageVersion: packageJson.version,
28
+ properties,
29
+ });
30
+ }
31
+ }
32
+
33
+ async send(): Promise<void> {
34
+ if (!this.disabled && this.events.length > 0) {
35
+ try {
36
+ const response = await fetchWithTimeout(
37
+ TELEMETRY_ENDPOINT,
38
+ {
39
+ method: 'POST',
40
+ headers: {
41
+ 'Content-Type': 'application/json',
42
+ },
43
+ body: JSON.stringify(this.events),
44
+ },
45
+ TELEMETRY_TIMEOUT_MS,
46
+ );
47
+
48
+ if (response.ok) {
49
+ this.events = [];
50
+ }
51
+ } catch (err) {}
52
+ }
53
+ }
54
+ }
55
+
56
+ const telemetry = new Telemetry();
57
+ export default telemetry;
package/src/types.ts CHANGED
@@ -113,13 +113,34 @@ export interface GradingResult {
113
113
  tokensUsed?: TokenUsage;
114
114
  }
115
115
 
116
+ type BaseAssertionTypes =
117
+ | 'equals'
118
+ | 'contains'
119
+ | 'icontains'
120
+ | 'contains-all'
121
+ | 'contains-any'
122
+ | 'regex'
123
+ | 'is-json'
124
+ | 'contains-json'
125
+ | 'javascript'
126
+ | 'similar'
127
+ | 'llm-rubric'
128
+ | 'webhook'
129
+ | 'rouge-n'
130
+ | 'rouge-s'
131
+ | 'rouge-l';
132
+
133
+ type NotPrefixed<T extends string> = `not-${T}`;
134
+
135
+ export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
136
+
116
137
  // TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
117
138
  export interface Assertion {
118
139
  // Type of assertion
119
- type: 'equals' | 'is-json' | 'contains-json' | 'javascript' | 'similar' | 'llm-rubric';
140
+ type: AssertionType;
120
141
 
121
142
  // The expected value, if applicable
122
- value?: string;
143
+ value?: string | string[];
123
144
 
124
145
  // The threshold value, only applicable for similarity (cosine distance)
125
146
  threshold?: number;
package/src/updates.ts ADDED
@@ -0,0 +1,37 @@
1
+ import chalk from 'chalk';
2
+ import semverGt from 'semver/functions/gt';
3
+
4
+ import logger from './logger';
5
+ import { fetchWithTimeout } from './util';
6
+ import packageJson from '../package.json';
7
+
8
+ const VERSION = packageJson.version;
9
+
10
+ export async function getLatestVersion(packageName: string) {
11
+ const response = await fetchWithTimeout(`https://registry.npmjs.org/${packageName}`, {}, 1000);
12
+ if (!response.ok) {
13
+ throw new Error(`Failed to fetch package information for ${packageName}`);
14
+ }
15
+ const data = await response.json();
16
+ return data['dist-tags'].latest;
17
+ }
18
+
19
+ export async function checkForUpdates(): Promise<boolean> {
20
+ const latestVersion = await getLatestVersion('promptfoo');
21
+ if (semverGt(latestVersion, VERSION)) {
22
+ const border = '='.repeat(process.stdout.columns - 10);
23
+ logger.info(
24
+ `\n${border}
25
+ ${chalk.yellow('⚠️')} The current version of promptfoo ${chalk.yellow(
26
+ VERSION,
27
+ )} is lower than the latest available version ${chalk.green(latestVersion)}.
28
+
29
+ Please run ${chalk.green('npx promptfoo@latest')} or ${chalk.green(
30
+ 'npm install -g promptfoo@latest',
31
+ )} to update.
32
+ ${border}\n`,
33
+ );
34
+ return true;
35
+ }
36
+ return false;
37
+ }
package/src/util.ts CHANGED
@@ -2,6 +2,7 @@ import * as fs from 'fs';
2
2
  import * as path from 'node:path';
3
3
  import * as os from 'node:os';
4
4
 
5
+ import $RefParser from '@apidevtools/json-schema-ref-parser';
5
6
  import fetch from 'node-fetch';
6
7
  import yaml from 'js-yaml';
7
8
  import nunjucks from 'nunjucks';
@@ -15,7 +16,15 @@ import { getDirectory } from './esm';
15
16
 
16
17
  import type { RequestInfo, RequestInit, Response } from 'node-fetch';
17
18
 
18
- import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase, Prompt } from './types';
19
+ import type {
20
+ Assertion,
21
+ CsvRow,
22
+ EvaluateSummary,
23
+ UnifiedConfig,
24
+ TestCase,
25
+ Prompt,
26
+ TestSuite,
27
+ } from './types';
19
28
  import { assertionFromString } from './assertions';
20
29
 
21
30
  const PROMPT_DELIMITER = '---';
@@ -28,14 +37,14 @@ function parseJson(json: string): any | undefined {
28
37
  }
29
38
  }
30
39
 
31
- export function maybeReadConfig(configPath: string): UnifiedConfig | undefined {
40
+ export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
32
41
  if (!fs.existsSync(configPath)) {
33
42
  return undefined;
34
43
  }
35
44
  return readConfig(configPath);
36
45
  }
37
46
 
38
- export function readConfig(configPath: string): UnifiedConfig {
47
+ export async function readConfig(configPath: string): Promise<UnifiedConfig> {
39
48
  const ext = path.parse(configPath).ext;
40
49
  switch (ext) {
41
50
  case '.json':
@@ -45,7 +54,9 @@ export function readConfig(configPath: string): UnifiedConfig {
45
54
  return require(configPath) as UnifiedConfig;
46
55
  case '.yaml':
47
56
  case '.yml':
48
- return yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
57
+ let ret = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
58
+ ret = (await $RefParser.dereference(ret)) as UnifiedConfig;
59
+ return ret;
49
60
  default:
50
61
  throw new Error(`Unsupported configuration file format: ${ext}`);
51
62
  }
@@ -235,11 +246,22 @@ export function getLatestResultsPath(): string {
235
246
  return path.join(getConfigDirectoryPath(), 'output', 'latest.json');
236
247
  }
237
248
 
238
- export function writeLatestResults(results: EvaluateSummary) {
249
+ export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
239
250
  const latestResultsPath = getLatestResultsPath();
240
251
  try {
241
252
  fs.mkdirSync(path.dirname(latestResultsPath), { recursive: true });
242
- fs.writeFileSync(latestResultsPath, JSON.stringify(results, null, 2));
253
+ fs.writeFileSync(
254
+ latestResultsPath,
255
+ JSON.stringify(
256
+ {
257
+ version: 1,
258
+ config,
259
+ results,
260
+ },
261
+ null,
262
+ 2,
263
+ ),
264
+ );
243
265
  } catch (err) {
244
266
  logger.error(`Failed to write latest results to ${latestResultsPath}:\n${err}`);
245
267
  }
package/src/web/client/package-lock.json CHANGED
@@ -13,6 +13,7 @@
13
13
  "@mui/icons-material": "^5.11.16",
14
14
  "@mui/material": "^5.13.0",
15
15
  "@tanstack/react-table": "^8.9.1",
16
+ "js-yaml": "^4.1.0",
16
17
  "react": "^18.2.0",
17
18
  "react-dnd": "^16.0.1",
18
19
  "react-dnd-html5-backend": "^16.0.1",
@@ -1652,8 +1653,7 @@
1652
1653
  "node_modules/argparse": {
1653
1654
  "version": "2.0.1",
1654
1655
  "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
1655
- "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
1656
- "dev": true
1656
+ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
1657
1657
  },
1658
1658
  "node_modules/array-union": {
1659
1659
  "version": "2.1.0",
@@ -2512,7 +2512,6 @@
2512
2512
  "version": "4.1.0",
2513
2513
  "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
2514
2514
  "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
2515
- "dev": true,
2516
2515
  "dependencies": {
2517
2516
  "argparse": "^2.0.1"
2518
2517
  },
@@ -4436,8 +4435,7 @@
4436
4435
  "argparse": {
4437
4436
  "version": "2.0.1",
4438
4437
  "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
4439
- "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==",
4440
- "dev": true
4438
+ "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
4441
4439
  },
4442
4440
  "array-union": {
4443
4441
  "version": "2.1.0",
@@ -5102,7 +5100,6 @@
5102
5100
  "version": "4.1.0",
5103
5101
  "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
5104
5102
  "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
5105
- "dev": true,
5106
5103
  "requires": {
5107
5104
  "argparse": "^2.0.1"
5108
5105
  }
package/src/web/client/package.json CHANGED
@@ -15,6 +15,7 @@
15
15
  "@mui/icons-material": "^5.11.16",
16
16
  "@mui/material": "^5.13.0",
17
17
  "@tanstack/react-table": "^8.9.1",
18
+ "js-yaml": "^4.1.0",
18
19
  "react": "^18.2.0",
19
20
  "react-dnd": "^16.0.1",
20
21
  "react-dnd-html5-backend": "^16.0.1",
package/src/web/client/src/App.tsx CHANGED
@@ -11,8 +11,9 @@ import { useStore } from './store.js';
11
11
  import './App.css';
12
12
 
13
13
  function App() {
14
- const { table, setTable } = useStore();
14
+ const { table, setTable, setConfig } = useStore();
15
15
  const [loaded, setLoaded] = React.useState<boolean>(false);
16
+ const loadedFromApi = React.useRef(false);
16
17
 
17
18
  const prefersDarkMode = useMediaQuery('(prefers-color-scheme: dark)');
18
19
  const [darkMode, setDarkMode] = React.useState(prefersDarkMode);
@@ -37,24 +38,43 @@ function App() {
37
38
  };
38
39
 
39
40
  React.useEffect(() => {
40
- //const socket = SocketIOClient(`http://${window.location.host}`);
41
+ const fetchEvalData = async (id: string) => {
42
+ if (loadedFromApi.current) {
43
+ return;
44
+ }
45
+ loadedFromApi.current = true;
46
+ const response = await fetch(`https://api.promptfoo.dev/eval/${id}`);
47
+ const body = await response.json();
48
+ setTable(body.data.results.table);
49
+ setConfig(body.data.config);
50
+ setLoaded(true);
51
+ };
52
+
41
53
  const socket = SocketIOClient(`http://localhost:15500`);
42
54
 
43
- socket.on('init', (data) => {
44
- console.log('Initialized socket connection');
45
- setLoaded(true);
46
- setTable(data.table);
47
- });
55
+ const pathMatch = window.location.pathname.match(/\/eval\/([\w:-]+)/);
56
+ if (pathMatch) {
57
+ const id = pathMatch[1];
58
+ fetchEvalData(id);
59
+ } else {
60
+ socket.on('init', (data) => {
61
+ console.log('Initialized socket connection', data);
62
+ setLoaded(true);
63
+ setTable(data.results.table);
64
+ setConfig(data.config);
65
+ });
48
66
 
49
- socket.on('update', (data) => {
50
- console.log('Received data update');
51
- setTable(data.table);
52
- });
67
+ socket.on('update', (data) => {
68
+ console.log('Received data update', data);
69
+ setTable(data.results.table);
70
+ setConfig(data.config);
71
+ });
72
+ }
53
73
 
54
74
  return () => {
55
75
  socket.disconnect();
56
76
  };
57
- }, [loaded, setTable]);
77
+ }, [setTable, setConfig]);
58
78
 
59
79
  return (
60
80
  <ThemeProvider theme={theme}>
package/src/web/client/src/ConfigModal.tsx ADDED
@@ -0,0 +1,81 @@
1
+ import React from 'react';
2
+ import Dialog from '@mui/material/Dialog';
3
+ import DialogTitle from '@mui/material/DialogTitle';
4
+ import DialogContent from '@mui/material/DialogContent';
5
+ import DialogActions from '@mui/material/DialogActions';
6
+ import Button from '@mui/material/Button';
7
+ import Typography from '@mui/material/Typography';
8
+ import { useStore } from './store';
9
+ import { IconButton, Box } from '@mui/material';
10
+ import { FileCopy, Check } from '@mui/icons-material';
11
+
12
+ interface ConfigModalProps {
13
+ open: boolean;
14
+ onClose: () => void;
15
+ }
16
+
17
+ export default function ConfigModal({ open, onClose }: ConfigModalProps) {
18
+ const { config } = useStore();
19
+ const textareaRef = React.useRef<HTMLTextAreaElement>(null);
20
+ const [copied, setCopied] = React.useState(false);
21
+ const [yamlConfig, setYamlConfig] = React.useState('');
22
+
23
+ React.useEffect(() => {
24
+ if (open) {
25
+ (async () => {
26
+ const { default: yaml } = await import('js-yaml');
27
+ setYamlConfig(yaml.dump(config));
28
+ })();
29
+ }
30
+ }, [open, config]);
31
+
32
+ const handleCopyClick = () => {
33
+ if (textareaRef.current) {
34
+ textareaRef.current.select();
35
+ document.execCommand('copy');
36
+ setCopied(true);
37
+ }
38
+ };
39
+
40
+ const handleClose = () => {
41
+ setCopied(false);
42
+ onClose();
43
+ };
44
+
45
+ return (
46
+ <Dialog
47
+ open={open}
48
+ onClose={handleClose}
49
+ aria-labelledby="config-dialog-title"
50
+ maxWidth="md"
51
+ fullWidth
52
+ >
53
+ <DialogTitle id="config-dialog-title">
54
+ <Box display="flex" justifyContent="space-between" alignItems="center">
55
+ <Typography variant="h6">Config</Typography>
56
+ <IconButton onClick={handleCopyClick}>{copied ? <Check /> : <FileCopy />}</IconButton>
57
+ </Box>
58
+ </DialogTitle>
59
+ <DialogContent>
60
+ <Typography variant="body1" component="div">
61
+ <textarea
62
+ ref={textareaRef}
63
+ readOnly
64
+ value={yamlConfig}
65
+ style={{
66
+ width: '100%',
67
+ minHeight: '400px',
68
+ fontFamily: 'monospace',
69
+ border: '1px solid #ccc',
70
+ }}
71
+ />
72
+ </Typography>
73
+ </DialogContent>
74
+ <DialogActions>
75
+ <Button onClick={handleClose} color="primary">
76
+ Close
77
+ </Button>
78
+ </DialogActions>
79
+ </Dialog>
80
+ );
81
+ }
package/src/web/client/src/ResultsTable.css CHANGED
@@ -40,7 +40,6 @@ td,
40
40
  .td {
41
41
  position: relative;
42
42
  box-shadow: inset 0 0 0 1px var(--border-color);
43
- word-break: break-all;
44
43
  vertical-align: top;
45
44
 
46
45
  padding: 1.5rem;
@@ -50,11 +49,11 @@ th.variable,
50
49
  .th.variable,
51
50
  td.variable,
52
51
  .td.variable {
53
- background-color: #f8fbff;
52
+ background-color: var(--variable-background-color);
54
53
  }
55
54
 
56
55
  tr.header {
57
- background-color: #fffdf7;
56
+ background-color: var(--header-background-color);
58
57
  }
59
58
 
60
59
  th,
@@ -62,7 +61,7 @@ th,
62
61
  padding: 1rem;
63
62
  position: relative;
64
63
  text-align: center;
65
- font-weight: semi-bold;
64
+ vertical-align: bottom;
66
65
  }
67
66
 
68
67
  tr .cell {
@@ -72,7 +71,7 @@ tr .cell-rating {
72
71
  visibility: hidden;
73
72
  position: absolute;
74
73
  bottom: 1.25rem;
75
- right: -1rem;
74
+ right: 0;
76
75
  line-height: 0;
77
76
  font-size: 1.75rem;
78
77
  }
@@ -83,7 +82,10 @@ tr:hover .cell-rating {
83
82
 
84
83
  tr .cell-rating .rating {
85
84
  cursor: pointer;
86
- margin-right: 1rem;
85
+ }
86
+
87
+ tr .cell-rating .rating:first-child {
88
+ margin-right: 0.5rem;
87
89
  }
88
90
 
89
91
  th .smalltext {
@@ -97,6 +99,16 @@ th:hover .smalltext {
97
99
  visibility: visible;
98
100
  }
99
101
 
102
+ th .summary {
103
+ font-weight: normal;
104
+ font-size: 0.8rem;
105
+ padding: 0.25rem;
106
+ }
107
+
108
+ th .summary.highlight {
109
+ background-color: var(--success-background-color);
110
+ }
111
+
100
112
  td,
101
113
  .td {
102
114
  }