promptfoo 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ :root{font-family:system-ui,Avenir,Helvetica,Arial,sans-serif;font-synthesis:none;text-rendering:optimizeLegibility;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale;-webkit-text-size-adjust:100%;--background-color: #ffffff;--text-color: #404040;--border-color: lightgray;--table-border-color: lightgray;--pass-color: green;--fail-color: #ad0000;--smalltext-color: gray;--success-background-color: #d1ffd7;--variable-background-color: #f7f7f7;--header-background-color: #fffdf7}@media (prefers-color-scheme: dark){:root{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888}}[data-theme=dark]{--background-color: #1a1a1a;--text-color: #f0f0f0;--border-color: #444444;--table-border-color: #444444;--pass-color: #4caf50;--fail-color: #f44336;--smalltext-color: #888888;--success-background-color: #216d2b;--variable-background-color: #333;--header-background-color: #333}html{font-size:calc(14px + (18 - 14) * ((100vw - 300px) / (1600 - 300)))}*{box-sizing:border-box}html{font-family:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji",Segoe UI Symbol;font-size:16px;background-color:var(--background-color);color:var(--text-color)}table,.divTable{border:1px solid var(--table-border-color);border-collapse:collapse;width:100%;margin:1rem 0;box-shadow:0 2px 4px #0000001a}.tr{display:flex}tr,.tr{width:fit-content}tr:hover,.tr:hover{background-color:#0000000d}th,.th,td,.td{position:relative;box-shadow:inset 0 0 0 1px var(--border-color);vertical-align:top;padding:1.5rem}th.variable,.th.variable,td.variable,.td.variable{background-color:var(--variable-background-color)}tr.header{background-color:var(--header-background-color)}th,.th{padding:1rem;position:relative;text-align:center;vertical-align:bottom}tr 
.cell-rating{visibility:hidden;position:absolute;bottom:1.25rem;right:0;line-height:0;font-size:1.75rem}tr:hover .cell-rating{visibility:visible}tr .cell-rating .rating{cursor:pointer}tr .cell-rating .rating:first-child{margin-right:.5rem}th .smalltext{visibility:hidden;font-weight:400;font-size:.75rem;color:var(--smalltext-color)}th:hover .smalltext{visibility:visible}th .summary{font-weight:400;font-size:.8rem;padding:.25rem}th .summary.highlight{background-color:var(--success-background-color)}td .status{margin-bottom:.5rem;font-weight:700}td .pass{color:var(--pass-color)}td .fail{color:var(--fail-color)}.first-prompt-col{border-left:2px solid #888}.first-prompt-row{border-top:2px solid #888}.resizer{position:absolute;right:0;top:0;height:100%;width:5px;cursor:col-resize;user-select:none;touch-action:none;background:var(--text-color);opacity:.5}.resizer.isResizing{background:var(--text-color);opacity:1}@media (hover: hover){.resizer{opacity:0}*:hover>.resizer{opacity:1}}.logo{display:flex;align-items:center;gap:4px}.logo img{width:30px}.logo span{margin-bottom:6px;color:var(--text-color)}[data-theme=dark] .logo img{filter:invert(1)}nav{display:flex;justify-content:space-between;align-items:center;margin-bottom:1rem;color:var(--text-color)}.dark-mode-toggle{background-color:transparent;border:none;color:var(--text-color);cursor:pointer;font-size:16px;padding:8px;transition:color .3s}.dark-mode-toggle:hover{color:var(--pass-color)}body{background-color:var(--background-color);color:var(--text-color)}
@@ -5,8 +5,8 @@
5
5
  <link rel="icon" type="image/svg+xml" href="favicon.ico" />
6
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
7
7
  <title>promptfoo web viewer</title>
8
- <script type="module" crossorigin src="/assets/index-b72d3ca9.js"></script>
9
- <link rel="stylesheet" href="/assets/index-9a9ba400.css">
8
+ <script type="module" crossorigin src="/assets/index-15dfcd18.js"></script>
9
+ <link rel="stylesheet" href="/assets/index-87905193.css">
10
10
  </head>
11
11
  <body>
12
12
  <div id="root"></div>
package/package.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "name": "promptfoo",
3
3
  "description": "Prompt engineering toolkit",
4
4
  "author": "Ian Webster",
5
- "version": "0.10.0",
5
+ "version": "0.11.0",
6
6
  "license": "MIT",
7
7
  "type": "commonjs",
8
8
  "main": "dist/index.js",
@@ -59,6 +59,7 @@
59
59
  "typescript": "^5.0.4"
60
60
  },
61
61
  "dependencies": {
62
+ "@apidevtools/json-schema-ref-parser": "^10.1.0",
62
63
  "async": "^3.2.4",
63
64
  "cache-manager": "^4.1.0",
64
65
  "cache-manager-fs-hash": "^1.0.0",
@@ -76,6 +77,7 @@
76
77
  "node-fetch": "^2.6.7",
77
78
  "nunjucks": "^3.2.4",
78
79
  "opener": "^1.5.2",
80
+ "rouge": "^1.0.3",
79
81
  "socket.io": "^4.6.1",
80
82
  "tiny-invariant": "^1.3.1",
81
83
  "winston": "^3.8.2"
package/src/assertions.ts CHANGED
@@ -1,17 +1,49 @@
1
+ import rouge from 'rouge';
1
2
  import invariant from 'tiny-invariant';
2
3
  import nunjucks from 'nunjucks';
3
4
 
4
5
  import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
5
- import { cosineSimilarity } from './util';
6
+ import { cosineSimilarity, fetchWithTimeout } from './util';
6
7
  import { loadApiProvider } from './providers';
7
8
  import { DEFAULT_GRADING_PROMPT } from './prompts';
8
9
 
9
- import type { Assertion, GradingConfig, TestCase, GradingResult, AtomicTestCase } from './types';
10
+ import type {
11
+ Assertion,
12
+ AssertionType,
13
+ GradingConfig,
14
+ GradingResult,
15
+ AtomicTestCase,
16
+ } from './types';
10
17
 
11
18
  const SIMILAR_REGEX = /similar(?::|\((\d+(\.\d+)?)\):)/;
12
19
 
13
20
  const DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD = 0.8;
14
21
 
22
/**
 * Grades `output` against `expected` using a ROUGE metric from the `rouge` package.
 *
 * The metric is selected by the last character of `baseType` ('n', 'l' or 's') and
 * looked up on the imported `rouge` object. The assertion passes when the score
 * reaches the threshold; `inverted` flips that outcome (used for `not-` assertions).
 *
 * @param baseType - Assertion type with any `not-` prefix already removed.
 * @param assertion - Source assertion; only `threshold` is read (0.75 when unset).
 * @param expected - Reference text(s), handed to the ROUGE function unchanged.
 * @param output - Candidate text to score.
 * @param inverted - True when the assertion was `not-` prefixed.
 * @returns The verdict plus a reason that states the actual score/threshold relation.
 */
function handleRougeScore(
  baseType: 'rouge-n' | 'rouge-l' | 'rouge-s',
  assertion: Assertion,
  expected: string | string[],
  output: string,
  inverted: boolean,
): GradingResult {
  const fnName = baseType[baseType.length - 1] as 'n' | 'l' | 's';
  const rougeMethod = rouge[fnName];
  // NOTE(review): `expected` may be a string[]; confirm the rouge functions accept arrays.
  const score = rougeMethod(output, expected);

  // `??` rather than `||` so an explicit threshold of 0 is honoured.
  const threshold = assertion.threshold ?? 0.75;
  const meetsThreshold = score >= threshold;
  const pass = meetsThreshold !== inverted;
  const label = baseType.toUpperCase();

  return {
    pass,
    // The reason follows the real comparison, not `pass`, so it stays truthful
    // for inverted (`not-`) assertions as well.
    reason: meetsThreshold
      ? `${label} score ${score} is greater than or equal to threshold ${threshold}`
      : `${label} score ${score} is less than threshold ${threshold}`,
  };
}
46
+
15
47
  export async function runAssertions(test: AtomicTestCase, output: string): Promise<GradingResult> {
16
48
  const tokensUsed = {
17
49
  total: 0,
@@ -46,7 +78,12 @@ export async function runAssertion(
46
78
  ): Promise<GradingResult> {
47
79
  let pass: boolean = false;
48
80
 
49
- if (assertion.type === 'equals') {
81
+ invariant(assertion.type, `Assertion must have a type: ${JSON.stringify(assertion)}`);
82
+
83
+ const inverse = assertion.type.startsWith('not-');
84
+ const baseType = inverse ? assertion.type.slice(4) : assertion.type;
85
+
86
+ if (baseType === 'equals') {
50
87
  pass = assertion.value === output;
51
88
  return {
52
89
  pass,
@@ -54,52 +91,194 @@ export async function runAssertion(
54
91
  };
55
92
  }
56
93
 
57
- if (assertion.type === 'is-json') {
94
+ if (baseType === 'is-json') {
58
95
  try {
59
96
  JSON.parse(output);
60
- return { pass: true, reason: 'Assertion passed' };
97
+ pass = !inverse;
61
98
  } catch (err) {
62
- return {
63
- pass: false,
64
- reason: `Expected output to be valid JSON, but it isn't.\nError: ${err}`,
65
- };
99
+ pass = inverse;
66
100
  }
101
+ return { pass, reason: pass ? 'Assertion passed' : 'Expected output to be valid JSON' };
102
+ }
103
+
104
+ if (baseType === 'contains') {
105
+ invariant(assertion.value, '"contains" assertion type must have a string value');
106
+ invariant(
107
+ typeof assertion.value === 'string',
108
+ '"contains" assertion type must have a string value',
109
+ );
110
+ pass = output.includes(assertion.value) !== inverse;
111
+ return {
112
+ pass,
113
+ reason: pass
114
+ ? 'Assertion passed'
115
+ : `Expected output to ${inverse ? 'not ' : ''}contain "${assertion.value}"`,
116
+ };
117
+ }
118
+
119
+ if (baseType === 'contains-any') {
120
+ invariant(assertion.value, '"contains-any" assertion type must have a value');
121
+ invariant(
122
+ Array.isArray(assertion.value),
123
+ '"contains-any" assertion type must have an array value',
124
+ );
125
+ pass = assertion.value.some((value) => output.includes(value)) !== inverse;
126
+ return {
127
+ pass,
128
+ reason: pass
129
+ ? 'Assertion passed'
130
+ : `Expected output to ${inverse ? 'not ' : ''}contain one of "${assertion.value.join(
131
+ ', ',
132
+ )}"`,
133
+ };
67
134
  }
68
135
 
69
- if (assertion.type === 'contains-json') {
70
- const pass = containsJSON(output);
136
+ if (baseType === 'contains-all') {
137
+ invariant(assertion.value, '"contains-all" assertion type must have a value');
138
+ invariant(
139
+ Array.isArray(assertion.value),
140
+ '"contains-all" assertion type must have an array value',
141
+ );
142
+ pass = assertion.value.every((value) => output.includes(value)) !== inverse;
143
+ return {
144
+ pass,
145
+ reason: pass
146
+ ? 'Assertion passed'
147
+ : `Expected output to ${inverse ? 'not ' : ''}contain all of "${assertion.value.join(
148
+ ', ',
149
+ )}"`,
150
+ };
151
+ }
152
+
153
+ if (baseType === 'regex') {
154
+ invariant(assertion.value, '"regex" assertion type must have a string value');
155
+ invariant(
156
+ typeof assertion.value === 'string',
157
+ '"contains" assertion type must have a string value',
158
+ );
159
+ const regex = new RegExp(assertion.value);
160
+ pass = regex.test(output) !== inverse;
161
+ return {
162
+ pass,
163
+ reason: pass
164
+ ? 'Assertion passed'
165
+ : `Expected output to ${inverse ? 'not ' : ''}match regex "${assertion.value}"`,
166
+ };
167
+ }
168
+
169
+ if (baseType === 'icontains') {
170
+ invariant(assertion.value, '"icontains" assertion type must have a string value');
171
+ invariant(
172
+ typeof assertion.value === 'string',
173
+ '"icontains" assertion type must have a string value',
174
+ );
175
+ pass = output.toLowerCase().includes(assertion.value.toLowerCase()) !== inverse;
176
+ return {
177
+ pass,
178
+ reason: pass
179
+ ? 'Assertion passed'
180
+ : `Expected output to ${inverse ? 'not ' : ''}contain "${assertion.value}"`,
181
+ };
182
+ }
183
+
184
+ if (baseType === 'contains-json') {
185
+ pass = containsJSON(output) !== inverse;
71
186
  return {
72
187
  pass,
73
- reason: pass ? 'Assertion passed' : 'Expected output to contain valid JSON',
188
+ reason: pass
189
+ ? 'Assertion passed'
190
+ : `Expected output to ${inverse ? 'not ' : ''}contain valid JSON`,
74
191
  };
75
192
  }
76
193
 
77
- if (assertion.type === 'javascript') {
194
+ if (baseType === 'javascript') {
78
195
  try {
79
- const customFunction = new Function('output', `return ${assertion.value}`);
80
- pass = customFunction(output);
196
+ const customFunction = new Function('output', 'context', `return ${assertion.value}`);
197
+ const context = {
198
+ vars: test.vars || {},
199
+ };
200
+ pass = customFunction(output, context) !== inverse;
81
201
  } catch (err) {
82
202
  return {
83
203
  pass: false,
84
- reason: `Custom function threw error: ${(err as Error).message}`,
204
+ reason: `Custom function threw error: ${(err as Error).message}
205
+ ${assertion.value}`,
85
206
  };
86
207
  }
87
208
  return {
88
209
  pass,
89
- reason: pass ? 'Assertion passed' : `Custom function returned false`,
210
+ reason: pass
211
+ ? 'Assertion passed'
212
+ : `Custom function returned ${inverse ? 'true' : 'false'}
213
+ ${assertion.value}`,
90
214
  };
91
215
  }
92
216
 
93
- if (assertion.type === 'similar') {
217
+ if (baseType === 'similar') {
94
218
  invariant(assertion.value, 'Similarity assertion must have a string value');
95
- return matchesSimilarity(assertion.value, output, assertion.threshold || 0.75);
219
+ invariant(
220
+ typeof assertion.value === 'string',
221
+ '"contains" assertion type must have a string value',
222
+ );
223
+ return matchesSimilarity(assertion.value, output, assertion.threshold || 0.75, inverse);
96
224
  }
97
225
 
98
- if (assertion.type === 'llm-rubric') {
226
+ if (baseType === 'llm-rubric') {
99
227
  invariant(assertion.value, 'Similarity assertion must have a string value');
228
+ invariant(
229
+ typeof assertion.value === 'string',
230
+ '"contains" assertion type must have a string value',
231
+ );
100
232
  return matchesLlmRubric(assertion.value, output, test.options);
101
233
  }
102
234
 
235
+ if (baseType === 'webhook') {
236
+ invariant(assertion.value, '"webhook" assertion type must have a URL value');
237
+ invariant(
238
+ typeof assertion.value === 'string',
239
+ '"webhook" assertion type must have a URL value',
240
+ );
241
+
242
+ try {
243
+ const context = {
244
+ vars: test.vars || {},
245
+ };
246
+ const response = await fetchWithTimeout(
247
+ assertion.value,
248
+ {
249
+ method: 'POST',
250
+ headers: {
251
+ 'Content-Type': 'application/json',
252
+ },
253
+ body: JSON.stringify({ output, context }),
254
+ },
255
+ process.env.WEBHOOK_TIMEOUT ? parseInt(process.env.WEBHOOK_TIMEOUT, 10) : 5000,
256
+ );
257
+
258
+ if (!response.ok) {
259
+ throw new Error(`Webhook response status: ${response.status}`);
260
+ }
261
+
262
+ const jsonResponse = await response.json();
263
+ pass = jsonResponse.pass !== inverse;
264
+ } catch (err) {
265
+ return {
266
+ pass: false,
267
+ reason: `Webhook error: ${(err as Error).message}`,
268
+ };
269
+ }
270
+
271
+ return {
272
+ pass,
273
+ reason: pass ? 'Assertion passed' : `Webhook returned ${inverse ? 'true' : 'false'}`,
274
+ };
275
+ }
276
+
277
+ if (baseType === 'rouge-n') {
278
+ invariant(assertion.value, '"rouge" assertion type must a value (string or string array)');
279
+ return handleRougeScore(baseType, assertion, assertion.value, output, inverse);
280
+ }
281
+
103
282
  throw new Error('Unknown assertion type: ' + assertion.type);
104
283
  }
105
284
 
@@ -125,6 +304,7 @@ export async function matchesSimilarity(
125
304
  expected: string,
126
305
  output: string,
127
306
  threshold: number,
307
+ inverse: boolean = false,
128
308
  ): Promise<GradingResult> {
129
309
  const expectedEmbedding = await DefaultEmbeddingProvider.callEmbeddingApi(expected);
130
310
  const outputEmbedding = await DefaultEmbeddingProvider.callEmbeddingApi(output);
@@ -155,16 +335,19 @@ export async function matchesSimilarity(
155
335
  }
156
336
 
157
337
  const similarity = cosineSimilarity(expectedEmbedding.embedding, outputEmbedding.embedding);
158
- if (similarity < threshold) {
338
+ const pass = inverse ? similarity <= threshold : similarity >= threshold;
339
+ const greaterThanReason = `Similarity ${similarity} is greater than threshold ${threshold}`;
340
+ const lessThanReason = `Similarity ${similarity} is less than threshold ${threshold}`;
341
+ if (pass) {
159
342
  return {
160
- pass: false,
161
- reason: `Similarity ${similarity} is less than threshold ${threshold}`,
343
+ pass: true,
344
+ reason: inverse ? lessThanReason : greaterThanReason,
162
345
  tokensUsed,
163
346
  };
164
347
  }
165
348
  return {
166
- pass: true,
167
- reason: `Similarity ${similarity} is greater than threshold ${threshold}`,
349
+ pass: false,
350
+ reason: inverse ? greaterThanReason : lessThanReason,
168
351
  tokensUsed,
169
352
  };
170
353
  }
@@ -224,16 +407,7 @@ export async function matchesLlmRubric(
224
407
  }
225
408
 
226
409
  export function assertionFromString(expected: string): Assertion {
227
- const match = expected.match(SIMILAR_REGEX);
228
- if (match) {
229
- const threshold = parseFloat(match[1]) || DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD;
230
- const rest = expected.replace(SIMILAR_REGEX, '').trim();
231
- return {
232
- type: 'similar',
233
- value: rest,
234
- threshold,
235
- };
236
- }
410
+ // Legacy options
237
411
  if (expected.startsWith('fn:') || expected.startsWith('eval:')) {
238
412
  // TODO(1.0): delete eval: legacy option
239
413
  const sliceLength = expected.startsWith('fn:') ? 'fn:'.length : 'eval:'.length;
@@ -249,11 +423,48 @@ export function assertionFromString(expected: string): Assertion {
249
423
  value: expected.slice(6),
250
424
  };
251
425
  }
426
+
427
+ // New options
428
+ const assertionRegex =
429
+ /^(not-)?(equals|contains|contains-any|contains-all|regex|icontains):(.+)$/;
430
+ const regexMatch = expected.match(assertionRegex);
431
+
432
+ if (regexMatch) {
433
+ const [_, notPrefix, type, value] = regexMatch;
434
+ const fullType = notPrefix ? `not-${type}` : type;
435
+
436
+ if (type === 'contains-any' || type === 'contains-all') {
437
+ return {
438
+ type: fullType as AssertionType,
439
+ value: value.split(',').map((s) => s.trim()),
440
+ };
441
+ } else {
442
+ return {
443
+ type: fullType as AssertionType,
444
+ value,
445
+ };
446
+ }
447
+ }
448
+
449
+ // Options that require some special handling
450
+ const match = expected.match(SIMILAR_REGEX);
451
+ if (match) {
452
+ const threshold = parseFloat(match[1]) || DEFAULT_SEMANTIC_SIMILARITY_THRESHOLD;
453
+ const rest = expected.replace(SIMILAR_REGEX, '').trim();
454
+ return {
455
+ type: 'similar',
456
+ value: rest,
457
+ threshold,
458
+ };
459
+ }
460
+
252
461
  if (expected === 'is-json' || expected === 'contains-json') {
253
462
  return {
254
463
  type: expected,
255
464
  };
256
465
  }
466
+
467
+ // Default to equality
257
468
  return {
258
469
  type: 'equals',
259
470
  value: expected,
package/src/evaluator.ts CHANGED
@@ -145,7 +145,7 @@ class Evaluator {
145
145
  } catch (err) {
146
146
  return {
147
147
  ...setup,
148
- error: String(err),
148
+ error: String(err) + '\n\n' + (err as Error).stack,
149
149
  success: false,
150
150
  };
151
151
  }
package/src/main.ts CHANGED
@@ -68,7 +68,7 @@ async function main() {
68
68
  ];
69
69
  let config: Partial<UnifiedConfig> = {};
70
70
  for (const path of potentialPaths) {
71
- const maybeConfig = maybeReadConfig(path);
71
+ const maybeConfig = await maybeReadConfig(path);
72
72
  if (maybeConfig) {
73
73
  config = maybeConfig;
74
74
  break;
@@ -154,8 +154,16 @@ async function main() {
154
154
  'This suffix is append to every prompt',
155
155
  config.defaultTest?.options?.suffix,
156
156
  )
157
- .option('--no-write', 'Do not write results to promptfoo directory')
158
- .option('--no-cache', 'Do not read or write results to disk cache')
157
+ .option(
158
+ '--no-write',
159
+ 'Do not write results to promptfoo directory',
160
+ config?.commandLineOptions?.write,
161
+ )
162
+ .option(
163
+ '--no-cache',
164
+ 'Do not read or write results to disk cache',
165
+ config?.commandLineOptions?.cache,
166
+ )
159
167
  .option('--grader', 'Model that will grade outputs', config?.commandLineOptions?.grader)
160
168
  .option('--verbose', 'Show debug logs', config?.commandLineOptions?.verbose)
161
169
  .option('--view [port]', 'View in browser ui')
@@ -172,7 +180,7 @@ async function main() {
172
180
  const maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
173
181
  const configPath = cmdObj.config;
174
182
  if (configPath) {
175
- config = readConfig(configPath);
183
+ config = await readConfig(configPath);
176
184
  } else {
177
185
  config = {
178
186
  prompts: cmdObj.prompts || config.prompts,
@@ -256,8 +264,9 @@ async function main() {
256
264
  },
257
265
  });
258
266
  // Skip first row (header) and add the rest. Color PASS/FAIL
259
- for (const row of summary.table.body) {
267
+ for (const row of summary.table.body.slice(0, 25)) {
260
268
  table.push([
269
+ ...row.vars,
261
270
  ...row.outputs.map((col) => {
262
271
  const tableCellMaxLength = parseInt(cmdObj.tableCellMaxLength || '', 10);
263
272
  if (!isNaN(tableCellMaxLength) && col.length > tableCellMaxLength) {
@@ -275,11 +284,14 @@ async function main() {
275
284
  }
276
285
  return col;
277
286
  }),
278
- ...row.vars,
279
287
  ]);
280
288
  }
281
289
 
282
290
  logger.info('\n' + table.toString());
291
+ if (summary.table.body.length > 25) {
292
+ const rowsLeft = summary.table.body.length - 25;
293
+ logger.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
294
+ }
283
295
  }
284
296
  if (cmdObj.view || !cmdObj.write) {
285
297
  logger.info('Evaluation complete');
package/src/types.ts CHANGED
@@ -113,13 +113,34 @@ export interface GradingResult {
113
113
  tokensUsed?: TokenUsage;
114
114
  }
115
115
 
116
+ type BaseAssertionTypes =
117
+ | 'equals'
118
+ | 'contains'
119
+ | 'icontains'
120
+ | 'contains-all'
121
+ | 'contains-any'
122
+ | 'regex'
123
+ | 'is-json'
124
+ | 'contains-json'
125
+ | 'javascript'
126
+ | 'similar'
127
+ | 'llm-rubric'
128
+ | 'webhook'
129
+ | 'rouge-n'
130
+ | 'rouge-s'
131
+ | 'rouge-l';
132
+
133
+ type NotPrefixed<T extends string> = `not-${T}`;
134
+
135
+ export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
136
+
116
137
  // TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
117
138
  export interface Assertion {
118
139
  // Type of assertion
119
- type: 'equals' | 'is-json' | 'contains-json' | 'javascript' | 'similar' | 'llm-rubric';
140
+ type: AssertionType;
120
141
 
121
142
  // The expected value, if applicable
122
- value?: string;
143
+ value?: string | string[];
123
144
 
124
145
  // The threshold value, only applicable for similarity (cosine distance)
125
146
  threshold?: number;
package/src/util.ts CHANGED
@@ -2,6 +2,7 @@ import * as fs from 'fs';
2
2
  import * as path from 'node:path';
3
3
  import * as os from 'node:os';
4
4
 
5
+ import $RefParser from '@apidevtools/json-schema-ref-parser';
5
6
  import fetch from 'node-fetch';
6
7
  import yaml from 'js-yaml';
7
8
  import nunjucks from 'nunjucks';
@@ -28,14 +29,14 @@ function parseJson(json: string): any | undefined {
28
29
  }
29
30
  }
30
31
 
31
- export function maybeReadConfig(configPath: string): UnifiedConfig | undefined {
32
+ export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
32
33
  if (!fs.existsSync(configPath)) {
33
34
  return undefined;
34
35
  }
35
36
  return readConfig(configPath);
36
37
  }
37
38
 
38
- export function readConfig(configPath: string): UnifiedConfig {
39
+ export async function readConfig(configPath: string): Promise<UnifiedConfig> {
39
40
  const ext = path.parse(configPath).ext;
40
41
  switch (ext) {
41
42
  case '.json':
@@ -45,7 +46,9 @@ export function readConfig(configPath: string): UnifiedConfig {
45
46
  return require(configPath) as UnifiedConfig;
46
47
  case '.yaml':
47
48
  case '.yml':
48
- return yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
49
+ let ret = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
50
+ ret = (await $RefParser.dereference(ret)) as UnifiedConfig;
51
+ return ret;
49
52
  default:
50
53
  throw new Error(`Unsupported configuration file format: ${ext}`);
51
54
  }
@@ -40,7 +40,6 @@ td,
40
40
  .td {
41
41
  position: relative;
42
42
  box-shadow: inset 0 0 0 1px var(--border-color);
43
- word-break: break-all;
44
43
  vertical-align: top;
45
44
 
46
45
  padding: 1.5rem;
@@ -50,11 +49,11 @@ th.variable,
50
49
  .th.variable,
51
50
  td.variable,
52
51
  .td.variable {
53
- background-color: #f8fbff;
52
+ background-color: var(--variable-background-color);
54
53
  }
55
54
 
56
55
  tr.header {
57
- background-color: #fffdf7;
56
+ background-color: var(--header-background-color);
58
57
  }
59
58
 
60
59
  th,
@@ -62,7 +61,7 @@ th,
62
61
  padding: 1rem;
63
62
  position: relative;
64
63
  text-align: center;
65
- font-weight: semi-bold;
64
+ vertical-align: bottom;
66
65
  }
67
66
 
68
67
  tr .cell {
@@ -72,7 +71,7 @@ tr .cell-rating {
72
71
  visibility: hidden;
73
72
  position: absolute;
74
73
  bottom: 1.25rem;
75
- right: -1rem;
74
+ right: 0;
76
75
  line-height: 0;
77
76
  font-size: 1.75rem;
78
77
  }
@@ -83,7 +82,10 @@ tr:hover .cell-rating {
83
82
 
84
83
  tr .cell-rating .rating {
85
84
  cursor: pointer;
86
- margin-right: 1rem;
85
+ }
86
+
87
+ tr .cell-rating .rating:first-child {
88
+ margin-right: 0.5rem;
87
89
  }
88
90
 
89
91
  th .smalltext {
@@ -97,6 +99,16 @@ th:hover .smalltext {
97
99
  visibility: visible;
98
100
  }
99
101
 
102
+ th .summary {
103
+ font-weight: normal;
104
+ font-size: 0.8rem;
105
+ padding: 0.25rem;
106
+ }
107
+
108
+ th .summary.highlight {
109
+ background-color: var(--success-background-color);
110
+ }
111
+
100
112
  td,
101
113
  .td {
102
114
  }