promptfoo 0.18.1 → 0.18.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/dist/package.json +1 -1
  2. package/dist/src/assertions.d.ts +2 -2
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +42 -11
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/cache.d.ts +1 -1
  7. package/dist/src/cache.d.ts.map +1 -1
  8. package/dist/src/cache.js +4 -4
  9. package/dist/src/cache.js.map +1 -1
  10. package/dist/src/evaluator.d.ts.map +1 -1
  11. package/dist/src/evaluator.js +5 -2
  12. package/dist/src/evaluator.js.map +1 -1
  13. package/dist/src/main.js +4 -4
  14. package/dist/src/main.js.map +1 -1
  15. package/dist/src/providers/azureopenai.d.ts +2 -2
  16. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  17. package/dist/src/providers/azureopenai.js +7 -5
  18. package/dist/src/providers/azureopenai.js.map +1 -1
  19. package/dist/src/providers/llama.js +1 -1
  20. package/dist/src/providers/llama.js.map +1 -1
  21. package/dist/src/providers/localai.js +2 -2
  22. package/dist/src/providers/localai.js.map +1 -1
  23. package/dist/src/providers/ollama.d.ts +9 -0
  24. package/dist/src/providers/ollama.d.ts.map +1 -0
  25. package/dist/src/providers/ollama.js +66 -0
  26. package/dist/src/providers/ollama.js.map +1 -0
  27. package/dist/src/providers/openai.d.ts +2 -2
  28. package/dist/src/providers/openai.d.ts.map +1 -1
  29. package/dist/src/providers/openai.js +7 -5
  30. package/dist/src/providers/openai.js.map +1 -1
  31. package/dist/src/providers.d.ts.map +1 -1
  32. package/dist/src/providers.js +11 -5
  33. package/dist/src/providers.js.map +1 -1
  34. package/dist/src/types.d.ts +6 -2
  35. package/dist/src/types.d.ts.map +1 -1
  36. package/dist/src/util.d.ts +2 -0
  37. package/dist/src/util.d.ts.map +1 -1
  38. package/dist/src/util.js +24 -12
  39. package/dist/src/util.js.map +1 -1
  40. package/dist/src/web/client/assets/index-6d2a3573.js +200 -0
  41. package/dist/src/web/client/index.html +1 -1
  42. package/package.json +1 -1
  43. package/src/assertions.ts +45 -11
  44. package/src/cache.ts +3 -2
  45. package/src/evaluator.ts +5 -1
  46. package/src/main.ts +4 -4
  47. package/src/providers/azureopenai.ts +18 -6
  48. package/src/providers/llama.ts +2 -2
  49. package/src/providers/localai.ts +3 -3
  50. package/src/providers/ollama.ts +88 -0
  51. package/src/providers/openai.ts +8 -6
  52. package/src/providers.ts +20 -5
  53. package/src/types.ts +6 -2
  54. package/src/util.ts +25 -17
  55. package/src/web/client/package-lock.json +5726 -0
  56. package/src/web/client/src/EvalOutputPromptDialog.tsx +78 -16
  57. package/src/web/client/src/ResultsTable.tsx +32 -9
  58. package/src/web/client/src/ResultsView.tsx +1 -1
  59. package/src/web/client/src/types.ts +3 -1
  60. package/dist/src/web/client/assets/index-8388d689.js +0 -199
package/src/types.ts CHANGED
@@ -28,7 +28,7 @@ export interface CommandLineOptions {
28
28
  }
29
29
 
30
30
  export interface ProviderConfig {
31
- id: ProviderId;
31
+ id?: ProviderId;
32
32
  config?: any;
33
33
  prompts?: string[]; // List of prompt display strings
34
34
  }
@@ -97,6 +97,7 @@ export interface EvaluateResult {
97
97
  success: boolean;
98
98
  score: number;
99
99
  latencyMs: number;
100
+ gradingResult?: GradingResult;
100
101
  }
101
102
 
102
103
  export interface EvaluateTableOutput {
@@ -106,6 +107,7 @@ export interface EvaluateTableOutput {
106
107
  prompt: string;
107
108
  latencyMs: number;
108
109
  tokenUsage?: Partial<TokenUsage>;
110
+ gradingResult?: GradingResult;
109
111
  }
110
112
 
111
113
  export interface EvaluateTable {
@@ -138,6 +140,8 @@ export interface GradingResult {
138
140
  score: number;
139
141
  reason: string;
140
142
  tokensUsed?: TokenUsage;
143
+ componentResults?: GradingResult[];
144
+ assertion: Assertion | null;
141
145
  }
142
146
 
143
147
  type BaseAssertionTypes =
@@ -244,7 +248,7 @@ export type ProviderId = string;
244
248
 
245
249
  export type ProviderFunction = (prompt: string) => Promise<ProviderResponse>;
246
250
 
247
- export type RawProviderConfig = Record<ProviderId, Omit<ProviderConfig, 'id'>>;
251
+ export type RawProviderConfig = Record<ProviderId, ProviderConfig>;
248
252
 
249
253
  // TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
250
254
  export interface TestSuiteConfig {
package/src/util.ts CHANGED
@@ -4,7 +4,6 @@ import * as os from 'os';
4
4
 
5
5
  import $RefParser from '@apidevtools/json-schema-ref-parser';
6
6
  import fetch from 'node-fetch';
7
- import invariant from 'tiny-invariant';
8
7
  import yaml from 'js-yaml';
9
8
  import nunjucks from 'nunjucks';
10
9
  import { globSync } from 'glob';
@@ -13,7 +12,6 @@ import { parse as parseCsv } from 'csv-parse/sync';
13
12
  import { stringify } from 'csv-stringify/sync';
14
13
 
15
14
  import logger from './logger';
16
- import { assertionFromString } from './assertions';
17
15
  import { getDirectory } from './esm';
18
16
 
19
17
  import type { RequestInfo, RequestInit, Response } from 'node-fetch';
@@ -45,20 +43,21 @@ export function readProviderPromptMap(
45
43
  allPrompts.push(prompt.display);
46
44
  }
47
45
 
48
- invariant(
49
- typeof config.providers !== 'string',
50
- 'In order to use a provider-prompt map, config.providers should be an array of objects, not a string',
51
- );
52
- invariant(
53
- typeof config.providers !== 'function',
54
- 'In order to use a provider-prompt map, config.providers should be an array of objects, not a function',
55
- );
46
+ if (typeof config.providers === 'string') {
47
+ return { [config.providers]: allPrompts };
48
+ }
49
+
50
+ if (typeof config.providers === 'function') {
51
+ return { 'Custom function': allPrompts };
52
+ }
56
53
 
57
54
  for (const provider of config.providers) {
58
55
  if (typeof provider === 'object') {
59
56
  const rawProvider = provider as RawProviderConfig;
60
- const id = Object.keys(rawProvider)[0];
61
- ret[id] = rawProvider[id].prompts || allPrompts;
57
+ const originalId = Object.keys(rawProvider)[0];
58
+ const providerObject = rawProvider[originalId];
59
+ const id = providerObject.id || originalId;
60
+ ret[id] = rawProvider[originalId].prompts || allPrompts;
62
61
  }
63
62
  }
64
63
 
@@ -368,7 +367,7 @@ export function writeOutput(
368
367
  [...results.table.head.prompts, ...results.table.head.vars],
369
368
  ...results.table.body.map((row) => [...row.outputs.map(outputToSimpleString), ...row.vars]),
370
369
  ];
371
- const htmlOutput = nunjucks.renderString(template, {
370
+ const htmlOutput = getNunjucksEngine().renderString(template, {
372
371
  table,
373
372
  results: results.results,
374
373
  });
@@ -456,10 +455,12 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
456
455
  2,
457
456
  ),
458
457
  );
459
- if (fs.existsSync(latestResultsPath) && fs.lstatSync(latestResultsPath).isSymbolicLink()) {
458
+
459
+ try {
460
460
  fs.unlinkSync(latestResultsPath);
461
- }
461
+ } catch {}
462
462
  fs.symlinkSync(newResultsPath, latestResultsPath);
463
+
463
464
  cleanupOldResults();
464
465
  } catch (err) {
465
466
  logger.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
@@ -501,8 +502,7 @@ export function readResult(
501
502
  export function readLatestResults():
502
503
  | { results: EvaluateSummary; config: Partial<UnifiedConfig> }
503
504
  | undefined {
504
- const latestResultsPath = getLatestResultsPath();
505
- return readResult(latestResultsPath);
505
+ return JSON.parse(fs.readFileSync(getLatestResultsPath(), 'utf-8'));
506
506
  }
507
507
 
508
508
  export function cosineSimilarity(vecA: number[], vecB: number[]) {
@@ -521,6 +521,7 @@ export function testCaseFromCsvRow(row: CsvRow): TestCase {
521
521
  for (const [key, value] of Object.entries(row)) {
522
522
  if (key === '__expected') {
523
523
  if (value.trim() !== '') {
524
+ const { assertionFromString } = require('./assertions');
524
525
  asserts.push(assertionFromString(value));
525
526
  }
526
527
  } else {
@@ -533,3 +534,10 @@ export function testCaseFromCsvRow(row: CsvRow): TestCase {
533
534
  assert: asserts,
534
535
  };
535
536
  }
537
+
538
+ export function getNunjucksEngine() {
539
+ nunjucks.configure({
540
+ autoescape: false,
541
+ });
542
+ return nunjucks;
543
+ }