promptfoo 0.18.1 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +1 -1
- package/dist/src/assertions.d.ts +2 -2
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +42 -11
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/cache.d.ts +1 -1
- package/dist/src/cache.d.ts.map +1 -1
- package/dist/src/cache.js +4 -4
- package/dist/src/cache.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +5 -2
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/main.js +4 -4
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +2 -2
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +7 -5
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/llama.js +1 -1
- package/dist/src/providers/llama.js.map +1 -1
- package/dist/src/providers/localai.js +2 -2
- package/dist/src/providers/localai.js.map +1 -1
- package/dist/src/providers/ollama.d.ts +9 -0
- package/dist/src/providers/ollama.d.ts.map +1 -0
- package/dist/src/providers/ollama.js +66 -0
- package/dist/src/providers/ollama.js.map +1 -0
- package/dist/src/providers/openai.d.ts +2 -2
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +7 -5
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +11 -5
- package/dist/src/providers.js.map +1 -1
- package/dist/src/types.d.ts +6 -2
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.d.ts +2 -0
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +24 -12
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/client/assets/index-6d2a3573.js +200 -0
- package/dist/src/web/client/index.html +1 -1
- package/package.json +1 -1
- package/src/assertions.ts +45 -11
- package/src/cache.ts +3 -2
- package/src/evaluator.ts +5 -1
- package/src/main.ts +4 -4
- package/src/providers/azureopenai.ts +18 -6
- package/src/providers/llama.ts +2 -2
- package/src/providers/localai.ts +3 -3
- package/src/providers/ollama.ts +88 -0
- package/src/providers/openai.ts +8 -6
- package/src/providers.ts +20 -5
- package/src/types.ts +6 -2
- package/src/util.ts +25 -17
- package/src/web/client/package-lock.json +5726 -0
- package/src/web/client/src/EvalOutputPromptDialog.tsx +78 -16
- package/src/web/client/src/ResultsTable.tsx +32 -9
- package/src/web/client/src/ResultsView.tsx +1 -1
- package/src/web/client/src/types.ts +3 -1
- package/dist/src/web/client/assets/index-8388d689.js +0 -199
package/src/types.ts
CHANGED
|
@@ -28,7 +28,7 @@ export interface CommandLineOptions {
|
|
|
28
28
|
}
|
|
29
29
|
|
|
30
30
|
export interface ProviderConfig {
|
|
31
|
-
id
|
|
31
|
+
id?: ProviderId;
|
|
32
32
|
config?: any;
|
|
33
33
|
prompts?: string[]; // List of prompt display strings
|
|
34
34
|
}
|
|
@@ -97,6 +97,7 @@ export interface EvaluateResult {
|
|
|
97
97
|
success: boolean;
|
|
98
98
|
score: number;
|
|
99
99
|
latencyMs: number;
|
|
100
|
+
gradingResult?: GradingResult;
|
|
100
101
|
}
|
|
101
102
|
|
|
102
103
|
export interface EvaluateTableOutput {
|
|
@@ -106,6 +107,7 @@ export interface EvaluateTableOutput {
|
|
|
106
107
|
prompt: string;
|
|
107
108
|
latencyMs: number;
|
|
108
109
|
tokenUsage?: Partial<TokenUsage>;
|
|
110
|
+
gradingResult?: GradingResult;
|
|
109
111
|
}
|
|
110
112
|
|
|
111
113
|
export interface EvaluateTable {
|
|
@@ -138,6 +140,8 @@ export interface GradingResult {
|
|
|
138
140
|
score: number;
|
|
139
141
|
reason: string;
|
|
140
142
|
tokensUsed?: TokenUsage;
|
|
143
|
+
componentResults?: GradingResult[];
|
|
144
|
+
assertion: Assertion | null;
|
|
141
145
|
}
|
|
142
146
|
|
|
143
147
|
type BaseAssertionTypes =
|
|
@@ -244,7 +248,7 @@ export type ProviderId = string;
|
|
|
244
248
|
|
|
245
249
|
export type ProviderFunction = (prompt: string) => Promise<ProviderResponse>;
|
|
246
250
|
|
|
247
|
-
export type RawProviderConfig = Record<ProviderId,
|
|
251
|
+
export type RawProviderConfig = Record<ProviderId, ProviderConfig>;
|
|
248
252
|
|
|
249
253
|
// TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
|
|
250
254
|
export interface TestSuiteConfig {
|
package/src/util.ts
CHANGED
|
@@ -4,7 +4,6 @@ import * as os from 'os';
|
|
|
4
4
|
|
|
5
5
|
import $RefParser from '@apidevtools/json-schema-ref-parser';
|
|
6
6
|
import fetch from 'node-fetch';
|
|
7
|
-
import invariant from 'tiny-invariant';
|
|
8
7
|
import yaml from 'js-yaml';
|
|
9
8
|
import nunjucks from 'nunjucks';
|
|
10
9
|
import { globSync } from 'glob';
|
|
@@ -13,7 +12,6 @@ import { parse as parseCsv } from 'csv-parse/sync';
|
|
|
13
12
|
import { stringify } from 'csv-stringify/sync';
|
|
14
13
|
|
|
15
14
|
import logger from './logger';
|
|
16
|
-
import { assertionFromString } from './assertions';
|
|
17
15
|
import { getDirectory } from './esm';
|
|
18
16
|
|
|
19
17
|
import type { RequestInfo, RequestInit, Response } from 'node-fetch';
|
|
@@ -45,20 +43,21 @@ export function readProviderPromptMap(
|
|
|
45
43
|
allPrompts.push(prompt.display);
|
|
46
44
|
}
|
|
47
45
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
);
|
|
46
|
+
if (typeof config.providers === 'string') {
|
|
47
|
+
return { [config.providers]: allPrompts };
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
if (typeof config.providers === 'function') {
|
|
51
|
+
return { 'Custom function': allPrompts };
|
|
52
|
+
}
|
|
56
53
|
|
|
57
54
|
for (const provider of config.providers) {
|
|
58
55
|
if (typeof provider === 'object') {
|
|
59
56
|
const rawProvider = provider as RawProviderConfig;
|
|
60
|
-
const
|
|
61
|
-
|
|
57
|
+
const originalId = Object.keys(rawProvider)[0];
|
|
58
|
+
const providerObject = rawProvider[originalId];
|
|
59
|
+
const id = providerObject.id || originalId;
|
|
60
|
+
ret[id] = rawProvider[originalId].prompts || allPrompts;
|
|
62
61
|
}
|
|
63
62
|
}
|
|
64
63
|
|
|
@@ -368,7 +367,7 @@ export function writeOutput(
|
|
|
368
367
|
[...results.table.head.prompts, ...results.table.head.vars],
|
|
369
368
|
...results.table.body.map((row) => [...row.outputs.map(outputToSimpleString), ...row.vars]),
|
|
370
369
|
];
|
|
371
|
-
const htmlOutput =
|
|
370
|
+
const htmlOutput = getNunjucksEngine().renderString(template, {
|
|
372
371
|
table,
|
|
373
372
|
results: results.results,
|
|
374
373
|
});
|
|
@@ -456,10 +455,12 @@ export function writeLatestResults(results: EvaluateSummary, config: Partial<Uni
|
|
|
456
455
|
2,
|
|
457
456
|
),
|
|
458
457
|
);
|
|
459
|
-
|
|
458
|
+
|
|
459
|
+
try {
|
|
460
460
|
fs.unlinkSync(latestResultsPath);
|
|
461
|
-
}
|
|
461
|
+
} catch {}
|
|
462
462
|
fs.symlinkSync(newResultsPath, latestResultsPath);
|
|
463
|
+
|
|
463
464
|
cleanupOldResults();
|
|
464
465
|
} catch (err) {
|
|
465
466
|
logger.error(`Failed to write latest results to ${newResultsPath}:\n${err}`);
|
|
@@ -501,8 +502,7 @@ export function readResult(
|
|
|
501
502
|
export function readLatestResults():
|
|
502
503
|
| { results: EvaluateSummary; config: Partial<UnifiedConfig> }
|
|
503
504
|
| undefined {
|
|
504
|
-
|
|
505
|
-
return readResult(latestResultsPath);
|
|
505
|
+
return JSON.parse(fs.readFileSync(getLatestResultsPath(), 'utf-8'));
|
|
506
506
|
}
|
|
507
507
|
|
|
508
508
|
export function cosineSimilarity(vecA: number[], vecB: number[]) {
|
|
@@ -521,6 +521,7 @@ export function testCaseFromCsvRow(row: CsvRow): TestCase {
|
|
|
521
521
|
for (const [key, value] of Object.entries(row)) {
|
|
522
522
|
if (key === '__expected') {
|
|
523
523
|
if (value.trim() !== '') {
|
|
524
|
+
const { assertionFromString } = require('./assertions');
|
|
524
525
|
asserts.push(assertionFromString(value));
|
|
525
526
|
}
|
|
526
527
|
} else {
|
|
@@ -533,3 +534,10 @@ export function testCaseFromCsvRow(row: CsvRow): TestCase {
|
|
|
533
534
|
assert: asserts,
|
|
534
535
|
};
|
|
535
536
|
}
|
|
537
|
+
|
|
538
|
+
export function getNunjucksEngine() {
|
|
539
|
+
nunjucks.configure({
|
|
540
|
+
autoescape: false,
|
|
541
|
+
});
|
|
542
|
+
return nunjucks;
|
|
543
|
+
}
|