promptfoo 0.20.0 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/package.json +4 -4
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +5 -0
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/evaluator.js +1 -1
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/index.d.ts +1 -5
- package/dist/src/index.d.ts.map +1 -1
- package/dist/src/index.js +1 -1
- package/dist/src/index.js.map +1 -1
- package/dist/src/matchers.d.ts +3 -2
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +37 -9
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/providers/anthropic.d.ts +5 -3
- package/dist/src/providers/anthropic.d.ts.map +1 -1
- package/dist/src/providers/anthropic.js +8 -10
- package/dist/src/providers/anthropic.js.map +1 -1
- package/dist/src/providers/azureopenai.d.ts +9 -8
- package/dist/src/providers/azureopenai.d.ts.map +1 -1
- package/dist/src/providers/azureopenai.js +33 -36
- package/dist/src/providers/azureopenai.js.map +1 -1
- package/dist/src/providers/openai.d.ts +12 -12
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +54 -65
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/replicate.d.ts +4 -2
- package/dist/src/providers/replicate.d.ts.map +1 -1
- package/dist/src/providers/replicate.js +10 -8
- package/dist/src/providers/replicate.js.map +1 -1
- package/dist/src/providers/webhook.d.ts +9 -0
- package/dist/src/providers/webhook.d.ts.map +1 -0
- package/dist/src/providers/webhook.js +54 -0
- package/dist/src/providers/webhook.js.map +1 -0
- package/dist/src/providers.d.ts +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +36 -28
- package/dist/src/providers.js.map +1 -1
- package/dist/src/suggestions.d.ts.map +1 -1
- package/dist/src/suggestions.js +1 -3
- package/dist/src/suggestions.js.map +1 -1
- package/dist/src/types.d.ts +7 -1
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/util.js +1 -1
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
- package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
- package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
- package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
- package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
- package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
- package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
- package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +6 -6
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/setup/index.html +27 -1
- package/dist/src/web/nextui/setup/index.txt +9 -9
- package/dist/src/web/server.d.ts.map +1 -1
- package/dist/src/web/server.js +9 -5
- package/dist/src/web/server.js.map +1 -1
- package/package.json +4 -4
- package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
- package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
- package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
- package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
- package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
- package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
- package/dist/src/web/nextui/api +0 -1
- package/src/__mocks__/esm.ts +0 -3
- package/src/assertions.ts +0 -580
- package/src/cache.ts +0 -109
- package/src/esm.ts +0 -13
- package/src/evaluator.ts +0 -500
- package/src/index.ts +0 -52
- package/src/logger.ts +0 -46
- package/src/main.ts +0 -442
- package/src/matchers.ts +0 -120
- package/src/onboarding.ts +0 -69
- package/src/prompts.ts +0 -39
- package/src/providers/anthropic.ts +0 -88
- package/src/providers/azureopenai.ts +0 -299
- package/src/providers/llama.ts +0 -95
- package/src/providers/localai.ts +0 -111
- package/src/providers/ollama.ts +0 -89
- package/src/providers/openai.ts +0 -337
- package/src/providers/replicate.ts +0 -99
- package/src/providers/scriptCompletion.ts +0 -35
- package/src/providers/shared.ts +0 -34
- package/src/providers.ts +0 -192
- package/src/share.ts +0 -27
- package/src/suggestions.ts +0 -63
- package/src/table.ts +0 -43
- package/src/tableOutput.html +0 -52
- package/src/telemetry.ts +0 -70
- package/src/types.ts +0 -299
- package/src/updates.ts +0 -46
- package/src/util.ts +0 -543
- package/src/web/nextui/.eslintrc.json +0 -3
- package/src/web/nextui/next.config.js +0 -14
- package/src/web/nextui/package-lock.json +0 -4644
- package/src/web/nextui/package.json +0 -47
- package/src/web/nextui/public/favicon.ico +0 -0
- package/src/web/nextui/public/logo.svg +0 -30
- package/src/web/nextui/src/app/Home.css +0 -3
- package/src/web/nextui/src/app/api/route.ts +0 -6
- package/src/web/nextui/src/app/components/DarkMode.css +0 -22
- package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
- package/src/web/nextui/src/app/components/Logo.css +0 -32
- package/src/web/nextui/src/app/components/Logo.tsx +0 -11
- package/src/web/nextui/src/app/components/PageShell.css +0 -33
- package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
- package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
- package/src/web/nextui/src/app/eval/Eval.css +0 -13
- package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
- package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
- package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
- package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
- package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
- package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
- package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
- package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
- package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
- package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
- package/src/web/nextui/src/app/eval/index.css +0 -0
- package/src/web/nextui/src/app/eval/page.tsx +0 -8
- package/src/web/nextui/src/app/eval/store.ts +0 -18
- package/src/web/nextui/src/app/eval/types.ts +0 -20
- package/src/web/nextui/src/app/globals.css +0 -58
- package/src/web/nextui/src/app/layout.tsx +0 -25
- package/src/web/nextui/src/app/page.tsx +0 -7
- package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
- package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
- package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
- package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
- package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
- package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
- package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
- package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
- package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
- package/src/web/nextui/src/app/setup/page.css +0 -3
- package/src/web/nextui/src/app/setup/page.tsx +0 -160
- package/src/web/nextui/src/util/api.ts +0 -1
- package/src/web/nextui/src/util/store.ts +0 -53
- package/src/web/nextui/tsconfig.json +0 -28
- package/src/web/server.ts +0 -151
- /package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/util.ts
DELETED
|
@@ -1,543 +0,0 @@
|
|
|
1
|
-
import * as fs from 'fs';
|
|
2
|
-
import * as path from 'path';
|
|
3
|
-
import * as os from 'os';
|
|
4
|
-
|
|
5
|
-
import $RefParser from '@apidevtools/json-schema-ref-parser';
|
|
6
|
-
import fetch from 'node-fetch';
|
|
7
|
-
import yaml from 'js-yaml';
|
|
8
|
-
import nunjucks from 'nunjucks';
|
|
9
|
-
import { globSync } from 'glob';
|
|
10
|
-
import { parse as parsePath } from 'path';
|
|
11
|
-
import { parse as parseCsv } from 'csv-parse/sync';
|
|
12
|
-
import { stringify } from 'csv-stringify/sync';
|
|
13
|
-
|
|
14
|
-
import logger from './logger';
|
|
15
|
-
import { getDirectory } from './esm';
|
|
16
|
-
|
|
17
|
-
import type { RequestInfo, RequestInit, Response } from 'node-fetch';
|
|
18
|
-
|
|
19
|
-
import type {
|
|
20
|
-
Assertion,
|
|
21
|
-
CsvRow,
|
|
22
|
-
EvaluateSummary,
|
|
23
|
-
EvaluateTableOutput,
|
|
24
|
-
UnifiedConfig,
|
|
25
|
-
TestCase,
|
|
26
|
-
Prompt,
|
|
27
|
-
ProviderOptionsMap,
|
|
28
|
-
TestSuite,
|
|
29
|
-
} from './types';
|
|
30
|
-
|
|
31
|
-
/**
 * Builds the provider-id -> prompt-display-strings map for a config.
 *
 * - No `providers`: returns an empty map.
 * - `providers` is a string: that id is mapped to every parsed prompt.
 * - `providers` is a function: the key `'Custom function'` is mapped to every parsed prompt.
 * - `providers` is a list: only object entries contribute. The first key of each
 *   object is the provider id unless the options carry their own `id`; the entry's
 *   `prompts` are used when present, otherwise every parsed prompt.
 *
 * @param config Config whose `providers` field is inspected.
 * @param parsedPrompts Prompts whose `display` values form the default prompt list.
 * @returns The provider-to-prompts map.
 */
export function readProviderPromptMap(
  config: Partial<UnifiedConfig>,
  parsedPrompts: Prompt[],
): TestSuite['providerPromptMap'] {
  const promptMap: Record<string, string[]> = {};
  const { providers } = config;

  if (!providers) {
    return promptMap;
  }

  const everyPrompt = parsedPrompts.map((prompt) => prompt.display);

  if (typeof providers === 'string') {
    return { [providers]: everyPrompt };
  }
  if (typeof providers === 'function') {
    return { 'Custom function': everyPrompt };
  }

  for (const entry of providers) {
    if (typeof entry !== 'object') {
      // String/function entries in the list get no explicit mapping.
      continue;
    }
    const optionsMap = entry as ProviderOptionsMap;
    const [configKey] = Object.keys(optionsMap);
    const options = optionsMap[configKey];
    promptMap[options.id || configKey] = options.prompts || everyPrompt;
  }

  return promptMap;
}
|
|
66
|
-
|
|
67
|
-
// Separator that readPrompts splits on when exactly one prompt source was loaded,
// allowing several prompts to live in a single file.
const PROMPT_DELIMITER = '---';
|
|
68
|
-
|
|
69
|
-
/**
 * Best-effort JSON parser.
 *
 * @param json Text to parse.
 * @returns The parsed value, or `undefined` when the text is not valid JSON.
 */
function parseJson(json: string): any | undefined {
  let parsed: any;
  try {
    parsed = JSON.parse(json);
  } catch {
    parsed = undefined;
  }
  return parsed;
}
|
|
76
|
-
|
|
77
|
-
/**
 * In-memory copy of the global config. `null` until `readGlobalConfig()` has
 * populated it, and again after `resetGlobalConfig()`.
 */
let globalConfigCache: any = null;

/** Clears the in-memory global config so the next `readGlobalConfig()` call goes back to disk. */
export function resetGlobalConfig(): void {
  globalConfigCache = null;
}

/**
 * Returns the global config stored as `promptfoo.yaml` in the config directory.
 *
 * The parsed file is cached in memory after the first call. When the file does
 * not exist, the config directory is created, `{ hasRun: false }` is written to
 * the file, and that object is returned.
 *
 * @returns The cached or freshly loaded global config object.
 */
export function readGlobalConfig(): any {
  if (!globalConfigCache) {
    const configDir = getConfigDirectoryPath();
    const configFilePath = path.join(configDir, 'promptfoo.yaml');

    if (fs.existsSync(configFilePath)) {
      // yaml.load yields undefined/null for an empty or `null` document. Fall back
      // to the same defaults used for a fresh install so callers always get an object.
      globalConfigCache = yaml.load(fs.readFileSync(configFilePath, 'utf-8')) ?? { hasRun: false };
    } else {
      // `recursive: true` makes mkdirSync a no-op when the directory already exists.
      fs.mkdirSync(configDir, { recursive: true });
      globalConfigCache = { hasRun: false };
      fs.writeFileSync(configFilePath, yaml.dump(globalConfigCache));
    }
  }

  return globalConfigCache;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Marks the tool as having been run, if it has not been already.
 *
 * Reads the global config; when `hasRun` is falsy it is set to `true` and the
 * config is written back to `promptfoo.yaml` in the config directory.
 *
 * @returns `true` only when this call recorded the first run; `false` when a run
 *   was already recorded or when reading/writing the config failed.
 */
export function maybeRecordFirstRun(): boolean {
  try {
    const globalConfig = readGlobalConfig();
    if (globalConfig.hasRun) {
      return false;
    }
    globalConfig.hasRun = true;
    const configFile = path.join(getConfigDirectoryPath(), 'promptfoo.yaml');
    fs.writeFileSync(configFile, yaml.dump(globalConfig));
    return true;
  } catch {
    // Any failure is treated as "not a first run".
    return false;
  }
}
|
|
116
|
-
|
|
117
|
-
/**
 * Loads a config file if it exists.
 *
 * @param configPath Path of the config file to look for.
 * @returns The config produced by `readConfig`, or `undefined` when no file exists at `configPath`.
 */
export async function maybeReadConfig(configPath: string): Promise<UnifiedConfig | undefined> {
  return fs.existsSync(configPath) ? readConfig(configPath) : undefined;
}
|
|
123
|
-
|
|
124
|
-
/**
 * Loads a config file, choosing the loader from the file extension.
 *
 * - `.json`: read and parsed with `JSON.parse`.
 * - `.js`: loaded with `require`.
 * - `.yaml` / `.yml`: parsed as YAML, then passed through `$RefParser.dereference`.
 *
 * @param configPath Path of the config file.
 * @returns The loaded config.
 * @throws Error when the extension is not one of the supported formats.
 */
export async function readConfig(configPath: string): Promise<UnifiedConfig> {
  const ext = path.parse(configPath).ext;
  switch (ext) {
    case '.json': {
      const content = fs.readFileSync(configPath, 'utf-8');
      return JSON.parse(content) as UnifiedConfig;
    }
    case '.js':
      // require() resolves relative specifiers against this module's directory and
      // bare names against node_modules. Resolve to an absolute path first so a
      // relative configPath is interpreted against the working directory, exactly
      // like the fs-based branches.
      return require(path.resolve(configPath)) as UnifiedConfig;
    case '.yaml':
    case '.yml': {
      const parsed = yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
      return (await $RefParser.dereference(parsed)) as UnifiedConfig;
    }
    default:
      throw new Error(`Unsupported configuration file format: ${ext}`);
  }
}
|
|
141
|
-
|
|
142
|
-
/** How the caller of `readPrompts` supplied the prompt sources. */
enum PromptInputType {
  /** A single path string. */
  STRING = 1,
  /** A list of paths or globs. */
  ARRAY,
  /** An object whose keys are paths and whose values are display names. */
  NAMED,
}
|
|
147
|
-
|
|
148
|
-
/**
 * Loads prompts from disk.
 *
 * Accepted inputs:
 * - a single path (used as-is, no glob expansion);
 * - a list of paths/globs (each expanded with `globSync`);
 * - an object mapping path -> display name.
 *
 * For each resolved path:
 * - a directory contributes one prompt per entry, displayed as its content;
 * - for object input, a file contributes one prompt displayed under its mapped name;
 * - otherwise a `.jsonl` file contributes one prompt per non-empty line, and any
 *   other file contributes one prompt, displayed as its content or, when the
 *   content is longer than 200 characters, as its path.
 *
 * When exactly one prompt was collected and the input was not an object, that
 * prompt's raw text is split on `PROMPT_DELIMITER` and each trimmed piece becomes
 * its own prompt.
 *
 * @param promptPathOrGlobs Path, list of paths/globs, or path -> display-name map.
 * @param basePath Directory that relative paths are resolved against.
 * @returns The collected prompts.
 * @throws Error when no prompts were found.
 */
export function readPrompts(
  promptPathOrGlobs: string | string[] | Record<string, string>,
  basePath: string = '',
): Prompt[] {
  let inputType: PromptInputType | undefined;
  let promptPaths: string[] = [];
  const displayByPath = new Map<string, string>();

  if (typeof promptPathOrGlobs === 'string') {
    inputType = PromptInputType.STRING;
    const singlePath = path.resolve(basePath, promptPathOrGlobs);
    displayByPath.set(singlePath, promptPathOrGlobs);
    promptPaths = [singlePath];
  } else if (Array.isArray(promptPathOrGlobs)) {
    inputType = PromptInputType.ARRAY;
    promptPaths = promptPathOrGlobs.flatMap((pathOrGlob) => {
      const pattern = path.resolve(basePath, pathOrGlob);
      displayByPath.set(pattern, pathOrGlob);
      return globSync(pattern);
    });
  } else if (typeof promptPathOrGlobs === 'object') {
    inputType = PromptInputType.NAMED;
    promptPaths = Object.keys(promptPathOrGlobs).map((key) => {
      const target = path.resolve(basePath, key);
      displayByPath.set(target, promptPathOrGlobs[key]);
      return target;
    });
  }

  let promptContents: Prompt[] = [];

  for (const promptPath of promptPaths) {
    if (fs.statSync(promptPath).isDirectory()) {
      // Every directory entry becomes its own prompt.
      for (const fileName of fs.readdirSync(promptPath)) {
        const joinedPath = path.join(promptPath, fileName);
        const filePath = path.resolve(basePath, joinedPath);
        displayByPath.set(filePath, joinedPath);
        const text = fs.readFileSync(filePath, 'utf-8');
        promptContents.push({ raw: text, display: text });
      }
      continue;
    }

    const fileContent = fs.readFileSync(promptPath, 'utf-8');

    if (inputType === PromptInputType.NAMED) {
      // Named prompts are shown under the caller-supplied name.
      promptContents.push({
        raw: fileContent,
        display: displayByPath.get(promptPath) || promptPath,
      });
      continue;
    }

    if (path.parse(promptPath).ext === '.jsonl') {
      // JSONL: one prompt per non-empty line.
      const jsonLines = fileContent.split(/\r?\n/).filter((line) => line.length > 0);
      for (const json of jsonLines) {
        promptContents.push({ raw: json, display: json });
      }
      continue;
    }

    promptContents.push({
      raw: fileContent,
      // Long prompts are shown by path rather than by content.
      display: fileContent.length > 200 ? promptPath : fileContent,
    });
  }

  if (promptContents.length === 1 && inputType !== PromptInputType.NAMED) {
    promptContents = promptContents[0].raw.split(PROMPT_DELIMITER).map((piece) => {
      const trimmed = piece.trim();
      return { raw: trimmed, display: trimmed };
    });
  }

  if (promptContents.length === 0) {
    throw new Error(`There are no prompts in ${JSON.stringify(promptPathOrGlobs)}`);
  }
  return promptContents;
}
|
|
225
|
-
|
|
226
|
-
/**
 * Downloads a Google Sheet as CSV text.
 *
 * The `/edit...` tail of the URL is rewritten to `/export?format=csv` before fetching.
 *
 * @param url Google Sheets URL.
 * @returns The CSV body of the response.
 * @throws Error when the response status is not 200.
 */
export async function fetchCsvFromGoogleSheet(url: string): Promise<string> {
  const exportUrl = url.replace(/\/edit.*$/, '/export?format=csv');
  const res = await fetch(exportUrl);
  if (res.status === 200) {
    return res.text();
  }
  throw new Error(`Failed to fetch CSV from Google Sheets URL: ${url}`);
}
|
|
235
|
-
|
|
236
|
-
/**
 * Loads variables from one or more YAML files and merges them into one object.
 *
 * Each path/glob is resolved against `basePath` and expanded with `globSync`;
 * matching files are merged in order with `Object.assign`, so later files
 * overwrite keys from earlier ones.
 *
 * @param pathOrGlobs A path/glob or a list of them.
 * @param basePath Directory that relative paths are resolved against.
 * @returns The merged variables.
 */
export async function readVarsFiles(
  pathOrGlobs: string | string[],
  basePath: string = '',
): Promise<Record<string, string | string[] | object>> {
  const patterns = typeof pathOrGlobs === 'string' ? [pathOrGlobs] : pathOrGlobs;
  const merged: Record<string, string | string[] | object> = {};

  for (const pattern of patterns) {
    for (const file of globSync(path.resolve(basePath, pattern))) {
      Object.assign(merged, yaml.load(fs.readFileSync(file, 'utf-8')));
    }
  }

  return merged;
}
|
|
257
|
-
|
|
258
|
-
/**
 * Reads a file of tests (or test equivalents) as rows.
 *
 * Despite the parameter name, the file holds tests, not just variables.
 * Supported sources:
 * - a Google Sheets URL (downloaded and parsed as CSV with a header row);
 * - `.csv` (parsed with a header row);
 * - `.json` (invalid JSON yields `undefined` from `parseJson`);
 * - `.yaml` / `.yml`.
 *
 * Any other extension yields an empty list.
 *
 * @param varsPath Path or Google Sheets URL of the tests file.
 * @param basePath Directory that a relative `varsPath` is resolved against.
 * @returns The parsed rows.
 */
export async function readTestsFile(varsPath: string, basePath: string = ''): Promise<CsvRow[]> {
  // Check for Google Sheets before looking at the extension: share URLs such as
  // ".../edit?usp=sharing" have no ".csv" suffix, so an extension-first check
  // would never reach this branch and silently return no rows.
  if (varsPath.startsWith('https://docs.google.com/spreadsheets/')) {
    const csvData = await fetchCsvFromGoogleSheet(varsPath);
    return parseCsv(csvData, { columns: true });
  }

  const resolvedVarsPath = path.resolve(basePath, varsPath);
  const fileExtension = parsePath(varsPath).ext.slice(1);
  let rows: CsvRow[] = [];

  if (fileExtension === 'csv') {
    rows = parseCsv(fs.readFileSync(resolvedVarsPath, 'utf-8'), { columns: true });
  } else if (fileExtension === 'json') {
    rows = parseJson(fs.readFileSync(resolvedVarsPath, 'utf-8'));
  } else if (fileExtension === 'yaml' || fileExtension === 'yml') {
    rows = yaml.load(fs.readFileSync(resolvedVarsPath, 'utf-8')) as unknown as any;
  }

  return rows;
}
|
|
280
|
-
|
|
281
|
-
/**
 * Resolves the `tests` config value into a list of test cases.
 *
 * - A string ending in `yaml`/`yml` is treated as a glob of YAML test files.
 * - Any other string is read with `readTestsFile` (legacy CSV path); each row
 *   becomes a test case described as `Row #<n>`, and the result is returned
 *   directly without the shape validation below.
 * - A list may mix glob strings (loaded as YAML test files) and inline test cases.
 *
 * In YAML test files, a `vars` value that is a string or a list is treated as
 * vars file path(s) relative to the test file and replaced by the loaded variables.
 *
 * @param tests Tests config value.
 * @param basePath Directory that relative paths are resolved against.
 * @returns The collected test cases.
 * @throws Error when a collected test case has neither `assert` nor `vars`.
 */
export async function readTests(
  tests: string | string[] | TestCase[] | undefined,
  basePath: string = '',
): Promise<TestCase[]> {
  const collected: TestCase[] = [];

  // Loads every YAML file matching the glob and appends its test cases.
  async function appendFromGlob(pattern: string): Promise<void> {
    for (const testFile of globSync(path.resolve(basePath, pattern))) {
      const cases = yaml.load(fs.readFileSync(testFile, 'utf-8')) as TestCase[];
      for (const testCase of cases) {
        const { vars } = testCase;
        if (typeof vars === 'string' || Array.isArray(vars)) {
          // Vars given as path(s): resolve them relative to the test file itself.
          testCase.vars = await readVarsFiles(vars, path.dirname(testFile));
        }
      }
      collected.push(...cases);
    }
  }

  if (typeof tests === 'string') {
    const looksLikeYaml = tests.endsWith('yaml') || tests.endsWith('yml');
    if (!looksLikeYaml) {
      // Legacy load CSV
      const rows = await readTestsFile(tests, basePath);
      return rows.map((row, idx) => {
        const rowTest = testCaseFromCsvRow(row);
        rowTest.description = `Row #${idx + 1}`;
        return rowTest;
      });
    }
    // Load testcase config from yaml
    await appendFromGlob(tests);
  } else if (Array.isArray(tests)) {
    for (const entry of tests) {
      if (typeof entry === 'string') {
        // Strings are file paths/globs.
        await appendFromGlob(entry);
      } else {
        // Anything else is taken to be a full test case.
        collected.push(entry);
      }
    }
  }

  // Some validation of the shape of tests
  for (const candidate of collected) {
    if (candidate.assert || candidate.vars) {
      continue;
    }
    throw new Error(
      `Test case must have either "assert" or "vars" property. Instead got ${JSON.stringify(
        candidate,
        null,
        2,
      )}`,
    );
  }

  return collected;
}
|
|
342
|
-
|
|
343
|
-
/**
 * Writes evaluation results to `outputPath` in the format implied by its extension.
 *
 * - `csv` / `txt`: a CSV table (header row of prompts and vars, then one row per test).
 * - `json`: `{ results, config, shareableUrl }` pretty-printed with 2 spaces.
 * - `yaml` / `yml`: the same object dumped as YAML.
 * - `html`: the same table rendered through the `tableOutput.html` template.
 *
 * The extension is the text after the last `.` in `outputPath`, compared case-insensitively.
 *
 * @param outputPath Destination file path.
 * @param results Evaluation summary to serialize.
 * @param config Config that produced the results (included in JSON/YAML output).
 * @param shareableUrl Shareable URL to include in JSON/YAML output, or `null`.
 * @throws Error when the extension is not a supported format.
 */
export function writeOutput(
  outputPath: string,
  results: EvaluateSummary,
  config: Partial<UnifiedConfig>,
  shareableUrl: string | null,
): void {
  const outputExtension = outputPath.split('.').pop()?.toLowerCase();

  const outputToSimpleString = (output: EvaluateTableOutput) =>
    `${output.pass ? '[PASS]' : '[FAIL]'} (${output.score.toFixed(2)}) ${output.text}`;

  // Header row followed by one row per test case. Built lazily so formats that
  // do not need it (JSON/YAML) never touch `results.table`.
  const buildTable = () => [
    [...results.table.head.prompts, ...results.table.head.vars],
    ...results.table.body.map((row) => [...row.outputs.map(outputToSimpleString), ...row.vars]),
  ];

  switch (outputExtension) {
    case 'csv':
    case 'txt':
      fs.writeFileSync(outputPath, stringify(buildTable()));
      break;
    case 'json':
      fs.writeFileSync(outputPath, JSON.stringify({ results, config, shareableUrl }, null, 2));
      break;
    case 'yaml':
    case 'yml':
      fs.writeFileSync(outputPath, yaml.dump({ results, config, shareableUrl }));
      break;
    case 'html': {
      const template = fs.readFileSync(`${getDirectory()}/tableOutput.html`, 'utf-8');
      const htmlOutput = getNunjucksEngine().renderString(template, {
        table: buildTable(),
        results: results.results,
      });
      fs.writeFileSync(outputPath, htmlOutput);
      break;
    }
    default:
      // List every format handled above; the old message omitted TXT and HTML.
      throw new Error('Unsupported output file format. Use CSV, TXT, JSON, YAML, or HTML.');
  }
}
|
|
379
|
-
|
|
380
|
-
/**
 * Performs a `fetch` that is aborted and rejected if it takes longer than `timeout` ms.
 *
 * The request is issued with a fresh `AbortController` signal; any `signal`
 * present in `options` is replaced for the request. The caller's `options`
 * object itself is left untouched.
 *
 * @param url Request target.
 * @param options Fetch options.
 * @param timeout Maximum time to wait, in milliseconds.
 * @returns The response, if it arrives in time.
 * @throws Error `Request timed out after <timeout> ms` on timeout; otherwise whatever `fetch` rejects with.
 */
export function fetchWithTimeout(
  url: RequestInfo,
  options: RequestInit = {},
  timeout: number,
): Promise<Response> {
  return new Promise((resolve, reject) => {
    const controller = new AbortController();

    const timeoutId = setTimeout(() => {
      controller.abort();
      reject(new Error(`Request timed out after ${timeout} ms`));
    }, timeout);

    // Copy the options instead of assigning `options.signal`, so a caller that
    // reuses its options object (e.g. across retries) never sees a stale signal.
    fetch(url, { ...options, signal: controller.signal })
      .then((response) => {
        clearTimeout(timeoutId);
        resolve(response);
      })
      .catch((error) => {
        clearTimeout(timeoutId);
        reject(error);
      });
  });
}
|
|
406
|
-
|
|
407
|
-
/**
 * Calls `fetchWithTimeout` up to `retries` times, backing off exponentially
 * (1s, 2s, 4s, ...) between failed attempts.
 *
 * @param url Request target.
 * @param options Fetch options passed to every attempt.
 * @param timeout Per-attempt timeout in milliseconds.
 * @param retries Total number of attempts.
 * @returns The first successful response.
 * @throws Error `Request failed after <retries> retries: <reason>` once every attempt has failed.
 */
export async function fetchWithRetries(
  url: RequestInfo,
  options: RequestInit = {},
  timeout: number,
  retries: number = 3,
): Promise<Response> {
  let lastError: unknown;
  for (let attempt = 0; attempt < retries; attempt++) {
    try {
      return await fetchWithTimeout(url, options, timeout);
    } catch (error) {
      lastError = error;
      // Only back off when another attempt will follow; sleeping after the
      // final failure just delays the error.
      if (attempt < retries - 1) {
        const waitTime = Math.pow(2, attempt) * 1000; // Exponential backoff
        await new Promise((resolve) => setTimeout(resolve, waitTime));
      }
    }
  }
  // lastError may be a non-Error rejection, or undefined when retries <= 0.
  const reason = lastError instanceof Error ? lastError.message : String(lastError);
  throw new Error(`Request failed after ${retries} retries: ${reason}`);
}
|
|
425
|
-
|
|
426
|
-
// Default number of eval result files that cleanupOldResults leaves in place.
const RESULT_HISTORY_LENGTH = 50;
|
|
427
|
-
|
|
428
|
-
/**
 * Location of the config directory.
 *
 * @returns The `.promptfoo` directory inside the current user's home directory.
 */
export function getConfigDirectoryPath(): string {
  const home = os.homedir();
  return path.join(home, '.promptfoo');
}
|
|
431
|
-
|
|
432
|
-
/**
 * Location of the pointer to the most recent eval results.
 *
 * @returns `<config directory>/output/latest.json`.
 */
export function getLatestResultsPath(): string {
  const configDir = getConfigDirectoryPath();
  return path.join(configDir, 'output', 'latest.json');
}
|
|
435
|
-
|
|
436
|
-
/**
 * Persists an eval run and repoints `latest.json` at it.
 *
 * Writes `{ version: 1, config, results }` to `output/eval-<timestamp>.json` in
 * the config directory, replaces the `latest.json` symlink so it points at the
 * new file, then prunes old result files. Any failure is logged rather than thrown.
 *
 * @param results Evaluation summary to store.
 * @param config Config that produced the results.
 */
export function writeLatestResults(results: EvaluateSummary, config: Partial<UnifiedConfig>) {
  const outputDir = path.join(getConfigDirectoryPath(), 'output');

  // Swap the colons of the ISO timestamp for hyphens so the file name is valid on Windows.
  const stamp = new Date().toISOString().replace(/:/g, '-');

  const evalFile = path.join(outputDir, `eval-${stamp}.json`);
  const latestLink = getLatestResultsPath();
  const payload = { version: 1, config, results };

  try {
    fs.mkdirSync(outputDir, { recursive: true });
    fs.writeFileSync(evalFile, JSON.stringify(payload, null, 2));

    try {
      fs.unlinkSync(latestLink);
    } catch {
      // Nothing to remove when no previous link exists.
    }
    fs.symlinkSync(evalFile, latestLink);

    cleanupOldResults();
  } catch (err) {
    logger.error(`Failed to write latest results to ${evalFile}:\n${err}`);
  }
}
|
|
469
|
-
|
|
470
|
-
/**
 * Lists stored eval result files, oldest first.
 *
 * Looks in `<config directory>/output` for files named `eval-*.json` and orders
 * them by filesystem birth time, ascending.
 *
 * @returns File names (not full paths) of the stored results.
 * @throws Whatever `fs.readdirSync` / `fs.statSync` throw, e.g. when the output directory does not exist.
 */
export function listPreviousResults(): string[] {
  const directory = path.join(getConfigDirectoryPath(), 'output');

  // Stat each file once up front instead of on every comparator call.
  const entries = fs
    .readdirSync(directory)
    .filter((file) => file.startsWith('eval-') && file.endsWith('.json'))
    .map((file) => ({
      file,
      createdAt: fs.statSync(path.join(directory, file)).birthtime.getTime(),
    }));

  entries.sort((a, b) => a.createdAt - b.createdAt); // ascending: oldest first
  return entries.map((entry) => entry.file);
}
|
|
481
|
-
|
|
482
|
-
/**
 * Deletes the oldest stored eval results so that at most `remaining` are left.
 *
 * @param remaining Number of most recent result files to keep.
 */
export function cleanupOldResults(remaining = RESULT_HISTORY_LENGTH) {
  const previous = listPreviousResults();
  // The list is oldest-first, so the surplus sits at the front.
  const surplus = Math.max(previous.length - remaining, 0);
  for (const stale of previous.slice(0, surplus)) {
    fs.unlinkSync(path.join(getConfigDirectoryPath(), 'output', stale));
  }
}
|
|
488
|
-
|
|
489
|
-
/**
 * Reads one stored eval result by file name.
 *
 * The name is resolved inside `<config directory>/output`; symlinks are followed
 * via `realpathSync` before the file is read and parsed as JSON.
 *
 * @param name File name inside the output directory.
 * @returns The parsed file, or `undefined` (after logging an error) when it cannot be read or parsed.
 */
export function readResult(
  name: string,
): { results: EvaluateSummary; config: Partial<UnifiedConfig> } | undefined {
  const resultsPath = path.join(getConfigDirectoryPath(), 'output', name);
  try {
    const realPath = fs.realpathSync(resultsPath);
    return JSON.parse(fs.readFileSync(realPath, 'utf-8'));
  } catch (err) {
    logger.error(`Failed to read results from ${resultsPath}:\n${err}`);
    return undefined;
  }
}
|
|
501
|
-
|
|
502
|
-
/**
 * Reads the most recent eval results via the `latest.json` pointer.
 *
 * @returns The parsed contents, or `undefined` when no latest results file
 *   exists (for example before the first eval has been stored).
 * @throws When the file exists but cannot be read or is not valid JSON.
 */
export function readLatestResults():
  | { results: EvaluateSummary; config: Partial<UnifiedConfig> }
  | undefined {
  const latestPath = getLatestResultsPath();
  // The declared return type already allows `undefined`; honor it for the
  // "nothing stored yet" case instead of surfacing a raw ENOENT.
  if (!fs.existsSync(latestPath)) {
    return undefined;
  }
  return JSON.parse(fs.readFileSync(latestPath, 'utf-8'));
}
|
|
507
|
-
|
|
508
|
-
/**
 * Cosine similarity of two equal-length vectors.
 *
 * @param vecA First vector.
 * @param vecB Second vector.
 * @returns `dot(a, b) / (|a| * |b|)`, or `0` when either vector has zero
 *   magnitude (including empty vectors), where the ratio is undefined.
 * @throws Error when the vectors differ in length.
 */
export function cosineSimilarity(vecA: number[], vecB: number[]) {
  if (vecA.length !== vecB.length) {
    throw new Error('Vectors must be of equal length');
  }

  // Accumulate the dot product and both squared norms in a single pass.
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  for (let i = 0; i < vecA.length; i++) {
    dotProduct += vecA[i] * vecB[i];
    sumSquaresA += vecA[i] * vecA[i];
    sumSquaresB += vecB[i] * vecB[i];
  }

  const denominator = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  // Avoid 0/0 = NaN leaking into scores when a vector is all zeros.
  if (denominator === 0) {
    return 0;
  }
  return dotProduct / denominator;
}
|
|
517
|
-
|
|
518
|
-
/**
 * Converts one CSV row into a test case.
 *
 * Every column becomes a variable, except `__expected`: a non-blank
 * `__expected` cell is turned into an assertion via `assertionFromString`,
 * and a blank one is ignored.
 *
 * @param row Column name -> cell value.
 * @returns A test case with the row's variables and zero or one assertion.
 */
export function testCaseFromCsvRow(row: CsvRow): TestCase {
  const assertions: Assertion[] = [];
  const variables: Record<string, string> = {};

  Object.entries(row).forEach(([column, cell]) => {
    if (column !== '__expected') {
      variables[column] = cell;
      return;
    }
    if (cell.trim() === '') {
      return;
    }
    // Loaded at call time rather than through a top-level import.
    const { assertionFromString } = require('./assertions');
    assertions.push(assertionFromString(cell));
  });

  return {
    vars: variables,
    assert: assertions,
  };
}
|
|
537
|
-
|
|
538
|
-
/**
 * Returns the shared nunjucks module, (re)configured with autoescaping turned
 * off, so rendered values are inserted into templates verbatim.
 *
 * @returns The configured `nunjucks` module.
 */
export function getNunjucksEngine() {
  const engineOptions = { autoescape: false };
  nunjucks.configure(engineOptions);
  return nunjucks;
}
|
|
@@ -1,14 +0,0 @@
|
|
|
1
|
-
// 'standalone' output when the USING_VERCEL environment variable is set,
// static 'export' output otherwise.
const outputMode = process.env.USING_VERCEL ? 'standalone' : 'export';

/** @type {import('next').NextConfig} */
const nextConfig = {
  output: outputMode,
  trailingSlash: true,
  webpack: (config, { buildId, dev, isServer, defaultLoaders, webpack }) => {
    // Mark these two packages as CommonJS externals so webpack does not bundle them.
    const commonJsExternals = {
      'utf-8-validate': 'commonjs utf-8-validate',
      bufferutil: 'commonjs bufferutil',
    };
    config.externals.push(commonJsExternals);
    return config;
  },
};

module.exports = nextConfig;
|