promptfoo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -0
- package/README.md +353 -0
- package/dist/__mocks__/esm.d.ts +2 -0
- package/dist/__mocks__/esm.d.ts.map +1 -0
- package/dist/__mocks__/esm.js +4 -0
- package/dist/__mocks__/esm.js.map +1 -0
- package/dist/esm.d.ts +2 -0
- package/dist/esm.d.ts.map +1 -0
- package/dist/esm.js +9 -0
- package/dist/esm.js.map +1 -0
- package/dist/evaluator.d.ts +3 -0
- package/dist/evaluator.d.ts.map +1 -0
- package/dist/evaluator.js +162 -0
- package/dist/evaluator.js.map +1 -0
- package/dist/index.d.ts +7 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +29 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +11 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +38 -0
- package/dist/logger.js.map +1 -0
- package/dist/main.d.ts +3 -0
- package/dist/main.d.ts.map +1 -0
- package/dist/main.js +90 -0
- package/dist/main.js.map +1 -0
- package/dist/providers.d.ts +21 -0
- package/dist/providers.d.ts.map +1 -0
- package/dist/providers.js +145 -0
- package/dist/providers.js.map +1 -0
- package/dist/tableOutput.html +55 -0
- package/dist/types.d.ts +55 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/util.d.ts +6 -0
- package/dist/util.d.ts.map +1 -0
- package/dist/util.js +62 -0
- package/dist/util.js.map +1 -0
- package/package.json +55 -0
- package/src/__mocks__/esm.ts +3 -0
- package/src/esm.ts +10 -0
- package/src/evaluator.ts +203 -0
- package/src/index.ts +35 -0
- package/src/logger.ts +38 -0
- package/src/main.ts +108 -0
- package/src/providers.ts +170 -0
- package/src/tableOutput.html +55 -0
- package/src/types.ts +63 -0
- package/src/util.ts +67 -0
package/dist/logger.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import winston from 'winston';
|
|
3
|
+
const logLevels = {
|
|
4
|
+
error: 0,
|
|
5
|
+
warn: 1,
|
|
6
|
+
info: 2,
|
|
7
|
+
debug: 3,
|
|
8
|
+
};
|
|
9
|
+
/**
 * Winston `printf` formatter that colors a log message according to its level.
 *
 * error -> red, warn -> yellow, debug -> cyan; info messages are returned
 * unchanged. Any other level is rejected with an Error.
 */
const customFormatter = winston.format.printf(({ level, message }) => {
  switch (level) {
    case 'error':
      return chalk.red(message);
    case 'warn':
      return chalk.yellow(message);
    case 'info':
      return message;
    case 'debug':
      return chalk.cyan(message);
    default:
      throw new Error(`Invalid log level: ${level}`);
  }
});
|
|
24
|
+
const logger = winston.createLogger({
|
|
25
|
+
levels: logLevels,
|
|
26
|
+
format: winston.format.combine(winston.format.simple(), customFormatter),
|
|
27
|
+
transports: [new winston.transports.Console()],
|
|
28
|
+
});
|
|
29
|
+
/**
 * Sets the level of the logger's first transport.
 *
 * @param {string} level - Must be one of the keys of `logLevels`.
 * @throws {Error} When `level` is not an own property of `logLevels`.
 */
export function setLogLevel(level) {
  const isKnownLevel = logLevels.hasOwnProperty(level);
  if (!isKnownLevel) {
    throw new Error(`Invalid log level: ${level}`);
  }
  logger.transports[0].level = level;
}
|
|
37
|
+
export default logger;
|
|
38
|
+
//# sourceMappingURL=logger.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"logger.js","sourceRoot":"","sources":["../src/logger.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,OAAO,MAAM,SAAS,CAAC;AAE9B,MAAM,SAAS,GAAG;IAChB,KAAK,EAAE,CAAC;IACR,IAAI,EAAE,CAAC;IACP,IAAI,EAAE,CAAC;IACP,KAAK,EAAE,CAAC;CACT,CAAC;AAEF,MAAM,eAAe,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,IAAI,EAAE,EAAE,EAAE;IAC5E,IAAI,KAAK,KAAK,OAAO,EAAE;QACrB,OAAO,KAAK,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;KAC3B;SAAM,IAAI,KAAK,KAAK,MAAM,EAAE;QAC3B,OAAO,KAAK,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;KAC9B;SAAM,IAAI,KAAK,KAAK,MAAM,EAAE;QAC3B,OAAO,OAAO,CAAC;KAChB;SAAM,IAAI,KAAK,KAAK,OAAO,EAAE;QAC5B,OAAO,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;KAC5B;IACD,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,EAAE,CAAC,CAAC;AACjD,CAAC,CAAC,CAAC;AAEH,MAAM,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC;IAClC,MAAM,EAAE,SAAS;IACjB,MAAM,EAAE,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,eAAe,CAAC;IACxE,UAAU,EAAE,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;CAC/C,CAAC,CAAC;AAEH,MAAM,UAAU,WAAW,CAAC,KAA6B;IACvD,IAAI,SAAS,CAAC,cAAc,CAAC,KAAK,CAAC,EAAE;QACnC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC;KACpC;SAAM;QACL,MAAM,IAAI,KAAK,CAAC,sBAAsB,KAAK,EAAE,CAAC,CAAC;KAChD;AACH,CAAC;AAED,eAAe,MAAM,CAAC"}
|
package/dist/main.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"main.d.ts","sourceRoot":"","sources":["../src/main.ts"],"names":[],"mappings":""}
|
package/dist/main.js
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFileSync } from 'fs';
|
|
3
|
+
import { parse } from 'path';
|
|
4
|
+
import Table from 'cli-table3';
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import { Command } from 'commander';
|
|
7
|
+
import logger, { setLogLevel } from './logger.js';
|
|
8
|
+
import { loadApiProvider } from './providers.js';
|
|
9
|
+
import { evaluate } from './evaluator.js';
|
|
10
|
+
import { readPrompts, readVars, writeOutput } from './util.js';
|
|
11
|
+
const program = new Command();

/**
 * Loads the optional configuration file passed via `--config`.
 *
 * @param {string | undefined} configPath - Path to a `.json` or `.js` file.
 * @returns {Promise<object>} The parsed configuration, or `{}` when no path was given.
 * @throws {Error} When the file extension is neither `.json` nor `.js`.
 */
async function loadConfig(configPath) {
  if (!configPath) {
    return {};
  }
  const { ext } = parse(configPath);
  switch (ext) {
    case '.json':
      return JSON.parse(readFileSync(configPath, 'utf-8'));
    case '.js': {
      // This file is an ES module, where `require` is not defined, so the
      // config is loaded with a dynamic import. `pathToFileURL` resolves the
      // path against the current working directory.
      const { pathToFileURL } = await import('url');
      const loaded = await import(pathToFileURL(configPath).href);
      // CommonJS modules expose `module.exports` as the default export.
      return loaded.default ?? loaded;
    }
    default:
      throw new Error(`Unsupported configuration file format: ${ext}`);
  }
}

/**
 * Logs the summary table to the console. The first row of `summary.table`
 * is used as the header; the first column of each remaining row is colored
 * green when it is exactly "PASS" and red when it starts with "FAIL".
 *
 * @param {{ table: string[][] }} summary - Evaluation summary.
 */
function logSummaryTable(summary) {
  const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
  const head = summary.table[0];
  const table = new Table({
    head,
    colWidths: Array(head.length).fill(Math.floor(maxWidth / head.length)),
    wordWrap: true,
    wrapOnWordBoundary: true,
    style: {
      head: ['blue', 'bold'],
    },
  });
  for (const row of summary.table.slice(1)) {
    const color = row[0] === 'PASS' ? 'green' : row[0].startsWith('FAIL') ? 'red' : undefined;
    table.push(row.map((col, i) => (i === 0 && color ? chalk[color](col) : col)));
  }
  logger.info('\n' + table.toString());
}

program
  .command('eval')
  .description('Evaluate prompts')
  .requiredOption('-p, --prompt <paths...>', 'Paths to prompt files (.txt)')
  .requiredOption('-r, --provider <name or path...>', 'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module')
  .option('-o, --output <path>', 'Path to output file (csv, json, yaml, html)')
  .option('-v, --vars <path>', 'Path to file with prompt variables (csv, json, yaml)')
  .option('-c, --config <path>', 'Path to configuration file')
  .option('-j, --max-concurrency <number>', 'Maximum number of concurrent API calls')
  .option('--verbose', 'Show debug logs')
  .action(async (cmdObj) => {
    if (cmdObj.verbose) {
      setLogLevel('debug');
    }

    const config = await loadConfig(cmdObj.config);
    const vars = cmdObj.vars ? readVars(cmdObj.vars) : [];
    const providers = cmdObj.provider.map((p) => loadApiProvider(p));

    // Option values arrive as strings; convert so the evaluator receives a
    // real number. Missing or non-numeric input yields NaN, which fails the
    // `> 0` check and falls back to `undefined`.
    const requestedConcurrency = Number.parseInt(cmdObj.maxConcurrency, 10);

    const options = {
      prompts: readPrompts(cmdObj.prompt),
      vars,
      providers,
      showProgressBar: true,
      maxConcurrency: requestedConcurrency > 0 ? requestedConcurrency : undefined,
      // Values from the config file take precedence over the CLI-derived ones.
      ...config,
    };

    const summary = await evaluate(options);

    if (cmdObj.output) {
      logger.info(chalk.yellow(`Writing output to ${cmdObj.output}`));
      writeOutput(cmdObj.output, summary);
    } else {
      // Output table by default
      logSummaryTable(summary);
    }

    logger.info('Evaluation complete');
    logger.info(chalk.green.bold(`Successes: ${summary.stats.successes}`));
    logger.info(chalk.red.bold(`Failures: ${summary.stats.failures}`));
    logger.info(`Token usage: Total ${summary.stats.tokenUsage.total} Prompt ${summary.stats.tokenUsage.prompt} Completion ${summary.stats.tokenUsage.completion}`);
    logger.info('Done.');
  });

// The action handler is async, so use parseAsync to wait for it to finish.
await program.parseAsync(process.argv);

if (!process.argv.slice(2).length) {
  program.outputHelp();
}
|
|
90
|
+
//# sourceMappingURL=main.js.map
|
package/dist/main.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"main.js","sourceRoot":"","sources":["../src/main.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,KAAK,EAAE,MAAM,MAAM,CAAC;AAE7B,OAAO,KAAK,MAAM,YAAY,CAAC;AAC/B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAEpC,OAAO,MAAM,EAAE,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAClD,OAAO,EAAE,eAAe,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,EAAE,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AAC1C,OAAO,EAAE,WAAW,EAAE,QAAQ,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAI/D,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;AAE9B,OAAO;KACJ,OAAO,CAAC,MAAM,CAAC;KACf,WAAW,CAAC,kBAAkB,CAAC;KAC/B,cAAc,CAAC,yBAAyB,EAAE,8BAA8B,CAAC;KACzE,cAAc,CACb,kCAAkC,EAClC,kGAAkG,CACnG;KACA,MAAM,CAAC,qBAAqB,EAAE,6CAA6C,CAAC;KAC5E,MAAM,CAAC,mBAAmB,EAAE,sDAAsD,CAAC;KACnF,MAAM,CAAC,qBAAqB,EAAE,4BAA4B,CAAC;KAC3D,MAAM,CAAC,gCAAgC,EAAE,wCAAwC,CAAC;KAClF,MAAM,CAAC,WAAW,EAAE,iBAAiB,CAAC;KACtC,MAAM,CAAC,KAAK,EAAE,MAAoC,EAAE,EAAE;IACrD,IAAI,MAAM,CAAC,OAAO,EAAE;QAClB,WAAW,CAAC,OAAO,CAAC,CAAC;KACtB;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IACjC,IAAI,MAAM,GAAG,EAAE,CAAC;IAChB,IAAI,UAAU,EAAE;QACd,MAAM,GAAG,GAAG,KAAK,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC;QAClC,QAAQ,GAAG,EAAE;YACX,KAAK,OAAO;gBACV,MAAM,OAAO,GAAG,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC;gBAClD,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;gBAC7B,MAAM;YACR,KAAK,KAAK;gBACR,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAAC;gBAC7B,MAAM;YACR;gBACE,MAAM,IAAI,KAAK,CAAC,0CAA0C,GAAG,EAAE,CAAC,CAAC;SACpE;KACF;IAED,IAAI,IAAI,GAAiB,EAAE,CAAC;IAC5B,IAAI,MAAM,CAAC,IAAI,EAAE;QACf,IAAI,GAAG,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;KAC9B;IAED,MAAM,SAAS,GAAG,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAoB;QAC/B,OAAO,EAAE,WAAW,CAAC,MAAM,CAAC,MAAM,CAAC;QACnC,IAAI;QACJ,SAAS;QACT,eAAe,EAAE,IAAI;QACrB,cAAc,EACZ,MAAM,CAAC,cAAc,IAAI,MAAM,CAAC,cAAc,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC,SAAS;QACxF,GAAG,MAAM;KACV,CAAC;IAEF,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;IAExC,IAAI,MAAM,CAAC,MAAM,EAAE;QACjB,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,
MAAM,CAAC,qBAAqB,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QAChE,WAAW,CAAC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KACrC;SAAM;QACL,0BAA0B;QAC1B,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;QAC5E,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,KAAK,GAAG,IAAI,KAAK,CAAC;YACtB,IAAI;YACJ,SAAS,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC;YACtE,QAAQ,EAAE,IAAI;YACd,kBAAkB,EAAE,IAAI;YACxB,KAAK,EAAE;gBACL,IAAI,EAAE,CAAC,MAAM,EAAE,MAAM,CAAC;aACvB;SACF,CAAC,CAAC;QACH,mHAAmH;QACnH,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;YACxC,MAAM,KAAK,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;YAC1F,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;SAC/E;QAED,MAAM,CAAC,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC;KACtC;IACD,MAAM,CAAC,IAAI,CAAC,qBAAqB,CAAC,CAAC;IACnC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,cAAc,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,CAAC,CAAC,CAAC;IACvE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,aAAa,OAAO,CAAC,KAAK,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;IACnE,MAAM,CAAC,IAAI,CACT,sBAAsB,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,WAAW,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,MAAM,eAAe,OAAO,CAAC,KAAK,CAAC,UAAU,CAAC,UAAU,EAAE,CACnJ,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;AACvB,CAAC,CAAC,CAAC;AAEL,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;AAE5B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE;IACjC,OAAO,CAAC,UAAU,EAAE,CAAC;CACtB"}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { ApiProvider, ProviderResponse } from './types.js';
|
|
2
|
+
export declare class OpenAiGenericProvider implements ApiProvider {
|
|
3
|
+
modelName: string;
|
|
4
|
+
apiKey: string;
|
|
5
|
+
constructor(modelName: string, apiKey?: string);
|
|
6
|
+
id(): string;
|
|
7
|
+
toString(): string;
|
|
8
|
+
callApi(prompt: string): Promise<ProviderResponse>;
|
|
9
|
+
}
|
|
10
|
+
export declare class OpenAiCompletionProvider extends OpenAiGenericProvider {
|
|
11
|
+
static OPENAI_COMPLETION_MODELS: string[];
|
|
12
|
+
constructor(modelName: string, apiKey?: string);
|
|
13
|
+
callApi(prompt: string): Promise<ProviderResponse>;
|
|
14
|
+
}
|
|
15
|
+
export declare class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
16
|
+
static OPENAI_CHAT_MODELS: string[];
|
|
17
|
+
constructor(modelName: string, apiKey?: string);
|
|
18
|
+
callApi(prompt: string): Promise<ProviderResponse>;
|
|
19
|
+
}
|
|
20
|
+
export declare function loadApiProvider(providerPath: string): ApiProvider;
|
|
21
|
+
//# sourceMappingURL=providers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.d.ts","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAEA,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAG3D,qBAAa,qBAAsB,YAAW,WAAW;IACvD,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,EAAE,MAAM,CAAC;gBAEH,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAY9C,EAAE,IAAI,MAAM;IAIZ,QAAQ,IAAI,MAAM;IAKZ,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;CAGzD;AAED,qBAAa,wBAAyB,SAAQ,qBAAqB;IACjE,MAAM,CAAC,wBAAwB,WAM7B;gBAEU,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAWxC,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;CA4BzD;AAED,qBAAa,4BAA6B,SAAQ,qBAAqB;IACrE,MAAM,CAAC,kBAAkB,WAOvB;gBAEU,SAAS,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,MAAM;IAWxC,OAAO,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,CAAC;CAoCzD;AAED,wBAAgB,eAAe,CAAC,YAAY,EAAE,MAAM,GAAG,WAAW,CAyBjE"}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import { createRequire } from 'module';
import fetch from 'node-fetch';
import logger from './logger.js';
|
|
3
|
+
/**
 * Base class for OpenAI-backed providers. Stores the model name and API key;
 * `callApi` is a placeholder that always rejects.
 */
export class OpenAiGenericProvider {
  /**
   * @param {string} modelName - Name of the OpenAI model.
   * @param {string} [apiKey] - API key; when falsy, the OPENAI_API_KEY
   *   environment variable is used instead.
   * @throws {Error} When neither source provides a key.
   */
  constructor(modelName, apiKey) {
    this.modelName = modelName;

    const resolvedKey = apiKey ? apiKey : process.env.OPENAI_API_KEY;
    if (resolvedKey) {
      this.apiKey = resolvedKey;
      return;
    }
    throw new Error('OpenAI API key is not set. Set OPENAI_API_KEY environment variable or pass it as an argument to the constructor.');
  }

  /** @returns {string} Provider identifier of the form `openai:<model name>`. */
  id() {
    const { modelName } = this;
    return `openai:${modelName}`;
  }

  /** @returns {string} Human-readable provider label. */
  toString() {
    const { modelName } = this;
    return `[OpenAI Provider ${modelName}]`;
  }

  /**
   * Placeholder; this base implementation ignores `prompt` and always rejects.
   *
   * @param {string} prompt - Unused here.
   * @returns {Promise<never>} Rejects with "Not implemented".
   */
  async callApi(prompt) {
    throw new Error('Not implemented');
  }
}
|
|
23
|
+
/**
 * Provider for the OpenAI text completion endpoint (`/v1/completions`).
 */
class OpenAiCompletionProvider extends OpenAiGenericProvider {
  /**
   * @param {string} modelName - Must be listed in `OPENAI_COMPLETION_MODELS`.
   * @param {string} [apiKey] - Forwarded to the base class.
   * @throws {Error} When `modelName` is not a known completion model.
   */
  constructor(modelName, apiKey) {
    if (!OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelName)) {
      throw new Error(`Unknown OpenAI completion model name: ${modelName}. Use one of the following: ${OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.join(', ')}`);
    }
    super(modelName, apiKey);
  }

  /**
   * Sends `prompt` to the completions endpoint.
   *
   * `max_tokens` and `temperature` come from the OPENAI_MAX_TOKENS and
   * OPENAI_TEMPERATURE environment variables (defaults: 1024 and 0).
   *
   * @param {string} prompt - Prompt text.
   * @returns {Promise<{output: string, tokenUsage: {total: number, prompt: number, completion: number}}>}
   * @throws {Error} When the API responds with a non-OK status or an error payload.
   */
  async callApi(prompt) {
    const body = {
      model: this.modelName,
      prompt,
      // Environment variables are strings; convert them so the JSON payload
      // carries numbers rather than quoted values.
      max_tokens: Number.parseInt(process.env.OPENAI_MAX_TOKENS || '1024', 10),
      temperature: Number.parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
    };
    logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
    const response = await fetch('https://api.openai.com/v1/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify(body),
    });
    const data = await response.json();
    logger.debug(`\tOpenAI API response: ${JSON.stringify(data)}`);

    // Report API failures explicitly instead of failing later with a
    // TypeError when `data.choices` is missing.
    if (!response.ok || data.error) {
      const reason = data.error?.message ?? `HTTP ${response.status} ${response.statusText}`;
      throw new Error(`OpenAI API error: ${reason}`);
    }

    return {
      output: data.choices[0].text,
      tokenUsage: {
        total: data.usage.total_tokens,
        prompt: data.usage.prompt_tokens,
        completion: data.usage.completion_tokens,
      },
    };
  }
}
OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS = [
  'text-davinci-003',
  'text-davinci-002',
  'text-curie-001',
  'text-babbage-001',
  'text-ada-001',
];
export { OpenAiCompletionProvider };
|
|
66
|
+
/**
 * Provider for the OpenAI chat completion endpoint (`/v1/chat/completions`).
 */
class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
  /**
   * @param {string} modelName - Must be listed in `OPENAI_CHAT_MODELS`.
   * @param {string} [apiKey] - Forwarded to the base class.
   * @throws {Error} When `modelName` is not a known chat model.
   */
  constructor(modelName, apiKey) {
    if (!OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelName)) {
      // Report the chat model list here, not the completion model list.
      throw new Error(`Unknown OpenAI chat model name: ${modelName}. Use one of the following: ${OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.join(', ')}`);
    }
    super(modelName, apiKey);
  }

  /**
   * Sends `prompt` to the chat completions endpoint.
   *
   * If `prompt` parses as a JSON array it is sent as the `messages` payload;
   * otherwise it becomes the content of a single user message.
   * `max_tokens` comes from OPENAI_MAX_TOKENS (default 1024). `temperature`
   * comes from OPENAI_TEMPERATURE, falling back to the older
   * OPENAI_MAX_TEMPERATURE name (default 0).
   *
   * @param {string} prompt - Plain text or a JSON-encoded messages array.
   * @returns {Promise<{output: string, tokenUsage: {total: number, prompt: number, completion: number}}>}
   * @throws {Error} When the API responds with a non-OK status or an error payload.
   */
  async callApi(prompt) {
    let messages = [{ role: 'user', content: prompt }];
    try {
      const parsed = JSON.parse(prompt);
      // Only an array is a usable `messages` payload; other JSON values
      // (numbers, quoted strings, objects) are treated as plain prompt text.
      if (Array.isArray(parsed)) {
        messages = parsed;
      }
    } catch (e) {
      // Not JSON: deliberately keep the plain-text user message.
    }

    const temperatureSetting = process.env.OPENAI_TEMPERATURE || process.env.OPENAI_MAX_TEMPERATURE || '0';
    const body = {
      model: this.modelName,
      messages,
      // Environment variables are strings; convert them so the JSON payload
      // carries numbers rather than quoted values.
      max_tokens: Number.parseInt(process.env.OPENAI_MAX_TOKENS || '1024', 10),
      temperature: Number.parseFloat(temperatureSetting),
    };
    logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
    const response = await fetch('https://api.openai.com/v1/chat/completions', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${this.apiKey}`,
      },
      body: JSON.stringify(body),
    });
    const data = await response.json();
    logger.debug(`\tOpenAI API response: ${JSON.stringify(data)}`);

    // Report API failures explicitly instead of failing later with a
    // TypeError when `data.choices` is missing.
    if (!response.ok || data.error) {
      const reason = data.error?.message ?? `HTTP ${response.status} ${response.statusText}`;
      throw new Error(`OpenAI API error: ${reason}`);
    }

    return {
      output: data.choices[0].message.content,
      tokenUsage: {
        total: data.usage.total_tokens,
        prompt: data.usage.prompt_tokens,
        completion: data.usage.completion_tokens,
      },
    };
  }
}
OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS = [
  'gpt-4',
  'gpt-4-0314',
  'gpt-4-32k',
  'gpt-4-32k-0314',
  'gpt-3.5-turbo',
  'gpt-3.5-turbo-0301',
];
export { OpenAiChatCompletionProvider };
|
|
119
|
+
/**
 * Creates an API provider from a provider specifier.
 *
 * Supported `openai:` forms:
 *   - `openai:chat[:<model>]`        (default model: gpt-3.5-turbo)
 *   - `openai:completion[:<model>]`  (default model: text-davinci-003)
 *   - `openai:<model>` where the model is a known chat or completion model
 *
 * Any other value is treated as a module to load; its default export (or the
 * module itself when there is no default export) is instantiated with no
 * arguments.
 *
 * @param {string} providerPath - Provider specifier or module path.
 * @returns {object} The provider instance.
 * @throws {Error} When an `openai:` specifier names an unknown model type.
 */
export function loadApiProvider(providerPath) {
  if (providerPath?.startsWith('openai:')) {
    // Load OpenAI module
    const [, modelType, modelName] = providerPath.split(':');

    if (modelType === 'chat') {
      return new OpenAiChatCompletionProvider(modelName || 'gpt-3.5-turbo');
    }
    if (modelType === 'completion') {
      return new OpenAiCompletionProvider(modelName || 'text-davinci-003');
    }
    if (OpenAiChatCompletionProvider.OPENAI_CHAT_MODELS.includes(modelType)) {
      return new OpenAiChatCompletionProvider(modelType);
    }
    if (OpenAiCompletionProvider.OPENAI_COMPLETION_MODELS.includes(modelType)) {
      return new OpenAiCompletionProvider(modelType);
    }
    throw new Error(`Unknown OpenAI model type: ${modelType}. Use one of the following providers: openai:chat:<model name>, openai:completion:<model name>`);
  }

  // Load custom module. This file is an ES module, where the global `require`
  // is not defined, so build one anchored at this file.
  const requireModule = createRequire(import.meta.url);
  const loaded = requireModule(providerPath);
  const CustomApiProvider = loaded.default ?? loaded;
  return new CustomApiProvider();
}
|
|
145
|
+
//# sourceMappingURL=providers.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"providers.js","sourceRoot":"","sources":["../src/providers.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,YAAY,CAAC;AAG/B,OAAO,MAAM,MAAM,aAAa,CAAC;AAEjC,MAAM,OAAO,qBAAqB;IAIhC,YAAY,SAAiB,EAAE,MAAe;QAC5C,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;QAE3B,MAAM,GAAG,GAAG,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QACjD,IAAI,CAAC,GAAG,EAAE;YACR,MAAM,IAAI,KAAK,CACb,kHAAkH,CACnH,CAAC;SACH;QACD,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC;IACpB,CAAC;IAED,EAAE;QACA,OAAO,UAAU,IAAI,CAAC,SAAS,EAAE,CAAC;IACpC,CAAC;IAED,QAAQ;QACN,OAAO,oBAAoB,IAAI,CAAC,SAAS,GAAG,CAAC;IAC/C,CAAC;IAED,wDAAwD;IACxD,KAAK,CAAC,OAAO,CAAC,MAAc;QAC1B,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;IACrC,CAAC;CACF;AAED,MAAa,wBAAyB,SAAQ,qBAAqB;IASjE,YAAY,SAAiB,EAAE,MAAe;QAC5C,IAAI,CAAC,wBAAwB,CAAC,wBAAwB,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;YAC1E,MAAM,IAAI,KAAK,CACb,yCAAyC,SAAS,+BAA+B,wBAAwB,CAAC,wBAAwB,CAAC,IAAI,CACrI,IAAI,CACL,EAAE,CACJ,CAAC;SACH;QACD,KAAK,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,MAAc;QAC1B,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,IAAI,CAAC,SAAS;YACrB,MAAM;YACN,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YACjD,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB,IAAI,CAAC;SACjD,CAAC;QACF,MAAM,CAAC,KAAK,CAAC,uBAAuB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC5D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,uCAAuC,EAAE;YACpE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAmB,CAAC;QACvD,MAAM,CAAC,KAAK,CAAC,0BAA0B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/D,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,IAAI;YAC5B,UAAU,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY;gBAC9B,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa;gBAChC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,iBAAiB;aACzC;SACF,CAAC;IACJ,CAAC;;AA9CM,iDAAwB,GAAG;IAChC,kBAAkB;IAClB,kBAAkB;IAClB,gBAAgB;IAChB,kBAAkB;IAClB,cAAc;CACf,CAAC;SAPS,wBAAwB;AAkDrC,MAAa,4BAA6B,SAAQ,qBAAqB;IAUrE,YAAY,SAAiB,EAAE,MAAe;QAC5C,IAAI,CAAC,4BA
A4B,CAAC,kBAAkB,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;YACxE,MAAM,IAAI,KAAK,CACb,yCAAyC,SAAS,+BAA+B,wBAAwB,CAAC,wBAAwB,CAAC,IAAI,CACrI,IAAI,CACL,EAAE,CACJ,CAAC;SACH;QACD,KAAK,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;IAC3B,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,MAAc;QAC1B,IAAI,QAA6C,CAAC;QAClD,IAAI;YACF,qEAAqE;YACrE,yCAAyC;YACzC,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;SAC/B;QAAC,OAAO,CAAC,EAAE;YACV,QAAQ,GAAG,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;SAChD;QACD,MAAM,IAAI,GAAG;YACX,KAAK,EAAE,IAAI,CAAC,SAAS;YACrB,QAAQ,EAAE,QAAQ;YAClB,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,IAAI;YACjD,WAAW,EAAE,OAAO,CAAC,GAAG,CAAC,sBAAsB,IAAI,CAAC;SACrD,CAAC;QACF,MAAM,CAAC,KAAK,CAAC,uBAAuB,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC5D,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,4CAA4C,EAAE;YACzE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,IAAI,CAAC,MAAM,EAAE;aACvC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAmB,CAAC;QACvD,MAAM,CAAC,KAAK,CAAC,0BAA0B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC/D,OAAO;YACL,MAAM,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO;YACvC,UAAU,EAAE;gBACV,KAAK,EAAE,IAAI,CAAC,KAAK,CAAC,YAAY;gBAC9B,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa;gBAChC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,iBAAiB;aACzC;SACF,CAAC;IACJ,CAAC;;AAvDM,+CAAkB,GAAG;IAC1B,OAAO;IACP,YAAY;IACZ,WAAW;IACX,gBAAgB;IAChB,eAAe;IACf,oBAAoB;CACrB,CAAC;SARS,4BAA4B;AA2DzC,MAAM,UAAU,eAAe,CAAC,YAAoB;IAClD,IAAI,YAAY,EAAE,UAAU,CAAC,SAAS,CAAC,EAAE;QACvC,qBAAqB;QACrB,MAAM,OAAO,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACxC,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAC7B,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAE7B,IAAI,SAAS,KAAK,MAAM,EAAE;YACxB,OAAO,IAAI,4BAA4B,CAAC,SAAS,IAAI,eAAe,CAAC,CAAC;SACvE;aAAM,IAAI,SAAS,KAAK,YAAY,EAAE;YACrC,OAAO,IAAI,wBAAwB,CAAC,SAAS,IAAI,kBAAkB,CAAC,CAAC;SACtE;aAAM,IAAI,4BAA4B,CAAC,kBAAkB,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE;YAC9E,OAAO,IAAI,4BAA4B,CAAC,SAAS,CAAC,CAAC;SACpD;aAAM,IAAI,wBAAwB,CAAC,wBAAwB,CAAC,QAAQ,CAAC,S
AAS,CAAC,EAAE;YAChF,OAAO,IAAI,wBAAwB,CAAC,SAAS,CAAC,CAAC;SAChD;aAAM;YACL,MAAM,IAAI,KAAK,CACb,8BAA8B,SAAS,gGAAgG,CACxI,CAAC;SACH;KACF;IAED,qBAAqB;IACrB,MAAM,iBAAiB,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC,OAAO,CAAC;IACxD,OAAO,IAAI,iBAAiB,EAAE,CAAC;AACjC,CAAC"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html>
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="utf-8" />
|
|
5
|
+
<meta name="viewport" content="width=device-width" />
|
|
6
|
+
<title>Table Output</title>
|
|
7
|
+
<style>
|
|
8
|
+
body {
|
|
9
|
+
font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica, Arial,
|
|
10
|
+
sans-serif;
|
|
11
|
+
}
|
|
12
|
+
table,
|
|
13
|
+
th,
|
|
14
|
+
td {
|
|
15
|
+
border: 1px solid black;
|
|
16
|
+
border-collapse: collapse;
|
|
17
|
+
text-align: left;
|
|
18
|
+
word-break: break-all;
|
|
19
|
+
}
|
|
20
|
+
th,
|
|
21
|
+
td {
|
|
22
|
+
padding: 5px;
|
|
23
|
+
}
|
|
24
|
+
/* If data-content is exactly "PASS", set font color to green */
|
|
25
|
+
tr > td[data-content="PASS"]:first-child {
|
|
26
|
+
color: green;
|
|
27
|
+
font-weight: bold;
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
/* If data-content starts with "FAIL", set font color to red */
|
|
31
|
+
tr > td[data-content^="FAIL"]:first-child {
|
|
32
|
+
color: red;
|
|
33
|
+
font-weight: bold;
|
|
34
|
+
}
|
|
35
|
+
</style>
|
|
36
|
+
</head>
|
|
37
|
+
<body>
|
|
38
|
+
<table>
  <thead>
    {# Header cells must sit inside a row: <thead> may only contain <tr> elements. #}
    <tr>
      {% for header in table[0] %}
      <th>{{ header }}</th>
      {% endfor %}
    </tr>
  </thead>
  <tbody>
    {% for row in table.slice(1) %}
    <tr>
      {% for cell in row %}
      <td data-content="{{cell}}">{{ cell }}</td>
      {% endfor %}
    </tr>
    {% endfor %}
  </tbody>
</table>
|
|
54
|
+
</body>
|
|
55
|
+
</html>
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
export interface CommandLineOptions {
|
|
2
|
+
prompt: string[];
|
|
3
|
+
provider: string[];
|
|
4
|
+
output?: string;
|
|
5
|
+
vars?: string;
|
|
6
|
+
config?: string;
|
|
7
|
+
verbose?: boolean;
|
|
8
|
+
maxConcurrency?: number;
|
|
9
|
+
}
|
|
10
|
+
export interface ApiProvider {
|
|
11
|
+
id: () => string;
|
|
12
|
+
callApi: (prompt: string) => Promise<ProviderResponse>;
|
|
13
|
+
}
|
|
14
|
+
interface TokenUsage {
|
|
15
|
+
total: number;
|
|
16
|
+
prompt: number;
|
|
17
|
+
completion: number;
|
|
18
|
+
}
|
|
19
|
+
export interface ProviderResponse {
|
|
20
|
+
output: string;
|
|
21
|
+
tokenUsage?: TokenUsage;
|
|
22
|
+
}
|
|
23
|
+
export interface CsvRow {
|
|
24
|
+
[key: string]: string;
|
|
25
|
+
}
|
|
26
|
+
export type VarMapping = Record<string, string>;
|
|
27
|
+
export interface EvaluateOptions {
|
|
28
|
+
providers: ApiProvider[];
|
|
29
|
+
prompts: string[];
|
|
30
|
+
vars?: VarMapping[];
|
|
31
|
+
maxConcurrency?: number;
|
|
32
|
+
showProgressBar?: boolean;
|
|
33
|
+
}
|
|
34
|
+
export interface Prompt {
|
|
35
|
+
raw: string;
|
|
36
|
+
display: string;
|
|
37
|
+
}
|
|
38
|
+
export interface EvaluateResult {
|
|
39
|
+
prompt: Prompt;
|
|
40
|
+
vars: Record<string, string>;
|
|
41
|
+
response?: ProviderResponse;
|
|
42
|
+
error?: string;
|
|
43
|
+
success: boolean;
|
|
44
|
+
}
|
|
45
|
+
export interface EvaluateSummary {
|
|
46
|
+
results: EvaluateResult[];
|
|
47
|
+
table: string[][];
|
|
48
|
+
stats: {
|
|
49
|
+
successes: number;
|
|
50
|
+
failures: number;
|
|
51
|
+
tokenUsage: TokenUsage;
|
|
52
|
+
};
|
|
53
|
+
}
|
|
54
|
+
export {};
|
|
55
|
+
//# sourceMappingURL=types.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,kBAAkB;IACjC,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,MAAM,CAAC;IACjB,OAAO,EAAE,CAAC,MAAM,EAAE,MAAM,KAAK,OAAO,CAAC,gBAAgB,CAAC,CAAC;CACxD;AAED,UAAU,UAAU;IAClB,KAAK,EAAE,MAAM,CAAC;IACd,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,MAAM,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,UAAU,CAAC;CACzB;AAED,MAAM,WAAW,MAAM;IACrB,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAC;CACvB;AAED,MAAM,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;AAEhD,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,WAAW,EAAE,CAAC;IACzB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,IAAI,CAAC,EAAE,UAAU,EAAE,CAAC;IAEpB,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC3B;AAED,MAAM,WAAW,MAAM;IACrB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAC7B,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,OAAO,CAAC;CAClB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,cAAc,EAAE,CAAC;IAC1B,KAAK,EAAE,MAAM,EAAE,EAAE,CAAC;IAClB,KAAK,EAAE;QACL,SAAS,EAAE,MAAM,CAAC;QAClB,QAAQ,EAAE,MAAM,CAAC;QACjB,UAAU,EAAE,UAAU,CAAC;KACxB,CAAC;CACH"}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":""}
|
package/dist/util.d.ts
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { CsvRow } from './types.js';
|
|
2
|
+
import type { EvaluateSummary } from './types.js';
|
|
3
|
+
export declare function readPrompts(promptPaths: string[]): string[];
|
|
4
|
+
export declare function readVars(varsPath: string): CsvRow[];
|
|
5
|
+
export declare function writeOutput(outputPath: string, summary: EvaluateSummary): void;
|
|
6
|
+
//# sourceMappingURL=util.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"util.d.ts","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAMpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAYlD,wBAAgB,WAAW,CAAC,WAAW,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAM3D;AAED,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,EAAE,CAanD;AAED,wBAAgB,WAAW,CAAC,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,IAAI,CAoB9E"}
|
package/dist/util.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import yaml from 'js-yaml';
|
|
3
|
+
import nunjucks from 'nunjucks';
|
|
4
|
+
import { parse as parsePath } from 'path';
|
|
5
|
+
import { parse as parseCsv } from 'csv-parse/sync';
|
|
6
|
+
import { stringify } from 'csv-stringify/sync';
|
|
7
|
+
import { getDirectory } from './esm.js';
|
|
8
|
+
const PROMPT_DELIMITER = '---';
|
|
9
|
+
/**
 * Parses a JSON string without throwing.
 *
 * @param {string} json - Text to parse.
 * @returns {*} The parsed value, or `undefined` when `json` is not valid JSON.
 */
function parseJson(json) {
  let parsed;
  try {
    parsed = JSON.parse(json);
  } catch (err) {
    // Invalid JSON is reported to the caller as `undefined`.
    parsed = undefined;
  }
  return parsed;
}
|
|
17
|
+
/**
 * Reads prompt files as UTF-8 text.
 *
 * When exactly one path is given, that file's content is split on
 * `PROMPT_DELIMITER` and each piece is trimmed, so a single file can hold
 * several prompts. Otherwise each file's content is one prompt, returned as-is.
 *
 * @param {string[]} promptPaths - Paths of the prompt files.
 * @returns {string[]} The prompts.
 */
export function readPrompts(promptPaths) {
  const fileContents = [];
  for (const promptPath of promptPaths) {
    fileContents.push(fs.readFileSync(promptPath, 'utf-8'));
  }

  if (fileContents.length !== 1) {
    return fileContents;
  }

  const [onlyFile] = fileContents;
  return onlyFile.split(PROMPT_DELIMITER).map((segment) => segment.trim());
}
|
|
24
|
+
/**
 * Reads prompt variables from a file, choosing the parser by file extension.
 *
 * `.csv` is parsed with `columns: true`, `.json` with `parseJson`, and
 * `.yaml`/`.yml` with `yaml.load`. Any other extension yields an empty array
 * without reading the file.
 *
 * @param {string} varsPath - Path to the variables file.
 * @returns {*} Whatever the selected parser returns, or `[]` for other extensions.
 */
export function readVars(varsPath) {
  const { ext } = parsePath(varsPath);

  switch (ext.slice(1)) {
    case 'csv':
      return parseCsv(fs.readFileSync(varsPath, 'utf-8'), { columns: true });
    case 'json':
      return parseJson(fs.readFileSync(varsPath, 'utf-8'));
    case 'yaml':
    case 'yml':
      return yaml.load(fs.readFileSync(varsPath, 'utf-8'));
    default:
      return [];
  }
}
|
|
38
|
+
/**
 * Writes an evaluation summary to disk in the format implied by the output
 * path's extension (case-insensitive):
 *   - csv / txt:  `summary.table` serialised as CSV
 *   - json:       the whole summary, pretty-printed with 2-space indent
 *   - yaml / yml: the whole summary dumped as YAML
 *   - html:       `tableOutput.html` (next to this module) rendered with
 *                 `summary.table` and `summary.results`
 *
 * @param {string} outputPath - Destination file path; its extension picks the format.
 * @param {object} summary - Evaluation summary with `table` and `results`.
 * @throws {Error} If the extension is not one of the supported formats.
 */
export function writeOutput(outputPath, summary) {
  const outputExtension = outputPath.split('.').pop()?.toLowerCase();
  if (outputExtension === 'csv' || outputExtension === 'txt') {
    const csvOutput = stringify(summary.table);
    fs.writeFileSync(outputPath, csvOutput);
  } else if (outputExtension === 'json') {
    fs.writeFileSync(outputPath, JSON.stringify(summary, null, 2));
  } else if (outputExtension === 'yaml' || outputExtension === 'yml') {
    fs.writeFileSync(outputPath, yaml.dump(summary));
  } else if (outputExtension === 'html') {
    const template = fs.readFileSync(`${getDirectory()}/tableOutput.html`, 'utf-8');
    const htmlOutput = nunjucks.renderString(template, {
      table: summary.table,
      results: summary.results,
    });
    fs.writeFileSync(outputPath, htmlOutput);
  } else {
    // List every extension handled above so the message matches actual behaviour.
    throw new Error('Unsupported output file format. Use CSV, TXT, JSON, YAML, YML, or HTML.');
  }
}
|
|
62
|
+
//# sourceMappingURL=util.js.map
|
package/dist/util.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"util.js","sourceRoot":"","sources":["../src/util.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,IAAI,CAAC;AAEzB,OAAO,IAAI,MAAM,SAAS,CAAC;AAC3B,OAAO,QAAQ,MAAM,UAAU,CAAC;AAChC,OAAO,EAAE,KAAK,IAAI,SAAS,EAAE,MAAM,MAAM,CAAC;AAE1C,OAAO,EAAE,KAAK,IAAI,QAAQ,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,oBAAoB,CAAC;AAE/C,OAAO,EAAE,YAAY,EAAE,MAAM,UAAU,CAAC;AAIxC,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAE/B,SAAS,SAAS,CAAC,IAAY;IAC7B,IAAI;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;KACzB;IAAC,OAAO,GAAG,EAAE;QACZ,OAAO,SAAS,CAAC;KAClB;AACH,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,WAAqB;IAC/C,IAAI,cAAc,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,EAAE,CAAC,YAAY,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC;IAC/E,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE;QAC/B,cAAc,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;KACjF;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,QAAgB;IACvC,MAAM,aAAa,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACvD,IAAI,IAAI,GAAa,EAAE,CAAC;IAExB,IAAI,aAAa,KAAK,KAAK,EAAE;QAC3B,IAAI,GAAG,QAAQ,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;KACxE;SAAM,IAAI,aAAa,KAAK,MAAM,EAAE;QACnC,IAAI,GAAG,SAAS,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;KACtD;SAAM,IAAI,aAAa,KAAK,MAAM,IAAI,aAAa,KAAK,KAAK,EAAE;QAC9D,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAmB,CAAC;KACxE;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,UAAkB,EAAE,OAAwB;IACtE,MAAM,eAAe,GAAG,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,WAAW,EAAE,CAAC;IAEnE,IAAI,eAAe,KAAK,KAAK,IAAI,eAAe,KAAK,KAAK,EAAE;QAC1D,MAAM,SAAS,GAAG,SAAS,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QAC3C,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;KACzC;SAAM,IAAI,eAAe,KAAK,MAAM,EAAE;QACrC,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;KAChE;SAAM,IAAI,eAAe,KAAK,MAAM,IAAI,eAAe,KAAK,KAAK,EAAE;QAClE,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,I
AAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;KAClD;SAAM,IAAI,eAAe,KAAK,MAAM,EAAE;QACrC,MAAM,QAAQ,GAAG,EAAE,CAAC,YAAY,CAAC,GAAG,YAAY,EAAE,mBAAmB,EAAE,OAAO,CAAC,CAAC;QAChF,MAAM,UAAU,GAAG,QAAQ,CAAC,YAAY,CAAC,QAAQ,EAAE;YACjD,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,OAAO,EAAE,OAAO,CAAC,OAAO;SACzB,CAAC,CAAC;QACH,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;KAC1C;SAAM;QACL,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;KAC5E;AACH,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "promptfoo",
|
|
3
|
+
"author": "Ian Webster",
|
|
4
|
+
"version": "0.1.0",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"type": "module",
|
|
7
|
+
"main": "dist/index.js",
|
|
8
|
+
"types": "dist/index.d.ts",
|
|
9
|
+
"typings": "dist/index.d.ts",
|
|
10
|
+
"files": [
|
|
11
|
+
"dist",
|
|
12
|
+
"src"
|
|
13
|
+
],
|
|
14
|
+
"engines": {
|
|
15
|
+
"node": ">=12"
|
|
16
|
+
},
|
|
17
|
+
"bin": {
|
|
18
|
+
"promptfoo": "dist/main.js"
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"local": "ts-node --esm src/main.ts",
|
|
22
|
+
"build": "tsc && cp src/*.html dist/",
|
|
23
|
+
"build:watch": "tsc --watch",
|
|
24
|
+
"prepare": "tsc",
|
|
25
|
+
"test": "jest",
|
|
26
|
+
"test:watch": "jest --watch",
|
|
27
|
+
"format": "prettier -w ."
|
|
28
|
+
},
|
|
29
|
+
"devDependencies": {
|
|
30
|
+
"@types/async": "^3.2.20",
|
|
31
|
+
"@types/cli-progress": "^3.11.0",
|
|
32
|
+
"@types/jest": "^29.5.1",
|
|
33
|
+
"@types/js-yaml": "^4.0.5",
|
|
34
|
+
"@types/nunjucks": "^3.2.2",
|
|
35
|
+
"babel-jest": "^29.5.0",
|
|
36
|
+
"jest-watch-typeahead": "^2.2.2",
|
|
37
|
+
"prettier": "^2.8.8",
|
|
38
|
+
"ts-jest": "^29.1.0",
|
|
39
|
+
"ts-node": "^10.9.1",
|
|
40
|
+
"typescript": "^5.0.4"
|
|
41
|
+
},
|
|
42
|
+
"dependencies": {
|
|
43
|
+
"async": "^3.2.4",
|
|
44
|
+
"chalk": "^4.1.2",
|
|
45
|
+
"cli-progress": "^3.12.0",
|
|
46
|
+
"cli-table3": "^0.6.3",
|
|
47
|
+
"commander": "^10.0.1",
|
|
48
|
+
"csv-parse": "^5.3.8",
|
|
49
|
+
"csv-stringify": "^6.3.2",
|
|
50
|
+
"js-yaml": "^4.1.0",
|
|
51
|
+
"node-fetch": "^3.3.1",
|
|
52
|
+
"nunjucks": "^3.2.4",
|
|
53
|
+
"winston": "^3.8.2"
|
|
54
|
+
}
|
|
55
|
+
}
|
package/src/esm.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// ESM-specific helpers that need to be mocked out in tests
|
|
2
|
+
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
|
|
6
|
+
/**
 * Returns the directory that contains this module file, derived from
 * `import.meta.url`.
 */
export function getDirectory(): string {
  // @ts-ignore: Jest chokes on this
  const modulePath = fileURLToPath(import.meta.url);
  const moduleDir = path.dirname(modulePath);
  return moduleDir;
}
|