promptfoo 0.69.2 → 0.70.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +1 -1
- package/dist/src/assertions.d.ts +5 -9
- package/dist/src/assertions.d.ts.map +1 -1
- package/dist/src/assertions.js +12 -9
- package/dist/src/assertions.js.map +1 -1
- package/dist/src/commands/eval.d.ts +5 -0
- package/dist/src/commands/eval.d.ts.map +1 -0
- package/dist/src/commands/eval.js +288 -0
- package/dist/src/commands/eval.js.map +1 -0
- package/dist/src/commands/generate.d.ts +20 -0
- package/dist/src/commands/generate.d.ts.map +1 -0
- package/dist/src/commands/generate.js +215 -0
- package/dist/src/commands/generate.js.map +1 -0
- package/dist/src/commands/redteam.d.ts +10 -0
- package/dist/src/commands/redteam.d.ts.map +1 -0
- package/dist/src/commands/redteam.js +191 -0
- package/dist/src/commands/redteam.js.map +1 -0
- package/dist/src/config.d.ts +17 -0
- package/dist/src/config.d.ts.map +1 -0
- package/dist/src/config.js +424 -0
- package/dist/src/config.js.map +1 -0
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +12 -10
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/main.js +13 -569
- package/dist/src/main.js.map +1 -1
- package/dist/src/providers/pythonCompletion.d.ts +1 -0
- package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
- package/dist/src/providers/pythonCompletion.js +10 -3
- package/dist/src/providers/pythonCompletion.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +5 -0
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +53 -1
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/index.d.ts +0 -2
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +24 -40
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/iterative.d.ts +1 -1
- package/dist/src/redteam/iterative.js +1 -1
- package/dist/src/redteam/iterative.js.map +1 -1
- package/dist/src/redteam/iterativeImage.d.ts +1 -1
- package/dist/src/redteam/iterativeImage.js +1 -1
- package/dist/src/redteam/iterativeImage.js.map +1 -1
- package/dist/src/redteam/plugins/base.d.ts +35 -0
- package/dist/src/redteam/plugins/base.d.ts.map +1 -0
- package/dist/src/redteam/plugins/base.js +48 -0
- package/dist/src/redteam/plugins/base.js.map +1 -0
- package/dist/src/redteam/plugins/competitors.d.ts +10 -0
- package/dist/src/redteam/plugins/competitors.d.ts.map +1 -0
- package/dist/src/redteam/plugins/competitors.js +47 -0
- package/dist/src/redteam/plugins/competitors.js.map +1 -0
- package/dist/src/redteam/plugins/contracts.d.ts +10 -0
- package/dist/src/redteam/plugins/contracts.d.ts.map +1 -0
- package/dist/src/redteam/plugins/contracts.js +47 -0
- package/dist/src/redteam/plugins/contracts.js.map +1 -0
- package/dist/src/redteam/plugins/excessiveAgency.d.ts +10 -0
- package/dist/src/redteam/plugins/excessiveAgency.d.ts.map +1 -0
- package/dist/src/redteam/plugins/excessiveAgency.js +42 -0
- package/dist/src/redteam/plugins/excessiveAgency.js.map +1 -0
- package/dist/src/redteam/plugins/hallucination.d.ts +10 -0
- package/dist/src/redteam/plugins/hallucination.d.ts.map +1 -0
- package/dist/src/redteam/plugins/hallucination.js +43 -0
- package/dist/src/redteam/plugins/hallucination.js.map +1 -0
- package/dist/src/redteam/{getHarmfulTests.d.ts → plugins/harmful.d.ts} +9 -9
- package/dist/src/redteam/plugins/harmful.d.ts.map +1 -0
- package/dist/src/redteam/{getHarmfulTests.js → plugins/harmful.js} +2 -2
- package/dist/src/redteam/plugins/harmful.js.map +1 -0
- package/dist/src/redteam/plugins/hijacking.d.ts +10 -0
- package/dist/src/redteam/plugins/hijacking.d.ts.map +1 -0
- package/dist/src/redteam/plugins/hijacking.js +47 -0
- package/dist/src/redteam/plugins/hijacking.js.map +1 -0
- package/dist/src/redteam/plugins/overreliance.d.ts +10 -0
- package/dist/src/redteam/plugins/overreliance.d.ts.map +1 -0
- package/dist/src/redteam/plugins/overreliance.js +42 -0
- package/dist/src/redteam/plugins/overreliance.js.map +1 -0
- package/dist/src/redteam/{getPiiTests.d.ts → plugins/pii.d.ts} +2 -2
- package/dist/src/redteam/plugins/pii.d.ts.map +1 -0
- package/dist/src/redteam/{getPiiTests.js → plugins/pii.js} +2 -2
- package/dist/src/redteam/plugins/pii.js.map +1 -0
- package/dist/src/redteam/plugins/politics.d.ts +10 -0
- package/dist/src/redteam/plugins/politics.d.ts.map +1 -0
- package/dist/src/redteam/plugins/politics.js +57 -0
- package/dist/src/redteam/plugins/politics.js.map +1 -0
- package/dist/src/testCases.d.ts.map +1 -1
- package/dist/src/testCases.js +3 -0
- package/dist/src/testCases.js.map +1 -1
- package/dist/src/types.d.ts +15 -3
- package/dist/src/types.d.ts.map +1 -1
- package/dist/src/types.js +9 -2
- package/dist/src/types.js.map +1 -1
- package/dist/src/util.d.ts +0 -10
- package/dist/src/util.d.ts.map +1 -1
- package/dist/src/util.js +2 -246
- package/dist/src/util.js.map +1 -1
- package/dist/src/web/nextui/404/index.html +1 -1
- package/dist/src/web/nextui/404.html +1 -1
- package/dist/src/web/nextui/_next/static/chunks/858-5d3a3678769b7e36.js +1 -1
- package/dist/src/web/nextui/_next/static/chunks/954-c35d4864ecbacd62.js +6 -0
- package/dist/src/web/nextui/_next/static/chunks/app/auth/login/{page-ee73165dd261f3ca.js → page-6fcc9431205718c7.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/{page-7375a6707eb8675e.js → page-8caf49a834d34420.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-4f93aacd25866d60.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-ce320e6d1e6d1d23.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-310e2e58179970fa.js → page-3c5a944373865122.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-73e894c39cc191f1.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/{layout-6b3048b719443145.js → layout-2038906de6c19565.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/{page-251d4ea0ac894cd9.js → page-e07a0ddbf3d6e21c.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/progress/page-73442c531d579c51.js +1 -0
- package/dist/src/web/nextui/_next/static/chunks/app/prompts/{page-6d29c01079a556f4.js → page-50e27c24c9e255bd.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/report/{page-477181752ee9b493.js → page-be00cf77531ce9cb.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/app/setup/{page-5a4d6156d3c83470.js → page-26cb5d2478fdbd34.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/{main-app-345c3eca7e5cf432.js → main-app-929a26b3c8cd3f7a.js} +1 -1
- package/dist/src/web/nextui/_next/static/chunks/{webpack-c9f728822666f852.js → webpack-8a9bc9ee0defb756.js} +1 -1
- package/dist/src/web/nextui/_next/static/css/106779eb64615639.css +1 -0
- package/dist/src/web/nextui/auth/login/index.html +1 -1
- package/dist/src/web/nextui/auth/login/index.txt +6 -6
- package/dist/src/web/nextui/auth/signup/index.html +1 -1
- package/dist/src/web/nextui/auth/signup/index.txt +6 -6
- package/dist/src/web/nextui/datasets/index.html +1 -1
- package/dist/src/web/nextui/datasets/index.txt +6 -6
- package/dist/src/web/nextui/eval/index.html +1 -1
- package/dist/src/web/nextui/eval/index.txt +6 -6
- package/dist/src/web/nextui/index.html +1 -1
- package/dist/src/web/nextui/index.txt +5 -5
- package/dist/src/web/nextui/progress/index.html +1 -1
- package/dist/src/web/nextui/progress/index.txt +6 -6
- package/dist/src/web/nextui/prompts/index.html +1 -1
- package/dist/src/web/nextui/prompts/index.txt +6 -6
- package/dist/src/web/nextui/report/index.html +1 -1
- package/dist/src/web/nextui/report/index.txt +6 -6
- package/dist/src/web/nextui/setup/index.html +2 -2
- package/dist/src/web/nextui/setup/index.txt +7 -7
- package/package.json +1 -1
- package/dist/src/redteam/getCompetitorTests.d.ts +0 -3
- package/dist/src/redteam/getCompetitorTests.d.ts.map +0 -1
- package/dist/src/redteam/getCompetitorTests.js +0 -60
- package/dist/src/redteam/getCompetitorTests.js.map +0 -1
- package/dist/src/redteam/getHallucinationTests.d.ts +0 -3
- package/dist/src/redteam/getHallucinationTests.d.ts.map +0 -1
- package/dist/src/redteam/getHallucinationTests.js +0 -56
- package/dist/src/redteam/getHallucinationTests.js.map +0 -1
- package/dist/src/redteam/getHarmfulTests.d.ts.map +0 -1
- package/dist/src/redteam/getHarmfulTests.js.map +0 -1
- package/dist/src/redteam/getHijackingTests.d.ts +0 -3
- package/dist/src/redteam/getHijackingTests.d.ts.map +0 -1
- package/dist/src/redteam/getHijackingTests.js +0 -60
- package/dist/src/redteam/getHijackingTests.js.map +0 -1
- package/dist/src/redteam/getOverconfidenceTests.d.ts +0 -3
- package/dist/src/redteam/getOverconfidenceTests.d.ts.map +0 -1
- package/dist/src/redteam/getOverconfidenceTests.js +0 -55
- package/dist/src/redteam/getOverconfidenceTests.js.map +0 -1
- package/dist/src/redteam/getPiiTests.d.ts.map +0 -1
- package/dist/src/redteam/getPiiTests.js.map +0 -1
- package/dist/src/redteam/getPoliticalStatementsTests.d.ts +0 -3
- package/dist/src/redteam/getPoliticalStatementsTests.d.ts.map +0 -1
- package/dist/src/redteam/getPoliticalStatementsTests.js +0 -70
- package/dist/src/redteam/getPoliticalStatementsTests.js.map +0 -1
- package/dist/src/redteam/getUnderconfidenceTests.d.ts +0 -3
- package/dist/src/redteam/getUnderconfidenceTests.d.ts.map +0 -1
- package/dist/src/redteam/getUnderconfidenceTests.js +0 -55
- package/dist/src/redteam/getUnderconfidenceTests.js.map +0 -1
- package/dist/src/redteam/getUnintendedContractTests.d.ts +0 -3
- package/dist/src/redteam/getUnintendedContractTests.d.ts.map +0 -1
- package/dist/src/redteam/getUnintendedContractTests.js +0 -60
- package/dist/src/redteam/getUnintendedContractTests.js.map +0 -1
- package/dist/src/web/nextui/_next/static/chunks/954-58788165fb1e9563.js +0 -6
- package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-c11cfb1b2c58325f.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50073ee4b153b82b.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/eval/page-87d1e9bc26842e95.js +0 -1
- package/dist/src/web/nextui/_next/static/chunks/app/progress/page-15df1d043dee2f17.js +0 -1
- package/dist/src/web/nextui/_next/static/css/5bd2f45de1f3ba83.css +0 -1
- /package/dist/src/web/nextui/_next/static/{_4HZa8ihrRiRqQU13EScL → ENNANMoEha-uMGFo0DvzO}/_buildManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/{_4HZa8ihrRiRqQU13EScL → ENNANMoEha-uMGFo0DvzO}/_ssgManifest.js +0 -0
- /package/dist/src/web/nextui/_next/static/chunks/{2-57ab5e84907f795a.js → 2-671ad31c05d2c976.js} +0 -0
package/dist/src/main.js
CHANGED
|
@@ -1,207 +1,35 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
"use strict";
|
|
3
|
-
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
4
|
-
if (k2 === undefined) k2 = k;
|
|
5
|
-
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
6
|
-
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
7
|
-
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
8
|
-
}
|
|
9
|
-
Object.defineProperty(o, k2, desc);
|
|
10
|
-
}) : (function(o, m, k, k2) {
|
|
11
|
-
if (k2 === undefined) k2 = k;
|
|
12
|
-
o[k2] = m[k];
|
|
13
|
-
}));
|
|
14
|
-
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
15
|
-
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
16
|
-
}) : function(o, v) {
|
|
17
|
-
o["default"] = v;
|
|
18
|
-
});
|
|
19
|
-
var __importStar = (this && this.__importStar) || function (mod) {
|
|
20
|
-
if (mod && mod.__esModule) return mod;
|
|
21
|
-
var result = {};
|
|
22
|
-
if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
|
|
23
|
-
__setModuleDefault(result, mod);
|
|
24
|
-
return result;
|
|
25
|
-
};
|
|
26
3
|
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
27
4
|
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
28
5
|
};
|
|
29
6
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
30
7
|
const chalk_1 = __importDefault(require("chalk"));
|
|
31
|
-
const chokidar_1 = __importDefault(require("chokidar"));
|
|
32
8
|
const commander_1 = require("commander");
|
|
33
|
-
const dedent_1 = __importDefault(require("dedent"));
|
|
34
9
|
const fs_1 = __importDefault(require("fs"));
|
|
35
|
-
const js_yaml_1 = __importDefault(require("js-yaml"));
|
|
36
10
|
const path_1 = __importDefault(require("path"));
|
|
37
11
|
const readline_1 = __importDefault(require("readline"));
|
|
38
|
-
const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
|
|
39
|
-
const assertions_1 = require("./assertions");
|
|
40
|
-
const validateAssertions_1 = require("./assertions/validateAssertions");
|
|
41
12
|
const cache_1 = require("./cache");
|
|
42
13
|
const checkNodeVersion_1 = require("./checkNodeVersion");
|
|
43
|
-
const cliState_1 = __importDefault(require("./cliState"));
|
|
44
14
|
const config_1 = require("./commands/config");
|
|
45
15
|
const delete_1 = require("./commands/delete");
|
|
46
|
-
const
|
|
47
|
-
const filterTests_1 = require("./commands/eval/filterTests");
|
|
16
|
+
const eval_1 = require("./commands/eval");
|
|
48
17
|
const export_1 = require("./commands/export");
|
|
18
|
+
const generate_1 = require("./commands/generate");
|
|
49
19
|
const import_1 = require("./commands/import");
|
|
50
20
|
const list_1 = require("./commands/list");
|
|
21
|
+
const redteam_1 = require("./commands/redteam");
|
|
51
22
|
const show_1 = require("./commands/show");
|
|
23
|
+
const config_2 = require("./config");
|
|
52
24
|
const esm_1 = require("./esm");
|
|
53
|
-
const evaluator_1 = require("./evaluator");
|
|
54
25
|
const feedback_1 = require("./feedback");
|
|
55
|
-
const logger_1 = __importStar(require("./logger"));
|
|
26
|
+
const logger_1 = __importDefault(require("./logger"));
|
|
56
27
|
const onboarding_1 = require("./onboarding");
|
|
57
|
-
const prompts_1 = require("./prompts");
|
|
58
|
-
const providers_1 = require("./providers");
|
|
59
|
-
const redteam_1 = require("./redteam");
|
|
60
|
-
const constants_1 = require("./redteam/constants");
|
|
61
28
|
const share_1 = require("./share");
|
|
62
|
-
const table_1 = require("./table");
|
|
63
29
|
const telemetry_1 = __importDefault(require("./telemetry"));
|
|
64
|
-
const testCases_1 = require("./testCases");
|
|
65
|
-
const types_1 = require("./types");
|
|
66
30
|
const updates_1 = require("./updates");
|
|
67
31
|
const util_1 = require("./util");
|
|
68
32
|
const server_1 = require("./web/server");
|
|
69
|
-
async function resolveConfigs(cmdObj, defaultConfig) {
|
|
70
|
-
// Config parsing
|
|
71
|
-
let fileConfig = {};
|
|
72
|
-
const configPaths = cmdObj.config;
|
|
73
|
-
if (configPaths) {
|
|
74
|
-
fileConfig = await (0, util_1.readConfigs)(configPaths);
|
|
75
|
-
}
|
|
76
|
-
// Standalone assertion mode
|
|
77
|
-
if (cmdObj.assertions) {
|
|
78
|
-
if (!cmdObj.modelOutputs) {
|
|
79
|
-
logger_1.default.error(chalk_1.default.red('You must provide --model-outputs when using --assertions'));
|
|
80
|
-
process.exit(1);
|
|
81
|
-
}
|
|
82
|
-
const modelOutputs = JSON.parse(fs_1.default.readFileSync(path_1.default.join(process.cwd(), cmdObj.modelOutputs), 'utf8'));
|
|
83
|
-
const assertions = await (0, assertions_1.readAssertions)(cmdObj.assertions);
|
|
84
|
-
fileConfig.prompts = ['{{output}}'];
|
|
85
|
-
fileConfig.providers = ['echo'];
|
|
86
|
-
fileConfig.tests = modelOutputs.map((output) => {
|
|
87
|
-
if (typeof output === 'string') {
|
|
88
|
-
return {
|
|
89
|
-
vars: {
|
|
90
|
-
output,
|
|
91
|
-
},
|
|
92
|
-
assert: assertions,
|
|
93
|
-
};
|
|
94
|
-
}
|
|
95
|
-
return {
|
|
96
|
-
vars: {
|
|
97
|
-
output: output.output,
|
|
98
|
-
...(output.tags === undefined ? {} : { tags: output.tags.join(', ') }),
|
|
99
|
-
},
|
|
100
|
-
assert: assertions,
|
|
101
|
-
};
|
|
102
|
-
});
|
|
103
|
-
}
|
|
104
|
-
// Use basepath in cases where path was supplied in the config file
|
|
105
|
-
const basePath = configPaths ? path_1.default.dirname(configPaths[0]) : '';
|
|
106
|
-
const defaultTestRaw = fileConfig.defaultTest || defaultConfig.defaultTest;
|
|
107
|
-
const config = {
|
|
108
|
-
description: fileConfig.description || defaultConfig.description,
|
|
109
|
-
prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts || [],
|
|
110
|
-
providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers || [],
|
|
111
|
-
tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests || [],
|
|
112
|
-
scenarios: fileConfig.scenarios || defaultConfig.scenarios,
|
|
113
|
-
env: fileConfig.env || defaultConfig.env,
|
|
114
|
-
sharing: process.env.PROMPTFOO_DISABLE_SHARING === '1'
|
|
115
|
-
? false
|
|
116
|
-
: fileConfig.sharing ?? defaultConfig.sharing ?? true,
|
|
117
|
-
defaultTest: defaultTestRaw ? await (0, testCases_1.readTest)(defaultTestRaw, basePath) : undefined,
|
|
118
|
-
derivedMetrics: fileConfig.derivedMetrics || defaultConfig.derivedMetrics,
|
|
119
|
-
outputPath: cmdObj.output || fileConfig.outputPath || defaultConfig.outputPath,
|
|
120
|
-
metadata: fileConfig.metadata || defaultConfig.metadata,
|
|
121
|
-
};
|
|
122
|
-
// Validation
|
|
123
|
-
if (!config.prompts || config.prompts.length === 0) {
|
|
124
|
-
logger_1.default.error(chalk_1.default.red('You must provide at least 1 prompt'));
|
|
125
|
-
process.exit(1);
|
|
126
|
-
}
|
|
127
|
-
if (!config.providers || config.providers.length === 0) {
|
|
128
|
-
logger_1.default.error(chalk_1.default.red('You must specify at least 1 provider (for example, openai:gpt-4o)'));
|
|
129
|
-
process.exit(1);
|
|
130
|
-
}
|
|
131
|
-
(0, tiny_invariant_1.default)(Array.isArray(config.providers), 'providers must be an array');
|
|
132
|
-
config.providers.forEach((provider) => {
|
|
133
|
-
const result = types_1.ProviderSchema.safeParse(provider);
|
|
134
|
-
if (!result.success) {
|
|
135
|
-
const errors = result.error.errors
|
|
136
|
-
.map((err) => {
|
|
137
|
-
return `- ${err.message}`;
|
|
138
|
-
})
|
|
139
|
-
.join('\n');
|
|
140
|
-
const providerString = typeof provider === 'string' ? provider : JSON.stringify(provider);
|
|
141
|
-
logger_1.default.warn(chalk_1.default.yellow((0, dedent_1.default) `
|
|
142
|
-
Provider: ${providerString} encountered errors during schema validation:
|
|
143
|
-
|
|
144
|
-
${errors}
|
|
145
|
-
|
|
146
|
-
Please double check your configuration.` + '\n'));
|
|
147
|
-
}
|
|
148
|
-
});
|
|
149
|
-
// Parse prompts, providers, and tests
|
|
150
|
-
const parsedPrompts = await (0, prompts_1.readPrompts)(config.prompts, cmdObj.prompts ? undefined : basePath);
|
|
151
|
-
const parsedProviders = await (0, providers_1.loadApiProviders)(config.providers, {
|
|
152
|
-
env: config.env,
|
|
153
|
-
basePath,
|
|
154
|
-
});
|
|
155
|
-
const parsedTests = await (0, testCases_1.readTests)(config.tests || [], cmdObj.tests ? undefined : basePath);
|
|
156
|
-
// Parse testCases for each scenario
|
|
157
|
-
if (fileConfig.scenarios) {
|
|
158
|
-
for (const scenario of fileConfig.scenarios) {
|
|
159
|
-
const parsedScenarioTests = await (0, testCases_1.readTests)(scenario.tests, cmdObj.tests ? undefined : basePath);
|
|
160
|
-
scenario.tests = parsedScenarioTests;
|
|
161
|
-
const filteredTests = await (0, filterTests_1.filterTests)({
|
|
162
|
-
...scenario,
|
|
163
|
-
providers: parsedProviders,
|
|
164
|
-
prompts: parsedPrompts,
|
|
165
|
-
}, {
|
|
166
|
-
firstN: cmdObj.filterFirstN,
|
|
167
|
-
pattern: cmdObj.filterPattern,
|
|
168
|
-
failing: cmdObj.filterFailing,
|
|
169
|
-
});
|
|
170
|
-
(0, tiny_invariant_1.default)(filteredTests, 'filteredTests are undefined');
|
|
171
|
-
scenario.tests = filteredTests;
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
const parsedProviderPromptMap = (0, prompts_1.readProviderPromptMap)(config, parsedPrompts);
|
|
175
|
-
if (parsedPrompts.length === 0) {
|
|
176
|
-
logger_1.default.error(chalk_1.default.red('No prompts found'));
|
|
177
|
-
process.exit(1);
|
|
178
|
-
}
|
|
179
|
-
const defaultTest = {
|
|
180
|
-
options: {
|
|
181
|
-
prefix: cmdObj.promptPrefix,
|
|
182
|
-
suffix: cmdObj.promptSuffix,
|
|
183
|
-
provider: cmdObj.grader,
|
|
184
|
-
// rubricPrompt
|
|
185
|
-
...(config.defaultTest?.options || {}),
|
|
186
|
-
},
|
|
187
|
-
...config.defaultTest,
|
|
188
|
-
};
|
|
189
|
-
const testSuite = {
|
|
190
|
-
description: config.description,
|
|
191
|
-
prompts: parsedPrompts,
|
|
192
|
-
providers: parsedProviders,
|
|
193
|
-
providerPromptMap: parsedProviderPromptMap,
|
|
194
|
-
tests: parsedTests,
|
|
195
|
-
scenarios: config.scenarios,
|
|
196
|
-
defaultTest,
|
|
197
|
-
derivedMetrics: config.derivedMetrics,
|
|
198
|
-
nunjucksFilters: await (0, util_1.readFilters)(fileConfig.nunjucksFilters || defaultConfig.nunjucksFilters || {}),
|
|
199
|
-
};
|
|
200
|
-
if (testSuite.tests) {
|
|
201
|
-
(0, validateAssertions_1.validateAssertions)(testSuite.tests);
|
|
202
|
-
}
|
|
203
|
-
return { config, testSuite, basePath };
|
|
204
|
-
}
|
|
205
33
|
async function main() {
|
|
206
34
|
await (0, updates_1.checkForUpdates)();
|
|
207
35
|
const pwd = process.cwd();
|
|
@@ -214,7 +42,7 @@ async function main() {
|
|
|
214
42
|
let defaultConfig = {};
|
|
215
43
|
let defaultConfigPath;
|
|
216
44
|
for (const _path of potentialPaths) {
|
|
217
|
-
const maybeConfig = await (0, util_1.maybeReadConfig)(_path);
|
|
45
|
+
const maybeConfig = await (0, config_2.maybeReadConfig)(_path);
|
|
218
46
|
if (maybeConfig) {
|
|
219
47
|
defaultConfig = maybeConfig;
|
|
220
48
|
defaultConfigPath = _path;
|
|
@@ -339,399 +167,15 @@ async function main() {
|
|
|
339
167
|
.action((message) => {
|
|
340
168
|
(0, feedback_1.gatherFeedback)(message);
|
|
341
169
|
});
|
|
342
|
-
|
|
343
|
-
generateCommand
|
|
344
|
-
.command('dataset')
|
|
345
|
-
.description('Generate test cases')
|
|
346
|
-
.option('-i, --instructions [instructions]', 'Additional instructions to follow while generating test cases')
|
|
347
|
-
.option('-c, --config [path]', 'Path to configuration file. Defaults to promptfooconfig.yaml')
|
|
348
|
-
.option('-o, --output [path]', 'Path to output file')
|
|
349
|
-
.option('-w, --write', 'Write results to promptfoo configuration file')
|
|
350
|
-
.option('--numPersonas <number>', 'Number of personas to generate', '5')
|
|
351
|
-
.option('--numTestCasesPerPersona <number>', 'Number of test cases per persona', '3')
|
|
352
|
-
.option('--no-cache', 'Do not read or write results to disk cache', false)
|
|
353
|
-
.option('--env-file <path>', 'Path to .env file')
|
|
354
|
-
.action(async (options) => {
|
|
355
|
-
(0, util_1.setupEnv)(options.envFile);
|
|
356
|
-
if (!options.cache) {
|
|
357
|
-
logger_1.default.info('Cache is disabled.');
|
|
358
|
-
(0, cache_1.disableCache)();
|
|
359
|
-
}
|
|
360
|
-
let testSuite;
|
|
361
|
-
const configPath = options.config || defaultConfigPath;
|
|
362
|
-
if (configPath) {
|
|
363
|
-
const resolved = await resolveConfigs({
|
|
364
|
-
config: [configPath],
|
|
365
|
-
}, defaultConfig);
|
|
366
|
-
testSuite = resolved.testSuite;
|
|
367
|
-
}
|
|
368
|
-
else {
|
|
369
|
-
throw new Error('Could not find config file. Please use `--config`');
|
|
370
|
-
}
|
|
371
|
-
const startTime = Date.now();
|
|
372
|
-
telemetry_1.default.record('command_used', {
|
|
373
|
-
name: 'generate_dataset - started',
|
|
374
|
-
numPrompts: testSuite.prompts.length,
|
|
375
|
-
numTestsExisting: (testSuite.tests || []).length,
|
|
376
|
-
});
|
|
377
|
-
await telemetry_1.default.send();
|
|
378
|
-
const results = await (0, testCases_1.synthesizeFromTestSuite)(testSuite, {
|
|
379
|
-
instructions: options.instructions,
|
|
380
|
-
numPersonas: parseInt(options.numPersonas, 10),
|
|
381
|
-
numTestCasesPerPersona: parseInt(options.numTestCasesPerPersona, 10),
|
|
382
|
-
});
|
|
383
|
-
const configAddition = { tests: results.map((result) => ({ vars: result })) };
|
|
384
|
-
const yamlString = js_yaml_1.default.dump(configAddition);
|
|
385
|
-
if (options.output) {
|
|
386
|
-
fs_1.default.writeFileSync(options.output, yamlString);
|
|
387
|
-
(0, util_1.printBorder)();
|
|
388
|
-
logger_1.default.info(`Wrote ${results.length} new test cases to ${options.output}`);
|
|
389
|
-
(0, util_1.printBorder)();
|
|
390
|
-
}
|
|
391
|
-
else {
|
|
392
|
-
(0, util_1.printBorder)();
|
|
393
|
-
logger_1.default.info('New test Cases');
|
|
394
|
-
(0, util_1.printBorder)();
|
|
395
|
-
logger_1.default.info(yamlString);
|
|
396
|
-
}
|
|
397
|
-
(0, util_1.printBorder)();
|
|
398
|
-
if (options.write && configPath) {
|
|
399
|
-
const existingConfig = js_yaml_1.default.load(fs_1.default.readFileSync(configPath, 'utf8'));
|
|
400
|
-
existingConfig.tests = [...(existingConfig.tests || []), ...configAddition.tests];
|
|
401
|
-
fs_1.default.writeFileSync(configPath, js_yaml_1.default.dump(existingConfig));
|
|
402
|
-
logger_1.default.info(`Wrote ${results.length} new test cases to ${configPath}`);
|
|
403
|
-
}
|
|
404
|
-
else {
|
|
405
|
-
logger_1.default.info(`Copy the above test cases or run ${chalk_1.default.greenBright('promptfoo generate dataset --write')} to write directly to the config`);
|
|
406
|
-
}
|
|
407
|
-
telemetry_1.default.record('command_used', {
|
|
408
|
-
name: 'generate_dataset',
|
|
409
|
-
numPrompts: testSuite.prompts.length,
|
|
410
|
-
numTestsExisting: (testSuite.tests || []).length,
|
|
411
|
-
numTestsGenerated: results.length,
|
|
412
|
-
duration: Math.round((Date.now() - startTime) / 1000),
|
|
413
|
-
});
|
|
414
|
-
await telemetry_1.default.send();
|
|
415
|
-
});
|
|
416
|
-
generateCommand
|
|
417
|
-
.command('redteam')
|
|
418
|
-
.description('Generate adversarial test cases')
|
|
419
|
-
.option('-c, --config [path]', 'Path to configuration file. Defaults to promptfooconfig.yaml')
|
|
420
|
-
.option('-o, --output [path]', 'Path to output file')
|
|
421
|
-
.option('-w, --write', 'Write results to promptfoo configuration file')
|
|
422
|
-
.option('--purpose <purpose>', 'Set the system purpose. If not set, the system purpose will be inferred from the config file')
|
|
423
|
-
.option('--provider <provider>', `Provider to use for generating adversarial tests. Defaults to: ${constants_1.REDTEAM_MODEL}`)
|
|
424
|
-
.option('--injectVar <varname>', 'Override the variable to inject user input into the prompt. If not set, the variable will default to {{query}}')
|
|
425
|
-
.option('--plugins <plugins>', (0, dedent_1.default) `Comma-separated list of plugins to use. Defaults to:
|
|
426
|
-
\n- ${Array.from(redteam_1.DEFAULT_PLUGINS).sort().join('\n- ')}\n\n
|
|
427
|
-
`, (val) => val.split(',').map((x) => x.trim()))
|
|
428
|
-
.option('--add-plugins <plugins>', (0, dedent_1.default) `Comma-separated list of plugins to run in addition to the default plugins:
|
|
429
|
-
\n- ${redteam_1.ADDITIONAL_PLUGINS.sort().join('\n- ')}\n\n
|
|
430
|
-
`, (val) => val.split(',').map((x) => x.trim()))
|
|
431
|
-
.option('--no-cache', 'Do not read or write results to disk cache', false)
|
|
432
|
-
.option('--env-file <path>', 'Path to .env file')
|
|
433
|
-
.action(async ({ addPlugins, cache, config, envFile, injectVar, output, plugins, provider, purpose, write, }) => {
|
|
434
|
-
(0, util_1.setupEnv)(envFile);
|
|
435
|
-
if (!cache) {
|
|
436
|
-
logger_1.default.info('Cache is disabled.');
|
|
437
|
-
(0, cache_1.disableCache)();
|
|
438
|
-
}
|
|
439
|
-
let testSuite;
|
|
440
|
-
const configPath = config || defaultConfigPath;
|
|
441
|
-
if (configPath) {
|
|
442
|
-
const resolved = await resolveConfigs({
|
|
443
|
-
config: [configPath],
|
|
444
|
-
}, defaultConfig);
|
|
445
|
-
testSuite = resolved.testSuite;
|
|
446
|
-
}
|
|
447
|
-
else {
|
|
448
|
-
throw new Error('Could not find config file. Please use `--config`');
|
|
449
|
-
}
|
|
450
|
-
const startTime = Date.now();
|
|
451
|
-
telemetry_1.default.record('command_used', {
|
|
452
|
-
name: 'generate redteam - started',
|
|
453
|
-
numPrompts: testSuite.prompts.length,
|
|
454
|
-
numTestsExisting: (testSuite.tests || []).length,
|
|
455
|
-
});
|
|
456
|
-
await telemetry_1.default.send();
|
|
457
|
-
const redteamTests = await (0, redteam_1.synthesizeFromTestSuite)(testSuite, {
|
|
458
|
-
purpose,
|
|
459
|
-
injectVar,
|
|
460
|
-
plugins: addPlugins && addPlugins.length > 0
|
|
461
|
-
? Array.from(plugins || redteam_1.DEFAULT_PLUGINS).concat(addPlugins)
|
|
462
|
-
: plugins,
|
|
463
|
-
provider,
|
|
464
|
-
});
|
|
465
|
-
if (output) {
|
|
466
|
-
const existingYaml = js_yaml_1.default.load(fs_1.default.readFileSync(configPath, 'utf8'));
|
|
467
|
-
const updatedYaml = {
|
|
468
|
-
...existingYaml,
|
|
469
|
-
tests: redteamTests,
|
|
470
|
-
metadata: {
|
|
471
|
-
...existingYaml.metadata,
|
|
472
|
-
redteam: true,
|
|
473
|
-
},
|
|
474
|
-
};
|
|
475
|
-
fs_1.default.writeFileSync(output, js_yaml_1.default.dump(updatedYaml, { skipInvalid: true }));
|
|
476
|
-
(0, util_1.printBorder)();
|
|
477
|
-
logger_1.default.info(`Wrote ${redteamTests.length} new test cases to ${output}`);
|
|
478
|
-
(0, util_1.printBorder)();
|
|
479
|
-
}
|
|
480
|
-
else if (write && configPath) {
|
|
481
|
-
const existingConfig = js_yaml_1.default.load(fs_1.default.readFileSync(configPath, 'utf8'));
|
|
482
|
-
existingConfig.tests = [...(existingConfig.tests || []), ...redteamTests];
|
|
483
|
-
fs_1.default.writeFileSync(configPath, js_yaml_1.default.dump(existingConfig));
|
|
484
|
-
logger_1.default.info(`Wrote ${redteamTests.length} new test cases to ${configPath}`);
|
|
485
|
-
}
|
|
486
|
-
else {
|
|
487
|
-
logger_1.default.info(js_yaml_1.default.dump(redteamTests, { skipInvalid: true }));
|
|
488
|
-
}
|
|
489
|
-
telemetry_1.default.record('command_used', {
|
|
490
|
-
name: 'generate redteam',
|
|
491
|
-
numPrompts: testSuite.prompts.length,
|
|
492
|
-
numTestsExisting: (testSuite.tests || []).length,
|
|
493
|
-
numTestsGenerated: redteamTests.length,
|
|
494
|
-
duration: Math.round((Date.now() - startTime) / 1000),
|
|
495
|
-
});
|
|
496
|
-
await telemetry_1.default.send();
|
|
497
|
-
});
|
|
498
|
-
program
|
|
499
|
-
.command('eval')
|
|
500
|
-
.description('Evaluate prompts')
|
|
501
|
-
.option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)')
|
|
502
|
-
.option('-r, --providers <name or path...>', 'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module')
|
|
503
|
-
.option('-c, --config <paths...>', 'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml')
|
|
504
|
-
.option(
|
|
505
|
-
// TODO(ian): Remove `vars` for v1
|
|
506
|
-
'-v, --vars, -t, --tests <path>', 'Path to CSV with test cases', defaultConfig?.commandLineOptions?.vars)
|
|
507
|
-
.option('-a, --assertions <path>', 'Path to assertions file')
|
|
508
|
-
.option('--model-outputs <path>', 'Path to JSON containing list of LLM output strings')
|
|
509
|
-
.option('-t, --tests <path>', 'Path to CSV with test cases')
|
|
510
|
-
.option('-o, --output <paths...>', 'Path to output file (csv, txt, json, yaml, yml, html), default is no output file')
|
|
511
|
-
.option('-j, --max-concurrency <number>', 'Maximum number of concurrent API calls', defaultConfig.evaluateOptions?.maxConcurrency
|
|
512
|
-
? String(defaultConfig.evaluateOptions.maxConcurrency)
|
|
513
|
-
: `${evaluator_1.DEFAULT_MAX_CONCURRENCY}`)
|
|
514
|
-
.option('--repeat <number>', 'Number of times to run each test', defaultConfig.evaluateOptions?.repeat ? String(defaultConfig.evaluateOptions.repeat) : '1')
|
|
515
|
-
.option('--delay <number>', 'Delay between each test (in milliseconds)', defaultConfig.evaluateOptions?.delay ? String(defaultConfig.evaluateOptions.delay) : '0')
|
|
516
|
-
.option('--table-cell-max-length <number>', 'Truncate console table cells to this length', '250')
|
|
517
|
-
.option('--suggest-prompts <number>', 'Generate N new prompts and append them to the prompt list')
|
|
518
|
-
.option('--prompt-prefix <path>', 'This prefix is prepended to every prompt', defaultConfig.defaultTest?.options?.prefix)
|
|
519
|
-
.option('--prompt-suffix <path>', 'This suffix is append to every prompt', defaultConfig.defaultTest?.options?.suffix)
|
|
520
|
-
.option('--no-write', 'Do not write results to promptfoo directory', defaultConfig?.commandLineOptions?.write)
|
|
521
|
-
.option('--no-cache', 'Do not read or write results to disk cache',
|
|
522
|
-
// TODO(ian): Remove commandLineOptions.cache in v1
|
|
523
|
-
defaultConfig?.commandLineOptions?.cache ?? defaultConfig?.evaluateOptions?.cache)
|
|
524
|
-
.option('--no-progress-bar', 'Do not show progress bar')
|
|
525
|
-
.option('--table', 'Output table in CLI', defaultConfig?.commandLineOptions?.table ?? true)
|
|
526
|
-
.option('--no-table', 'Do not output table in CLI', defaultConfig?.commandLineOptions?.table)
|
|
527
|
-
.option('--share', 'Create a shareable URL', defaultConfig?.commandLineOptions?.share)
|
|
528
|
-
.option('--grader <provider>', 'Model that will grade outputs', defaultConfig?.commandLineOptions?.grader)
|
|
529
|
-
.option('--verbose', 'Show debug logs', defaultConfig?.commandLineOptions?.verbose)
|
|
530
|
-
.option('-w, --watch', 'Watch for changes in config and re-run')
|
|
531
|
-
.option('--env-file <path>', 'Path to .env file')
|
|
532
|
-
.option('--interactive-providers', 'Run providers interactively, one at a time', defaultConfig?.evaluateOptions?.interactiveProviders)
|
|
533
|
-
.option('-n, --filter-first-n <number>', 'Only run the first N tests')
|
|
534
|
-
.option('--filter-pattern <pattern>', 'Only run tests whose description matches the regular expression pattern')
|
|
535
|
-
.option('--filter-providers <providers>', 'Only run tests with these providers')
|
|
536
|
-
.option('--filter-failing <path>', 'Path to json output file')
|
|
537
|
-
.option('--var <key=value>', 'Set a variable in key=value format', (value, previous = {}) => {
|
|
538
|
-
const [key, val] = value.split('=');
|
|
539
|
-
if (!key || val === undefined) {
|
|
540
|
-
throw new Error('--var must be specified in key=value format.');
|
|
541
|
-
}
|
|
542
|
-
previous[key] = val;
|
|
543
|
-
return previous;
|
|
544
|
-
}, {})
|
|
545
|
-
.action(async (cmdObj) => {
|
|
546
|
-
(0, util_1.setupEnv)(cmdObj.envFile);
|
|
547
|
-
let config = undefined;
|
|
548
|
-
let testSuite = undefined;
|
|
549
|
-
let basePath = undefined;
|
|
550
|
-
const runEvaluation = async (initialization) => {
|
|
551
|
-
const startTime = Date.now();
|
|
552
|
-
telemetry_1.default.record('command_used', {
|
|
553
|
-
name: 'eval - started',
|
|
554
|
-
watch: Boolean(cmdObj.watch),
|
|
555
|
-
});
|
|
556
|
-
await telemetry_1.default.send();
|
|
557
|
-
// Misc settings
|
|
558
|
-
if (cmdObj.verbose) {
|
|
559
|
-
(0, logger_1.setLogLevel)('debug');
|
|
560
|
-
}
|
|
561
|
-
const iterations = parseInt(cmdObj.repeat || '', 10);
|
|
562
|
-
const repeat = !isNaN(iterations) && iterations > 0 ? iterations : 1;
|
|
563
|
-
if (!cmdObj.cache || repeat > 1) {
|
|
564
|
-
logger_1.default.info('Cache is disabled.');
|
|
565
|
-
(0, cache_1.disableCache)();
|
|
566
|
-
}
|
|
567
|
-
({ config, testSuite, basePath } = await resolveConfigs(cmdObj, defaultConfig));
|
|
568
|
-
cliState_1.default.basePath = basePath;
|
|
569
|
-
let maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
|
|
570
|
-
const delay = parseInt(cmdObj.delay || '', 0);
|
|
571
|
-
if (delay > 0) {
|
|
572
|
-
maxConcurrency = 1;
|
|
573
|
-
logger_1.default.info(`Running at concurrency=1 because ${delay}ms delay was requested between API calls`);
|
|
574
|
-
}
|
|
575
|
-
testSuite.tests = await (0, filterTests_1.filterTests)(testSuite, {
|
|
576
|
-
firstN: cmdObj.filterFirstN,
|
|
577
|
-
pattern: cmdObj.filterPattern,
|
|
578
|
-
failing: cmdObj.filterFailing,
|
|
579
|
-
});
|
|
580
|
-
testSuite.providers = (0, filterProviders_1.filterProviders)(testSuite.providers, cmdObj.filterProviders);
|
|
581
|
-
const options = {
|
|
582
|
-
showProgressBar: (0, logger_1.getLogLevel)() === 'debug' ? false : cmdObj.progressBar,
|
|
583
|
-
maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
|
|
584
|
-
repeat,
|
|
585
|
-
delay: !isNaN(delay) && delay > 0 ? delay : undefined,
|
|
586
|
-
interactiveProviders: cmdObj.interactiveProviders,
|
|
587
|
-
...evaluateOptions,
|
|
588
|
-
};
|
|
589
|
-
if (cmdObj.grader) {
|
|
590
|
-
testSuite.defaultTest = testSuite.defaultTest || {};
|
|
591
|
-
testSuite.defaultTest.options = testSuite.defaultTest.options || {};
|
|
592
|
-
testSuite.defaultTest.options.provider = await (0, providers_1.loadApiProvider)(cmdObj.grader);
|
|
593
|
-
}
|
|
594
|
-
if (cmdObj.var) {
|
|
595
|
-
testSuite.defaultTest = testSuite.defaultTest || {};
|
|
596
|
-
testSuite.defaultTest.vars = { ...testSuite.defaultTest.vars, ...cmdObj.var };
|
|
597
|
-
}
|
|
598
|
-
if (cmdObj.generateSuggestions) {
|
|
599
|
-
options.generateSuggestions = true;
|
|
600
|
-
}
|
|
601
|
-
const summary = await (0, evaluator_1.evaluate)(testSuite, {
|
|
602
|
-
...options,
|
|
603
|
-
eventSource: 'cli',
|
|
604
|
-
});
|
|
605
|
-
const shareableUrl = cmdObj.share && config.sharing ? await (0, share_1.createShareableUrl)(summary, config) : null;
|
|
606
|
-
if (cmdObj.table && (0, logger_1.getLogLevel)() !== 'debug') {
|
|
607
|
-
// Output CLI table
|
|
608
|
-
const table = (0, table_1.generateTable)(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));
|
|
609
|
-
logger_1.default.info('\n' + table.toString());
|
|
610
|
-
if (summary.table.body.length > 25) {
|
|
611
|
-
const rowsLeft = summary.table.body.length - 25;
|
|
612
|
-
logger_1.default.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
|
|
613
|
-
}
|
|
614
|
-
}
|
|
615
|
-
else if (summary.stats.failures !== 0) {
|
|
616
|
-
logger_1.default.debug(`At least one evaluation failure occurred. This might be caused by the underlying call to the provider, or a test failure. Context: \n${JSON.stringify(summary.results)}`);
|
|
617
|
-
}
|
|
618
|
-
await (0, util_1.migrateResultsFromFileSystemToDatabase)();
|
|
619
|
-
let evalId = null;
|
|
620
|
-
if (cmdObj.write) {
|
|
621
|
-
evalId = await (0, util_1.writeResultsToDatabase)(summary, config);
|
|
622
|
-
}
|
|
623
|
-
const { outputPath } = config;
|
|
624
|
-
if (outputPath) {
|
|
625
|
-
// Write output to file
|
|
626
|
-
if (typeof outputPath === 'string') {
|
|
627
|
-
await (0, util_1.writeOutput)(outputPath, evalId, summary, config, shareableUrl);
|
|
628
|
-
}
|
|
629
|
-
else if (Array.isArray(outputPath)) {
|
|
630
|
-
await (0, util_1.writeMultipleOutputs)(outputPath, evalId, summary, config, shareableUrl);
|
|
631
|
-
}
|
|
632
|
-
logger_1.default.info(chalk_1.default.yellow(`Writing output to ${outputPath}`));
|
|
633
|
-
}
|
|
634
|
-
telemetry_1.default.maybeShowNotice();
|
|
635
|
-
(0, util_1.printBorder)();
|
|
636
|
-
if (!cmdObj.write) {
|
|
637
|
-
logger_1.default.info(`${chalk_1.default.green('✔')} Evaluation complete`);
|
|
638
|
-
}
|
|
639
|
-
else {
|
|
640
|
-
if (shareableUrl) {
|
|
641
|
-
logger_1.default.info(`${chalk_1.default.green('✔')} Evaluation complete: ${shareableUrl}`);
|
|
642
|
-
}
|
|
643
|
-
else {
|
|
644
|
-
logger_1.default.info(`${chalk_1.default.green('✔')} Evaluation complete.\n`);
|
|
645
|
-
logger_1.default.info(`» Run ${chalk_1.default.greenBright.bold('promptfoo view')} to use the local web viewer`);
|
|
646
|
-
logger_1.default.info(`» Run ${chalk_1.default.greenBright.bold('promptfoo share')} to create a shareable URL`);
|
|
647
|
-
logger_1.default.info(`» This project needs your feedback. What's one thing we can improve? ${chalk_1.default.greenBright.bold('https://forms.gle/YFLgTe1dKJKNSCsU7')}`);
|
|
648
|
-
}
|
|
649
|
-
}
|
|
650
|
-
(0, util_1.printBorder)();
|
|
651
|
-
logger_1.default.info(chalk_1.default.green.bold(`Successes: ${summary.stats.successes}`));
|
|
652
|
-
logger_1.default.info(chalk_1.default.red.bold(`Failures: ${summary.stats.failures}`));
|
|
653
|
-
logger_1.default.info(`Token usage: Total ${summary.stats.tokenUsage.total}, Prompt ${summary.stats.tokenUsage.prompt}, Completion ${summary.stats.tokenUsage.completion}, Cached ${summary.stats.tokenUsage.cached}`);
|
|
654
|
-
telemetry_1.default.record('command_used', {
|
|
655
|
-
name: 'eval',
|
|
656
|
-
watch: Boolean(cmdObj.watch),
|
|
657
|
-
duration: Math.round((Date.now() - startTime) / 1000),
|
|
658
|
-
});
|
|
659
|
-
await telemetry_1.default.send();
|
|
660
|
-
if (cmdObj.watch) {
|
|
661
|
-
if (initialization) {
|
|
662
|
-
const configPaths = (cmdObj.config || [defaultConfigPath]).filter(Boolean);
|
|
663
|
-
if (!configPaths.length) {
|
|
664
|
-
logger_1.default.error('Could not locate config file(s) to watch');
|
|
665
|
-
process.exit(1);
|
|
666
|
-
}
|
|
667
|
-
const basePath = path_1.default.dirname(configPaths[0]);
|
|
668
|
-
const promptPaths = Array.isArray(config.prompts)
|
|
669
|
-
? config.prompts
|
|
670
|
-
.map((p) => {
|
|
671
|
-
if (typeof p === 'string' && p.startsWith('file://')) {
|
|
672
|
-
return path_1.default.resolve(basePath, p.slice('file://'.length));
|
|
673
|
-
}
|
|
674
|
-
else if (typeof p === 'object' && p.id && p.id.startsWith('file://')) {
|
|
675
|
-
return path_1.default.resolve(basePath, p.id.slice('file://'.length));
|
|
676
|
-
}
|
|
677
|
-
return null;
|
|
678
|
-
})
|
|
679
|
-
.filter(Boolean)
|
|
680
|
-
: [];
|
|
681
|
-
const providerPaths = Array.isArray(config.providers)
|
|
682
|
-
? config.providers
|
|
683
|
-
.map((p) => typeof p === 'string' && p.startsWith('file://')
|
|
684
|
-
? path_1.default.resolve(basePath, p.slice('file://'.length))
|
|
685
|
-
: null)
|
|
686
|
-
.filter(Boolean)
|
|
687
|
-
: [];
|
|
688
|
-
const varPaths = Array.isArray(config.tests)
|
|
689
|
-
? config.tests
|
|
690
|
-
.flatMap((t) => {
|
|
691
|
-
if (typeof t === 'string' && t.startsWith('file://')) {
|
|
692
|
-
return path_1.default.resolve(basePath, t.slice('file://'.length));
|
|
693
|
-
}
|
|
694
|
-
else if (typeof t !== 'string' && t.vars) {
|
|
695
|
-
return Object.values(t.vars).flatMap((v) => {
|
|
696
|
-
if (typeof v === 'string' && v.startsWith('file://')) {
|
|
697
|
-
return path_1.default.resolve(basePath, v.slice('file://'.length));
|
|
698
|
-
}
|
|
699
|
-
return [];
|
|
700
|
-
});
|
|
701
|
-
}
|
|
702
|
-
return [];
|
|
703
|
-
})
|
|
704
|
-
.filter(Boolean)
|
|
705
|
-
: [];
|
|
706
|
-
const watchPaths = Array.from(new Set([...configPaths, ...promptPaths, ...providerPaths, ...varPaths]));
|
|
707
|
-
const watcher = chokidar_1.default.watch(watchPaths, { ignored: /^\./, persistent: true });
|
|
708
|
-
watcher
|
|
709
|
-
.on('change', async (path) => {
|
|
710
|
-
(0, util_1.printBorder)();
|
|
711
|
-
logger_1.default.info(`File change detected: ${path}`);
|
|
712
|
-
(0, util_1.printBorder)();
|
|
713
|
-
await runEvaluation();
|
|
714
|
-
})
|
|
715
|
-
.on('error', (error) => logger_1.default.error(`Watcher error: ${error}`))
|
|
716
|
-
.on('ready', () => watchPaths.forEach((watchPath) => logger_1.default.info(`Watching for file changes on ${watchPath} ...`)));
|
|
717
|
-
}
|
|
718
|
-
}
|
|
719
|
-
else {
|
|
720
|
-
logger_1.default.info('Done.');
|
|
721
|
-
if (summary.stats.failures > 0) {
|
|
722
|
-
const exitCode = Number(process.env.PROMPTFOO_FAILED_TEST_EXIT_CODE);
|
|
723
|
-
process.exit(isNaN(exitCode) ? 100 : exitCode);
|
|
724
|
-
}
|
|
725
|
-
}
|
|
726
|
-
};
|
|
727
|
-
await runEvaluation(true /* initialization */);
|
|
728
|
-
});
|
|
729
|
-
(0, list_1.listCommand)(program);
|
|
730
|
-
(0, show_1.showCommand)(program);
|
|
170
|
+
(0, config_1.configCommand)(program);
|
|
731
171
|
(0, delete_1.deleteCommand)(program);
|
|
732
|
-
(0,
|
|
172
|
+
(0, eval_1.evalCommand)(program, defaultConfig, defaultConfigPath, evaluateOptions);
|
|
733
173
|
(0, export_1.exportCommand)(program);
|
|
734
|
-
(0,
|
|
174
|
+
(0, generate_1.generateCommand)(program, defaultConfig, defaultConfigPath);
|
|
175
|
+
(0, import_1.importCommand)(program);
|
|
176
|
+
(0, list_1.listCommand)(program);
|
|
177
|
+
(0, redteam_1.redteamCommand)(program);
|
|
178
|
+
(0, show_1.showCommand)(program);
|
|
735
179
|
program.parse(process.argv);
|
|
736
180
|
if (!process.argv.slice(2).length) {
|
|
737
181
|
program.outputHelp();
|