promptfoo 0.69.2 → 0.70.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. package/dist/package.json +1 -1
  2. package/dist/src/assertions.d.ts +5 -9
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +12 -9
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/commands/eval.d.ts +5 -0
  7. package/dist/src/commands/eval.d.ts.map +1 -0
  8. package/dist/src/commands/eval.js +288 -0
  9. package/dist/src/commands/eval.js.map +1 -0
  10. package/dist/src/commands/generate.d.ts +20 -0
  11. package/dist/src/commands/generate.d.ts.map +1 -0
  12. package/dist/src/commands/generate.js +215 -0
  13. package/dist/src/commands/generate.js.map +1 -0
  14. package/dist/src/commands/redteam.d.ts +10 -0
  15. package/dist/src/commands/redteam.d.ts.map +1 -0
  16. package/dist/src/commands/redteam.js +191 -0
  17. package/dist/src/commands/redteam.js.map +1 -0
  18. package/dist/src/config.d.ts +17 -0
  19. package/dist/src/config.d.ts.map +1 -0
  20. package/dist/src/config.js +424 -0
  21. package/dist/src/config.js.map +1 -0
  22. package/dist/src/evaluator.d.ts.map +1 -1
  23. package/dist/src/evaluator.js +12 -10
  24. package/dist/src/evaluator.js.map +1 -1
  25. package/dist/src/main.js +13 -569
  26. package/dist/src/main.js.map +1 -1
  27. package/dist/src/providers/pythonCompletion.d.ts +1 -0
  28. package/dist/src/providers/pythonCompletion.d.ts.map +1 -1
  29. package/dist/src/providers/pythonCompletion.js +10 -3
  30. package/dist/src/providers/pythonCompletion.js.map +1 -1
  31. package/dist/src/redteam/constants.d.ts +5 -0
  32. package/dist/src/redteam/constants.d.ts.map +1 -1
  33. package/dist/src/redteam/constants.js +53 -1
  34. package/dist/src/redteam/constants.js.map +1 -1
  35. package/dist/src/redteam/index.d.ts +0 -2
  36. package/dist/src/redteam/index.d.ts.map +1 -1
  37. package/dist/src/redteam/index.js +24 -40
  38. package/dist/src/redteam/index.js.map +1 -1
  39. package/dist/src/redteam/iterative.d.ts +1 -1
  40. package/dist/src/redteam/iterative.js +1 -1
  41. package/dist/src/redteam/iterative.js.map +1 -1
  42. package/dist/src/redteam/iterativeImage.d.ts +1 -1
  43. package/dist/src/redteam/iterativeImage.js +1 -1
  44. package/dist/src/redteam/iterativeImage.js.map +1 -1
  45. package/dist/src/redteam/plugins/base.d.ts +35 -0
  46. package/dist/src/redteam/plugins/base.d.ts.map +1 -0
  47. package/dist/src/redteam/plugins/base.js +48 -0
  48. package/dist/src/redteam/plugins/base.js.map +1 -0
  49. package/dist/src/redteam/plugins/competitors.d.ts +10 -0
  50. package/dist/src/redteam/plugins/competitors.d.ts.map +1 -0
  51. package/dist/src/redteam/plugins/competitors.js +47 -0
  52. package/dist/src/redteam/plugins/competitors.js.map +1 -0
  53. package/dist/src/redteam/plugins/contracts.d.ts +10 -0
  54. package/dist/src/redteam/plugins/contracts.d.ts.map +1 -0
  55. package/dist/src/redteam/plugins/contracts.js +47 -0
  56. package/dist/src/redteam/plugins/contracts.js.map +1 -0
  57. package/dist/src/redteam/plugins/excessiveAgency.d.ts +10 -0
  58. package/dist/src/redteam/plugins/excessiveAgency.d.ts.map +1 -0
  59. package/dist/src/redteam/plugins/excessiveAgency.js +42 -0
  60. package/dist/src/redteam/plugins/excessiveAgency.js.map +1 -0
  61. package/dist/src/redteam/plugins/hallucination.d.ts +10 -0
  62. package/dist/src/redteam/plugins/hallucination.d.ts.map +1 -0
  63. package/dist/src/redteam/plugins/hallucination.js +43 -0
  64. package/dist/src/redteam/plugins/hallucination.js.map +1 -0
  65. package/dist/src/redteam/{getHarmfulTests.d.ts → plugins/harmful.d.ts} +9 -9
  66. package/dist/src/redteam/plugins/harmful.d.ts.map +1 -0
  67. package/dist/src/redteam/{getHarmfulTests.js → plugins/harmful.js} +2 -2
  68. package/dist/src/redteam/plugins/harmful.js.map +1 -0
  69. package/dist/src/redteam/plugins/hijacking.d.ts +10 -0
  70. package/dist/src/redteam/plugins/hijacking.d.ts.map +1 -0
  71. package/dist/src/redteam/plugins/hijacking.js +47 -0
  72. package/dist/src/redteam/plugins/hijacking.js.map +1 -0
  73. package/dist/src/redteam/plugins/overreliance.d.ts +10 -0
  74. package/dist/src/redteam/plugins/overreliance.d.ts.map +1 -0
  75. package/dist/src/redteam/plugins/overreliance.js +42 -0
  76. package/dist/src/redteam/plugins/overreliance.js.map +1 -0
  77. package/dist/src/redteam/{getPiiTests.d.ts → plugins/pii.d.ts} +2 -2
  78. package/dist/src/redteam/plugins/pii.d.ts.map +1 -0
  79. package/dist/src/redteam/{getPiiTests.js → plugins/pii.js} +2 -2
  80. package/dist/src/redteam/plugins/pii.js.map +1 -0
  81. package/dist/src/redteam/plugins/politics.d.ts +10 -0
  82. package/dist/src/redteam/plugins/politics.d.ts.map +1 -0
  83. package/dist/src/redteam/plugins/politics.js +57 -0
  84. package/dist/src/redteam/plugins/politics.js.map +1 -0
  85. package/dist/src/testCases.d.ts.map +1 -1
  86. package/dist/src/testCases.js +3 -0
  87. package/dist/src/testCases.js.map +1 -1
  88. package/dist/src/types.d.ts +15 -3
  89. package/dist/src/types.d.ts.map +1 -1
  90. package/dist/src/types.js +9 -2
  91. package/dist/src/types.js.map +1 -1
  92. package/dist/src/util.d.ts +0 -10
  93. package/dist/src/util.d.ts.map +1 -1
  94. package/dist/src/util.js +2 -246
  95. package/dist/src/util.js.map +1 -1
  96. package/dist/src/web/nextui/404/index.html +1 -1
  97. package/dist/src/web/nextui/404.html +1 -1
  98. package/dist/src/web/nextui/_next/static/chunks/858-5d3a3678769b7e36.js +1 -1
  99. package/dist/src/web/nextui/_next/static/chunks/954-c35d4864ecbacd62.js +6 -0
  100. package/dist/src/web/nextui/_next/static/chunks/app/auth/login/{page-ee73165dd261f3ca.js → page-6fcc9431205718c7.js} +1 -1
  101. package/dist/src/web/nextui/_next/static/chunks/app/auth/signup/{page-7375a6707eb8675e.js → page-8caf49a834d34420.js} +1 -1
  102. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-4f93aacd25866d60.js +1 -0
  103. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-ce320e6d1e6d1d23.js +1 -0
  104. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/{page-310e2e58179970fa.js → page-3c5a944373865122.js} +1 -1
  105. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-73e894c39cc191f1.js +1 -0
  106. package/dist/src/web/nextui/_next/static/chunks/app/{layout-6b3048b719443145.js → layout-2038906de6c19565.js} +1 -1
  107. package/dist/src/web/nextui/_next/static/chunks/app/{page-251d4ea0ac894cd9.js → page-e07a0ddbf3d6e21c.js} +1 -1
  108. package/dist/src/web/nextui/_next/static/chunks/app/progress/page-73442c531d579c51.js +1 -0
  109. package/dist/src/web/nextui/_next/static/chunks/app/prompts/{page-6d29c01079a556f4.js → page-50e27c24c9e255bd.js} +1 -1
  110. package/dist/src/web/nextui/_next/static/chunks/app/report/{page-477181752ee9b493.js → page-be00cf77531ce9cb.js} +1 -1
  111. package/dist/src/web/nextui/_next/static/chunks/app/setup/{page-5a4d6156d3c83470.js → page-26cb5d2478fdbd34.js} +1 -1
  112. package/dist/src/web/nextui/_next/static/chunks/{main-app-345c3eca7e5cf432.js → main-app-929a26b3c8cd3f7a.js} +1 -1
  113. package/dist/src/web/nextui/_next/static/chunks/{webpack-c9f728822666f852.js → webpack-8a9bc9ee0defb756.js} +1 -1
  114. package/dist/src/web/nextui/_next/static/css/106779eb64615639.css +1 -0
  115. package/dist/src/web/nextui/auth/login/index.html +1 -1
  116. package/dist/src/web/nextui/auth/login/index.txt +6 -6
  117. package/dist/src/web/nextui/auth/signup/index.html +1 -1
  118. package/dist/src/web/nextui/auth/signup/index.txt +6 -6
  119. package/dist/src/web/nextui/datasets/index.html +1 -1
  120. package/dist/src/web/nextui/datasets/index.txt +6 -6
  121. package/dist/src/web/nextui/eval/index.html +1 -1
  122. package/dist/src/web/nextui/eval/index.txt +6 -6
  123. package/dist/src/web/nextui/index.html +1 -1
  124. package/dist/src/web/nextui/index.txt +5 -5
  125. package/dist/src/web/nextui/progress/index.html +1 -1
  126. package/dist/src/web/nextui/progress/index.txt +6 -6
  127. package/dist/src/web/nextui/prompts/index.html +1 -1
  128. package/dist/src/web/nextui/prompts/index.txt +6 -6
  129. package/dist/src/web/nextui/report/index.html +1 -1
  130. package/dist/src/web/nextui/report/index.txt +6 -6
  131. package/dist/src/web/nextui/setup/index.html +2 -2
  132. package/dist/src/web/nextui/setup/index.txt +7 -7
  133. package/package.json +1 -1
  134. package/dist/src/redteam/getCompetitorTests.d.ts +0 -3
  135. package/dist/src/redteam/getCompetitorTests.d.ts.map +0 -1
  136. package/dist/src/redteam/getCompetitorTests.js +0 -60
  137. package/dist/src/redteam/getCompetitorTests.js.map +0 -1
  138. package/dist/src/redteam/getHallucinationTests.d.ts +0 -3
  139. package/dist/src/redteam/getHallucinationTests.d.ts.map +0 -1
  140. package/dist/src/redteam/getHallucinationTests.js +0 -56
  141. package/dist/src/redteam/getHallucinationTests.js.map +0 -1
  142. package/dist/src/redteam/getHarmfulTests.d.ts.map +0 -1
  143. package/dist/src/redteam/getHarmfulTests.js.map +0 -1
  144. package/dist/src/redteam/getHijackingTests.d.ts +0 -3
  145. package/dist/src/redteam/getHijackingTests.d.ts.map +0 -1
  146. package/dist/src/redteam/getHijackingTests.js +0 -60
  147. package/dist/src/redteam/getHijackingTests.js.map +0 -1
  148. package/dist/src/redteam/getOverconfidenceTests.d.ts +0 -3
  149. package/dist/src/redteam/getOverconfidenceTests.d.ts.map +0 -1
  150. package/dist/src/redteam/getOverconfidenceTests.js +0 -55
  151. package/dist/src/redteam/getOverconfidenceTests.js.map +0 -1
  152. package/dist/src/redteam/getPiiTests.d.ts.map +0 -1
  153. package/dist/src/redteam/getPiiTests.js.map +0 -1
  154. package/dist/src/redteam/getPoliticalStatementsTests.d.ts +0 -3
  155. package/dist/src/redteam/getPoliticalStatementsTests.d.ts.map +0 -1
  156. package/dist/src/redteam/getPoliticalStatementsTests.js +0 -70
  157. package/dist/src/redteam/getPoliticalStatementsTests.js.map +0 -1
  158. package/dist/src/redteam/getUnderconfidenceTests.d.ts +0 -3
  159. package/dist/src/redteam/getUnderconfidenceTests.d.ts.map +0 -1
  160. package/dist/src/redteam/getUnderconfidenceTests.js +0 -55
  161. package/dist/src/redteam/getUnderconfidenceTests.js.map +0 -1
  162. package/dist/src/redteam/getUnintendedContractTests.d.ts +0 -3
  163. package/dist/src/redteam/getUnintendedContractTests.d.ts.map +0 -1
  164. package/dist/src/redteam/getUnintendedContractTests.js +0 -60
  165. package/dist/src/redteam/getUnintendedContractTests.js.map +0 -1
  166. package/dist/src/web/nextui/_next/static/chunks/954-58788165fb1e9563.js +0 -6
  167. package/dist/src/web/nextui/_next/static/chunks/app/datasets/page-c11cfb1b2c58325f.js +0 -1
  168. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50073ee4b153b82b.js +0 -1
  169. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-87d1e9bc26842e95.js +0 -1
  170. package/dist/src/web/nextui/_next/static/chunks/app/progress/page-15df1d043dee2f17.js +0 -1
  171. package/dist/src/web/nextui/_next/static/css/5bd2f45de1f3ba83.css +0 -1
  172. /package/dist/src/web/nextui/_next/static/{_4HZa8ihrRiRqQU13EScL → ENNANMoEha-uMGFo0DvzO}/_buildManifest.js +0 -0
  173. /package/dist/src/web/nextui/_next/static/{_4HZa8ihrRiRqQU13EScL → ENNANMoEha-uMGFo0DvzO}/_ssgManifest.js +0 -0
  174. /package/dist/src/web/nextui/_next/static/chunks/{2-57ab5e84907f795a.js → 2-671ad31c05d2c976.js} +0 -0
package/dist/src/main.js CHANGED
@@ -1,207 +1,35 @@
1
1
  #!/usr/bin/env node
2
2
  "use strict";
3
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
4
- if (k2 === undefined) k2 = k;
5
- var desc = Object.getOwnPropertyDescriptor(m, k);
6
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
7
- desc = { enumerable: true, get: function() { return m[k]; } };
8
- }
9
- Object.defineProperty(o, k2, desc);
10
- }) : (function(o, m, k, k2) {
11
- if (k2 === undefined) k2 = k;
12
- o[k2] = m[k];
13
- }));
14
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
15
- Object.defineProperty(o, "default", { enumerable: true, value: v });
16
- }) : function(o, v) {
17
- o["default"] = v;
18
- });
19
- var __importStar = (this && this.__importStar) || function (mod) {
20
- if (mod && mod.__esModule) return mod;
21
- var result = {};
22
- if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
23
- __setModuleDefault(result, mod);
24
- return result;
25
- };
26
3
  var __importDefault = (this && this.__importDefault) || function (mod) {
27
4
  return (mod && mod.__esModule) ? mod : { "default": mod };
28
5
  };
29
6
  Object.defineProperty(exports, "__esModule", { value: true });
30
7
  const chalk_1 = __importDefault(require("chalk"));
31
- const chokidar_1 = __importDefault(require("chokidar"));
32
8
  const commander_1 = require("commander");
33
- const dedent_1 = __importDefault(require("dedent"));
34
9
  const fs_1 = __importDefault(require("fs"));
35
- const js_yaml_1 = __importDefault(require("js-yaml"));
36
10
  const path_1 = __importDefault(require("path"));
37
11
  const readline_1 = __importDefault(require("readline"));
38
- const tiny_invariant_1 = __importDefault(require("tiny-invariant"));
39
- const assertions_1 = require("./assertions");
40
- const validateAssertions_1 = require("./assertions/validateAssertions");
41
12
  const cache_1 = require("./cache");
42
13
  const checkNodeVersion_1 = require("./checkNodeVersion");
43
- const cliState_1 = __importDefault(require("./cliState"));
44
14
  const config_1 = require("./commands/config");
45
15
  const delete_1 = require("./commands/delete");
46
- const filterProviders_1 = require("./commands/eval/filterProviders");
47
- const filterTests_1 = require("./commands/eval/filterTests");
16
+ const eval_1 = require("./commands/eval");
48
17
  const export_1 = require("./commands/export");
18
+ const generate_1 = require("./commands/generate");
49
19
  const import_1 = require("./commands/import");
50
20
  const list_1 = require("./commands/list");
21
+ const redteam_1 = require("./commands/redteam");
51
22
  const show_1 = require("./commands/show");
23
+ const config_2 = require("./config");
52
24
  const esm_1 = require("./esm");
53
- const evaluator_1 = require("./evaluator");
54
25
  const feedback_1 = require("./feedback");
55
- const logger_1 = __importStar(require("./logger"));
26
+ const logger_1 = __importDefault(require("./logger"));
56
27
  const onboarding_1 = require("./onboarding");
57
- const prompts_1 = require("./prompts");
58
- const providers_1 = require("./providers");
59
- const redteam_1 = require("./redteam");
60
- const constants_1 = require("./redteam/constants");
61
28
  const share_1 = require("./share");
62
- const table_1 = require("./table");
63
29
  const telemetry_1 = __importDefault(require("./telemetry"));
64
- const testCases_1 = require("./testCases");
65
- const types_1 = require("./types");
66
30
  const updates_1 = require("./updates");
67
31
  const util_1 = require("./util");
68
32
  const server_1 = require("./web/server");
69
- async function resolveConfigs(cmdObj, defaultConfig) {
70
- // Config parsing
71
- let fileConfig = {};
72
- const configPaths = cmdObj.config;
73
- if (configPaths) {
74
- fileConfig = await (0, util_1.readConfigs)(configPaths);
75
- }
76
- // Standalone assertion mode
77
- if (cmdObj.assertions) {
78
- if (!cmdObj.modelOutputs) {
79
- logger_1.default.error(chalk_1.default.red('You must provide --model-outputs when using --assertions'));
80
- process.exit(1);
81
- }
82
- const modelOutputs = JSON.parse(fs_1.default.readFileSync(path_1.default.join(process.cwd(), cmdObj.modelOutputs), 'utf8'));
83
- const assertions = await (0, assertions_1.readAssertions)(cmdObj.assertions);
84
- fileConfig.prompts = ['{{output}}'];
85
- fileConfig.providers = ['echo'];
86
- fileConfig.tests = modelOutputs.map((output) => {
87
- if (typeof output === 'string') {
88
- return {
89
- vars: {
90
- output,
91
- },
92
- assert: assertions,
93
- };
94
- }
95
- return {
96
- vars: {
97
- output: output.output,
98
- ...(output.tags === undefined ? {} : { tags: output.tags.join(', ') }),
99
- },
100
- assert: assertions,
101
- };
102
- });
103
- }
104
- // Use basepath in cases where path was supplied in the config file
105
- const basePath = configPaths ? path_1.default.dirname(configPaths[0]) : '';
106
- const defaultTestRaw = fileConfig.defaultTest || defaultConfig.defaultTest;
107
- const config = {
108
- description: fileConfig.description || defaultConfig.description,
109
- prompts: cmdObj.prompts || fileConfig.prompts || defaultConfig.prompts || [],
110
- providers: cmdObj.providers || fileConfig.providers || defaultConfig.providers || [],
111
- tests: cmdObj.tests || cmdObj.vars || fileConfig.tests || defaultConfig.tests || [],
112
- scenarios: fileConfig.scenarios || defaultConfig.scenarios,
113
- env: fileConfig.env || defaultConfig.env,
114
- sharing: process.env.PROMPTFOO_DISABLE_SHARING === '1'
115
- ? false
116
- : fileConfig.sharing ?? defaultConfig.sharing ?? true,
117
- defaultTest: defaultTestRaw ? await (0, testCases_1.readTest)(defaultTestRaw, basePath) : undefined,
118
- derivedMetrics: fileConfig.derivedMetrics || defaultConfig.derivedMetrics,
119
- outputPath: cmdObj.output || fileConfig.outputPath || defaultConfig.outputPath,
120
- metadata: fileConfig.metadata || defaultConfig.metadata,
121
- };
122
- // Validation
123
- if (!config.prompts || config.prompts.length === 0) {
124
- logger_1.default.error(chalk_1.default.red('You must provide at least 1 prompt'));
125
- process.exit(1);
126
- }
127
- if (!config.providers || config.providers.length === 0) {
128
- logger_1.default.error(chalk_1.default.red('You must specify at least 1 provider (for example, openai:gpt-4o)'));
129
- process.exit(1);
130
- }
131
- (0, tiny_invariant_1.default)(Array.isArray(config.providers), 'providers must be an array');
132
- config.providers.forEach((provider) => {
133
- const result = types_1.ProviderSchema.safeParse(provider);
134
- if (!result.success) {
135
- const errors = result.error.errors
136
- .map((err) => {
137
- return `- ${err.message}`;
138
- })
139
- .join('\n');
140
- const providerString = typeof provider === 'string' ? provider : JSON.stringify(provider);
141
- logger_1.default.warn(chalk_1.default.yellow((0, dedent_1.default) `
142
- Provider: ${providerString} encountered errors during schema validation:
143
-
144
- ${errors}
145
-
146
- Please double check your configuration.` + '\n'));
147
- }
148
- });
149
- // Parse prompts, providers, and tests
150
- const parsedPrompts = await (0, prompts_1.readPrompts)(config.prompts, cmdObj.prompts ? undefined : basePath);
151
- const parsedProviders = await (0, providers_1.loadApiProviders)(config.providers, {
152
- env: config.env,
153
- basePath,
154
- });
155
- const parsedTests = await (0, testCases_1.readTests)(config.tests || [], cmdObj.tests ? undefined : basePath);
156
- // Parse testCases for each scenario
157
- if (fileConfig.scenarios) {
158
- for (const scenario of fileConfig.scenarios) {
159
- const parsedScenarioTests = await (0, testCases_1.readTests)(scenario.tests, cmdObj.tests ? undefined : basePath);
160
- scenario.tests = parsedScenarioTests;
161
- const filteredTests = await (0, filterTests_1.filterTests)({
162
- ...scenario,
163
- providers: parsedProviders,
164
- prompts: parsedPrompts,
165
- }, {
166
- firstN: cmdObj.filterFirstN,
167
- pattern: cmdObj.filterPattern,
168
- failing: cmdObj.filterFailing,
169
- });
170
- (0, tiny_invariant_1.default)(filteredTests, 'filteredTests are undefined');
171
- scenario.tests = filteredTests;
172
- }
173
- }
174
- const parsedProviderPromptMap = (0, prompts_1.readProviderPromptMap)(config, parsedPrompts);
175
- if (parsedPrompts.length === 0) {
176
- logger_1.default.error(chalk_1.default.red('No prompts found'));
177
- process.exit(1);
178
- }
179
- const defaultTest = {
180
- options: {
181
- prefix: cmdObj.promptPrefix,
182
- suffix: cmdObj.promptSuffix,
183
- provider: cmdObj.grader,
184
- // rubricPrompt
185
- ...(config.defaultTest?.options || {}),
186
- },
187
- ...config.defaultTest,
188
- };
189
- const testSuite = {
190
- description: config.description,
191
- prompts: parsedPrompts,
192
- providers: parsedProviders,
193
- providerPromptMap: parsedProviderPromptMap,
194
- tests: parsedTests,
195
- scenarios: config.scenarios,
196
- defaultTest,
197
- derivedMetrics: config.derivedMetrics,
198
- nunjucksFilters: await (0, util_1.readFilters)(fileConfig.nunjucksFilters || defaultConfig.nunjucksFilters || {}),
199
- };
200
- if (testSuite.tests) {
201
- (0, validateAssertions_1.validateAssertions)(testSuite.tests);
202
- }
203
- return { config, testSuite, basePath };
204
- }
205
33
  async function main() {
206
34
  await (0, updates_1.checkForUpdates)();
207
35
  const pwd = process.cwd();
@@ -214,7 +42,7 @@ async function main() {
214
42
  let defaultConfig = {};
215
43
  let defaultConfigPath;
216
44
  for (const _path of potentialPaths) {
217
- const maybeConfig = await (0, util_1.maybeReadConfig)(_path);
45
+ const maybeConfig = await (0, config_2.maybeReadConfig)(_path);
218
46
  if (maybeConfig) {
219
47
  defaultConfig = maybeConfig;
220
48
  defaultConfigPath = _path;
@@ -339,399 +167,15 @@ async function main() {
339
167
  .action((message) => {
340
168
  (0, feedback_1.gatherFeedback)(message);
341
169
  });
342
- const generateCommand = program.command('generate').description('Generate synthetic data');
343
- generateCommand
344
- .command('dataset')
345
- .description('Generate test cases')
346
- .option('-i, --instructions [instructions]', 'Additional instructions to follow while generating test cases')
347
- .option('-c, --config [path]', 'Path to configuration file. Defaults to promptfooconfig.yaml')
348
- .option('-o, --output [path]', 'Path to output file')
349
- .option('-w, --write', 'Write results to promptfoo configuration file')
350
- .option('--numPersonas <number>', 'Number of personas to generate', '5')
351
- .option('--numTestCasesPerPersona <number>', 'Number of test cases per persona', '3')
352
- .option('--no-cache', 'Do not read or write results to disk cache', false)
353
- .option('--env-file <path>', 'Path to .env file')
354
- .action(async (options) => {
355
- (0, util_1.setupEnv)(options.envFile);
356
- if (!options.cache) {
357
- logger_1.default.info('Cache is disabled.');
358
- (0, cache_1.disableCache)();
359
- }
360
- let testSuite;
361
- const configPath = options.config || defaultConfigPath;
362
- if (configPath) {
363
- const resolved = await resolveConfigs({
364
- config: [configPath],
365
- }, defaultConfig);
366
- testSuite = resolved.testSuite;
367
- }
368
- else {
369
- throw new Error('Could not find config file. Please use `--config`');
370
- }
371
- const startTime = Date.now();
372
- telemetry_1.default.record('command_used', {
373
- name: 'generate_dataset - started',
374
- numPrompts: testSuite.prompts.length,
375
- numTestsExisting: (testSuite.tests || []).length,
376
- });
377
- await telemetry_1.default.send();
378
- const results = await (0, testCases_1.synthesizeFromTestSuite)(testSuite, {
379
- instructions: options.instructions,
380
- numPersonas: parseInt(options.numPersonas, 10),
381
- numTestCasesPerPersona: parseInt(options.numTestCasesPerPersona, 10),
382
- });
383
- const configAddition = { tests: results.map((result) => ({ vars: result })) };
384
- const yamlString = js_yaml_1.default.dump(configAddition);
385
- if (options.output) {
386
- fs_1.default.writeFileSync(options.output, yamlString);
387
- (0, util_1.printBorder)();
388
- logger_1.default.info(`Wrote ${results.length} new test cases to ${options.output}`);
389
- (0, util_1.printBorder)();
390
- }
391
- else {
392
- (0, util_1.printBorder)();
393
- logger_1.default.info('New test Cases');
394
- (0, util_1.printBorder)();
395
- logger_1.default.info(yamlString);
396
- }
397
- (0, util_1.printBorder)();
398
- if (options.write && configPath) {
399
- const existingConfig = js_yaml_1.default.load(fs_1.default.readFileSync(configPath, 'utf8'));
400
- existingConfig.tests = [...(existingConfig.tests || []), ...configAddition.tests];
401
- fs_1.default.writeFileSync(configPath, js_yaml_1.default.dump(existingConfig));
402
- logger_1.default.info(`Wrote ${results.length} new test cases to ${configPath}`);
403
- }
404
- else {
405
- logger_1.default.info(`Copy the above test cases or run ${chalk_1.default.greenBright('promptfoo generate dataset --write')} to write directly to the config`);
406
- }
407
- telemetry_1.default.record('command_used', {
408
- name: 'generate_dataset',
409
- numPrompts: testSuite.prompts.length,
410
- numTestsExisting: (testSuite.tests || []).length,
411
- numTestsGenerated: results.length,
412
- duration: Math.round((Date.now() - startTime) / 1000),
413
- });
414
- await telemetry_1.default.send();
415
- });
416
- generateCommand
417
- .command('redteam')
418
- .description('Generate adversarial test cases')
419
- .option('-c, --config [path]', 'Path to configuration file. Defaults to promptfooconfig.yaml')
420
- .option('-o, --output [path]', 'Path to output file')
421
- .option('-w, --write', 'Write results to promptfoo configuration file')
422
- .option('--purpose <purpose>', 'Set the system purpose. If not set, the system purpose will be inferred from the config file')
423
- .option('--provider <provider>', `Provider to use for generating adversarial tests. Defaults to: ${constants_1.REDTEAM_MODEL}`)
424
- .option('--injectVar <varname>', 'Override the variable to inject user input into the prompt. If not set, the variable will default to {{query}}')
425
- .option('--plugins <plugins>', (0, dedent_1.default) `Comma-separated list of plugins to use. Defaults to:
426
- \n- ${Array.from(redteam_1.DEFAULT_PLUGINS).sort().join('\n- ')}\n\n
427
- `, (val) => val.split(',').map((x) => x.trim()))
428
- .option('--add-plugins <plugins>', (0, dedent_1.default) `Comma-separated list of plugins to run in addition to the default plugins:
429
- \n- ${redteam_1.ADDITIONAL_PLUGINS.sort().join('\n- ')}\n\n
430
- `, (val) => val.split(',').map((x) => x.trim()))
431
- .option('--no-cache', 'Do not read or write results to disk cache', false)
432
- .option('--env-file <path>', 'Path to .env file')
433
- .action(async ({ addPlugins, cache, config, envFile, injectVar, output, plugins, provider, purpose, write, }) => {
434
- (0, util_1.setupEnv)(envFile);
435
- if (!cache) {
436
- logger_1.default.info('Cache is disabled.');
437
- (0, cache_1.disableCache)();
438
- }
439
- let testSuite;
440
- const configPath = config || defaultConfigPath;
441
- if (configPath) {
442
- const resolved = await resolveConfigs({
443
- config: [configPath],
444
- }, defaultConfig);
445
- testSuite = resolved.testSuite;
446
- }
447
- else {
448
- throw new Error('Could not find config file. Please use `--config`');
449
- }
450
- const startTime = Date.now();
451
- telemetry_1.default.record('command_used', {
452
- name: 'generate redteam - started',
453
- numPrompts: testSuite.prompts.length,
454
- numTestsExisting: (testSuite.tests || []).length,
455
- });
456
- await telemetry_1.default.send();
457
- const redteamTests = await (0, redteam_1.synthesizeFromTestSuite)(testSuite, {
458
- purpose,
459
- injectVar,
460
- plugins: addPlugins && addPlugins.length > 0
461
- ? Array.from(plugins || redteam_1.DEFAULT_PLUGINS).concat(addPlugins)
462
- : plugins,
463
- provider,
464
- });
465
- if (output) {
466
- const existingYaml = js_yaml_1.default.load(fs_1.default.readFileSync(configPath, 'utf8'));
467
- const updatedYaml = {
468
- ...existingYaml,
469
- tests: redteamTests,
470
- metadata: {
471
- ...existingYaml.metadata,
472
- redteam: true,
473
- },
474
- };
475
- fs_1.default.writeFileSync(output, js_yaml_1.default.dump(updatedYaml, { skipInvalid: true }));
476
- (0, util_1.printBorder)();
477
- logger_1.default.info(`Wrote ${redteamTests.length} new test cases to ${output}`);
478
- (0, util_1.printBorder)();
479
- }
480
- else if (write && configPath) {
481
- const existingConfig = js_yaml_1.default.load(fs_1.default.readFileSync(configPath, 'utf8'));
482
- existingConfig.tests = [...(existingConfig.tests || []), ...redteamTests];
483
- fs_1.default.writeFileSync(configPath, js_yaml_1.default.dump(existingConfig));
484
- logger_1.default.info(`Wrote ${redteamTests.length} new test cases to ${configPath}`);
485
- }
486
- else {
487
- logger_1.default.info(js_yaml_1.default.dump(redteamTests, { skipInvalid: true }));
488
- }
489
- telemetry_1.default.record('command_used', {
490
- name: 'generate redteam',
491
- numPrompts: testSuite.prompts.length,
492
- numTestsExisting: (testSuite.tests || []).length,
493
- numTestsGenerated: redteamTests.length,
494
- duration: Math.round((Date.now() - startTime) / 1000),
495
- });
496
- await telemetry_1.default.send();
497
- });
498
- program
499
- .command('eval')
500
- .description('Evaluate prompts')
501
- .option('-p, --prompts <paths...>', 'Paths to prompt files (.txt)')
502
- .option('-r, --providers <name or path...>', 'One of: openai:chat, openai:completion, openai:<model name>, or path to custom API caller module')
503
- .option('-c, --config <paths...>', 'Path to configuration file. Automatically loads promptfooconfig.js/json/yaml')
504
- .option(
505
- // TODO(ian): Remove `vars` for v1
506
- '-v, --vars, -t, --tests <path>', 'Path to CSV with test cases', defaultConfig?.commandLineOptions?.vars)
507
- .option('-a, --assertions <path>', 'Path to assertions file')
508
- .option('--model-outputs <path>', 'Path to JSON containing list of LLM output strings')
509
- .option('-t, --tests <path>', 'Path to CSV with test cases')
510
- .option('-o, --output <paths...>', 'Path to output file (csv, txt, json, yaml, yml, html), default is no output file')
511
- .option('-j, --max-concurrency <number>', 'Maximum number of concurrent API calls', defaultConfig.evaluateOptions?.maxConcurrency
512
- ? String(defaultConfig.evaluateOptions.maxConcurrency)
513
- : `${evaluator_1.DEFAULT_MAX_CONCURRENCY}`)
514
- .option('--repeat <number>', 'Number of times to run each test', defaultConfig.evaluateOptions?.repeat ? String(defaultConfig.evaluateOptions.repeat) : '1')
515
- .option('--delay <number>', 'Delay between each test (in milliseconds)', defaultConfig.evaluateOptions?.delay ? String(defaultConfig.evaluateOptions.delay) : '0')
516
- .option('--table-cell-max-length <number>', 'Truncate console table cells to this length', '250')
517
- .option('--suggest-prompts <number>', 'Generate N new prompts and append them to the prompt list')
518
- .option('--prompt-prefix <path>', 'This prefix is prepended to every prompt', defaultConfig.defaultTest?.options?.prefix)
519
- .option('--prompt-suffix <path>', 'This suffix is append to every prompt', defaultConfig.defaultTest?.options?.suffix)
520
- .option('--no-write', 'Do not write results to promptfoo directory', defaultConfig?.commandLineOptions?.write)
521
- .option('--no-cache', 'Do not read or write results to disk cache',
522
- // TODO(ian): Remove commandLineOptions.cache in v1
523
- defaultConfig?.commandLineOptions?.cache ?? defaultConfig?.evaluateOptions?.cache)
524
- .option('--no-progress-bar', 'Do not show progress bar')
525
- .option('--table', 'Output table in CLI', defaultConfig?.commandLineOptions?.table ?? true)
526
- .option('--no-table', 'Do not output table in CLI', defaultConfig?.commandLineOptions?.table)
527
- .option('--share', 'Create a shareable URL', defaultConfig?.commandLineOptions?.share)
528
- .option('--grader <provider>', 'Model that will grade outputs', defaultConfig?.commandLineOptions?.grader)
529
- .option('--verbose', 'Show debug logs', defaultConfig?.commandLineOptions?.verbose)
530
- .option('-w, --watch', 'Watch for changes in config and re-run')
531
- .option('--env-file <path>', 'Path to .env file')
532
- .option('--interactive-providers', 'Run providers interactively, one at a time', defaultConfig?.evaluateOptions?.interactiveProviders)
533
- .option('-n, --filter-first-n <number>', 'Only run the first N tests')
534
- .option('--filter-pattern <pattern>', 'Only run tests whose description matches the regular expression pattern')
535
- .option('--filter-providers <providers>', 'Only run tests with these providers')
536
- .option('--filter-failing <path>', 'Path to json output file')
537
- .option('--var <key=value>', 'Set a variable in key=value format', (value, previous = {}) => {
538
- const [key, val] = value.split('=');
539
- if (!key || val === undefined) {
540
- throw new Error('--var must be specified in key=value format.');
541
- }
542
- previous[key] = val;
543
- return previous;
544
- }, {})
545
- .action(async (cmdObj) => {
546
- (0, util_1.setupEnv)(cmdObj.envFile);
547
- let config = undefined;
548
- let testSuite = undefined;
549
- let basePath = undefined;
550
- const runEvaluation = async (initialization) => {
551
- const startTime = Date.now();
552
- telemetry_1.default.record('command_used', {
553
- name: 'eval - started',
554
- watch: Boolean(cmdObj.watch),
555
- });
556
- await telemetry_1.default.send();
557
- // Misc settings
558
- if (cmdObj.verbose) {
559
- (0, logger_1.setLogLevel)('debug');
560
- }
561
- const iterations = parseInt(cmdObj.repeat || '', 10);
562
- const repeat = !isNaN(iterations) && iterations > 0 ? iterations : 1;
563
- if (!cmdObj.cache || repeat > 1) {
564
- logger_1.default.info('Cache is disabled.');
565
- (0, cache_1.disableCache)();
566
- }
567
- ({ config, testSuite, basePath } = await resolveConfigs(cmdObj, defaultConfig));
568
- cliState_1.default.basePath = basePath;
569
- let maxConcurrency = parseInt(cmdObj.maxConcurrency || '', 10);
570
- const delay = parseInt(cmdObj.delay || '', 0);
571
- if (delay > 0) {
572
- maxConcurrency = 1;
573
- logger_1.default.info(`Running at concurrency=1 because ${delay}ms delay was requested between API calls`);
574
- }
575
- testSuite.tests = await (0, filterTests_1.filterTests)(testSuite, {
576
- firstN: cmdObj.filterFirstN,
577
- pattern: cmdObj.filterPattern,
578
- failing: cmdObj.filterFailing,
579
- });
580
- testSuite.providers = (0, filterProviders_1.filterProviders)(testSuite.providers, cmdObj.filterProviders);
581
- const options = {
582
- showProgressBar: (0, logger_1.getLogLevel)() === 'debug' ? false : cmdObj.progressBar,
583
- maxConcurrency: !isNaN(maxConcurrency) && maxConcurrency > 0 ? maxConcurrency : undefined,
584
- repeat,
585
- delay: !isNaN(delay) && delay > 0 ? delay : undefined,
586
- interactiveProviders: cmdObj.interactiveProviders,
587
- ...evaluateOptions,
588
- };
589
- if (cmdObj.grader) {
590
- testSuite.defaultTest = testSuite.defaultTest || {};
591
- testSuite.defaultTest.options = testSuite.defaultTest.options || {};
592
- testSuite.defaultTest.options.provider = await (0, providers_1.loadApiProvider)(cmdObj.grader);
593
- }
594
- if (cmdObj.var) {
595
- testSuite.defaultTest = testSuite.defaultTest || {};
596
- testSuite.defaultTest.vars = { ...testSuite.defaultTest.vars, ...cmdObj.var };
597
- }
598
- if (cmdObj.generateSuggestions) {
599
- options.generateSuggestions = true;
600
- }
601
- const summary = await (0, evaluator_1.evaluate)(testSuite, {
602
- ...options,
603
- eventSource: 'cli',
604
- });
605
- const shareableUrl = cmdObj.share && config.sharing ? await (0, share_1.createShareableUrl)(summary, config) : null;
606
- if (cmdObj.table && (0, logger_1.getLogLevel)() !== 'debug') {
607
- // Output CLI table
608
- const table = (0, table_1.generateTable)(summary, parseInt(cmdObj.tableCellMaxLength || '', 10));
609
- logger_1.default.info('\n' + table.toString());
610
- if (summary.table.body.length > 25) {
611
- const rowsLeft = summary.table.body.length - 25;
612
- logger_1.default.info(`... ${rowsLeft} more row${rowsLeft === 1 ? '' : 's'} not shown ...\n`);
613
- }
614
- }
615
- else if (summary.stats.failures !== 0) {
616
- logger_1.default.debug(`At least one evaluation failure occurred. This might be caused by the underlying call to the provider, or a test failure. Context: \n${JSON.stringify(summary.results)}`);
617
- }
618
- await (0, util_1.migrateResultsFromFileSystemToDatabase)();
619
- let evalId = null;
620
- if (cmdObj.write) {
621
- evalId = await (0, util_1.writeResultsToDatabase)(summary, config);
622
- }
623
- const { outputPath } = config;
624
- if (outputPath) {
625
- // Write output to file
626
- if (typeof outputPath === 'string') {
627
- await (0, util_1.writeOutput)(outputPath, evalId, summary, config, shareableUrl);
628
- }
629
- else if (Array.isArray(outputPath)) {
630
- await (0, util_1.writeMultipleOutputs)(outputPath, evalId, summary, config, shareableUrl);
631
- }
632
- logger_1.default.info(chalk_1.default.yellow(`Writing output to ${outputPath}`));
633
- }
634
- telemetry_1.default.maybeShowNotice();
635
- (0, util_1.printBorder)();
636
- if (!cmdObj.write) {
637
- logger_1.default.info(`${chalk_1.default.green('✔')} Evaluation complete`);
638
- }
639
- else {
640
- if (shareableUrl) {
641
- logger_1.default.info(`${chalk_1.default.green('✔')} Evaluation complete: ${shareableUrl}`);
642
- }
643
- else {
644
- logger_1.default.info(`${chalk_1.default.green('✔')} Evaluation complete.\n`);
645
- logger_1.default.info(`» Run ${chalk_1.default.greenBright.bold('promptfoo view')} to use the local web viewer`);
646
- logger_1.default.info(`» Run ${chalk_1.default.greenBright.bold('promptfoo share')} to create a shareable URL`);
647
- logger_1.default.info(`» This project needs your feedback. What's one thing we can improve? ${chalk_1.default.greenBright.bold('https://forms.gle/YFLgTe1dKJKNSCsU7')}`);
648
- }
649
- }
650
- (0, util_1.printBorder)();
651
- logger_1.default.info(chalk_1.default.green.bold(`Successes: ${summary.stats.successes}`));
652
- logger_1.default.info(chalk_1.default.red.bold(`Failures: ${summary.stats.failures}`));
653
- logger_1.default.info(`Token usage: Total ${summary.stats.tokenUsage.total}, Prompt ${summary.stats.tokenUsage.prompt}, Completion ${summary.stats.tokenUsage.completion}, Cached ${summary.stats.tokenUsage.cached}`);
654
- telemetry_1.default.record('command_used', {
655
- name: 'eval',
656
- watch: Boolean(cmdObj.watch),
657
- duration: Math.round((Date.now() - startTime) / 1000),
658
- });
659
- await telemetry_1.default.send();
660
- if (cmdObj.watch) {
661
- if (initialization) {
662
- const configPaths = (cmdObj.config || [defaultConfigPath]).filter(Boolean);
663
- if (!configPaths.length) {
664
- logger_1.default.error('Could not locate config file(s) to watch');
665
- process.exit(1);
666
- }
667
- const basePath = path_1.default.dirname(configPaths[0]);
668
- const promptPaths = Array.isArray(config.prompts)
669
- ? config.prompts
670
- .map((p) => {
671
- if (typeof p === 'string' && p.startsWith('file://')) {
672
- return path_1.default.resolve(basePath, p.slice('file://'.length));
673
- }
674
- else if (typeof p === 'object' && p.id && p.id.startsWith('file://')) {
675
- return path_1.default.resolve(basePath, p.id.slice('file://'.length));
676
- }
677
- return null;
678
- })
679
- .filter(Boolean)
680
- : [];
681
- const providerPaths = Array.isArray(config.providers)
682
- ? config.providers
683
- .map((p) => typeof p === 'string' && p.startsWith('file://')
684
- ? path_1.default.resolve(basePath, p.slice('file://'.length))
685
- : null)
686
- .filter(Boolean)
687
- : [];
688
- const varPaths = Array.isArray(config.tests)
689
- ? config.tests
690
- .flatMap((t) => {
691
- if (typeof t === 'string' && t.startsWith('file://')) {
692
- return path_1.default.resolve(basePath, t.slice('file://'.length));
693
- }
694
- else if (typeof t !== 'string' && t.vars) {
695
- return Object.values(t.vars).flatMap((v) => {
696
- if (typeof v === 'string' && v.startsWith('file://')) {
697
- return path_1.default.resolve(basePath, v.slice('file://'.length));
698
- }
699
- return [];
700
- });
701
- }
702
- return [];
703
- })
704
- .filter(Boolean)
705
- : [];
706
- const watchPaths = Array.from(new Set([...configPaths, ...promptPaths, ...providerPaths, ...varPaths]));
707
- const watcher = chokidar_1.default.watch(watchPaths, { ignored: /^\./, persistent: true });
708
- watcher
709
- .on('change', async (path) => {
710
- (0, util_1.printBorder)();
711
- logger_1.default.info(`File change detected: ${path}`);
712
- (0, util_1.printBorder)();
713
- await runEvaluation();
714
- })
715
- .on('error', (error) => logger_1.default.error(`Watcher error: ${error}`))
716
- .on('ready', () => watchPaths.forEach((watchPath) => logger_1.default.info(`Watching for file changes on ${watchPath} ...`)));
717
- }
718
- }
719
- else {
720
- logger_1.default.info('Done.');
721
- if (summary.stats.failures > 0) {
722
- const exitCode = Number(process.env.PROMPTFOO_FAILED_TEST_EXIT_CODE);
723
- process.exit(isNaN(exitCode) ? 100 : exitCode);
724
- }
725
- }
726
- };
727
- await runEvaluation(true /* initialization */);
728
- });
729
- (0, list_1.listCommand)(program);
730
- (0, show_1.showCommand)(program);
170
+ (0, config_1.configCommand)(program);
731
171
  (0, delete_1.deleteCommand)(program);
732
- (0, import_1.importCommand)(program);
172
+ (0, eval_1.evalCommand)(program, defaultConfig, defaultConfigPath, evaluateOptions);
733
173
  (0, export_1.exportCommand)(program);
734
- (0, config_1.configCommand)(program);
174
+ (0, generate_1.generateCommand)(program, defaultConfig, defaultConfigPath);
175
+ (0, import_1.importCommand)(program);
176
+ (0, list_1.listCommand)(program);
177
+ (0, redteam_1.redteamCommand)(program);
178
+ (0, show_1.showCommand)(program);
735
179
  program.parse(process.argv);
736
180
  if (!process.argv.slice(2).length) {
737
181
  program.outputHelp();