promptfoo 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. package/README.md +1 -1
  2. package/dist/package.json +4 -4
  3. package/dist/src/assertions.d.ts.map +1 -1
  4. package/dist/src/assertions.js +5 -0
  5. package/dist/src/assertions.js.map +1 -1
  6. package/dist/src/evaluator.js +1 -1
  7. package/dist/src/evaluator.js.map +1 -1
  8. package/dist/src/index.d.ts +1 -5
  9. package/dist/src/index.d.ts.map +1 -1
  10. package/dist/src/index.js +1 -1
  11. package/dist/src/index.js.map +1 -1
  12. package/dist/src/matchers.d.ts +3 -2
  13. package/dist/src/matchers.d.ts.map +1 -1
  14. package/dist/src/matchers.js +37 -9
  15. package/dist/src/matchers.js.map +1 -1
  16. package/dist/src/providers/anthropic.d.ts +5 -3
  17. package/dist/src/providers/anthropic.d.ts.map +1 -1
  18. package/dist/src/providers/anthropic.js +8 -10
  19. package/dist/src/providers/anthropic.js.map +1 -1
  20. package/dist/src/providers/azureopenai.d.ts +9 -8
  21. package/dist/src/providers/azureopenai.d.ts.map +1 -1
  22. package/dist/src/providers/azureopenai.js +33 -36
  23. package/dist/src/providers/azureopenai.js.map +1 -1
  24. package/dist/src/providers/openai.d.ts +12 -12
  25. package/dist/src/providers/openai.d.ts.map +1 -1
  26. package/dist/src/providers/openai.js +54 -65
  27. package/dist/src/providers/openai.js.map +1 -1
  28. package/dist/src/providers/replicate.d.ts +4 -2
  29. package/dist/src/providers/replicate.d.ts.map +1 -1
  30. package/dist/src/providers/replicate.js +10 -8
  31. package/dist/src/providers/replicate.js.map +1 -1
  32. package/dist/src/providers/webhook.d.ts +9 -0
  33. package/dist/src/providers/webhook.d.ts.map +1 -0
  34. package/dist/src/providers/webhook.js +54 -0
  35. package/dist/src/providers/webhook.js.map +1 -0
  36. package/dist/src/providers.d.ts +1 -1
  37. package/dist/src/providers.d.ts.map +1 -1
  38. package/dist/src/providers.js +36 -28
  39. package/dist/src/providers.js.map +1 -1
  40. package/dist/src/suggestions.d.ts.map +1 -1
  41. package/dist/src/suggestions.js +1 -3
  42. package/dist/src/suggestions.js.map +1 -1
  43. package/dist/src/types.d.ts +7 -1
  44. package/dist/src/types.d.ts.map +1 -1
  45. package/dist/src/util.js +1 -1
  46. package/dist/src/util.js.map +1 -1
  47. package/dist/src/web/nextui/404/index.html +1 -1
  48. package/dist/src/web/nextui/404.html +1 -1
  49. package/dist/src/web/nextui/_next/static/Bl3o5lF4ON7Fjki46lPhr/_buildManifest.js +1 -0
  50. package/dist/src/web/nextui/_next/static/chunks/226-7bbb6c98a19542fd.js +37 -0
  51. package/dist/src/web/nextui/_next/static/chunks/249-ea9c0f034888ccff.js +125 -0
  52. package/dist/src/web/nextui/_next/static/chunks/339-501c32916b785ef1.js +1 -0
  53. package/dist/src/web/nextui/_next/static/chunks/365-e426ea5bc7e815fc.js +8 -0
  54. package/dist/src/web/nextui/_next/static/chunks/396-0a51429a01e24cdd.js +1 -0
  55. package/dist/src/web/nextui/_next/static/chunks/596-297f7ff4a0436e87.js +25 -0
  56. package/dist/src/web/nextui/_next/static/chunks/613-572c22424de64659.js +1 -0
  57. package/dist/src/web/nextui/_next/static/chunks/706-ae1d3352d28419e9.js +9 -0
  58. package/dist/src/web/nextui/_next/static/chunks/891-7035926a62c1c4e0.js +1 -0
  59. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-366629541fd598e9.js +1 -0
  60. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-319d2ee38d37574e.js +1 -0
  61. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-a6b1ff91723b7beb.js +1 -0
  62. package/dist/src/web/nextui/_next/static/chunks/app/layout-024c4adc71c9feb0.js +1 -0
  63. package/dist/src/web/nextui/_next/static/chunks/app/page-1ae60660130041b2.js +1 -0
  64. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-6ef16148040bf4f4.js +1 -0
  65. package/dist/src/web/nextui/_next/static/chunks/{ca377847-cb6ae6a6a073aebb.js → ca377847-26b462611379a4f7.js} +3 -3
  66. package/dist/src/web/nextui/_next/static/chunks/{fd9d1056-ac777be631f5a9e9.js → fd9d1056-fba4b53a2f01213b.js} +1 -1
  67. package/dist/src/web/nextui/_next/static/chunks/framework-8883d1e9be70c3da.js +25 -0
  68. package/dist/src/web/nextui/_next/static/chunks/main-8ea85465d428ecfe.js +1 -0
  69. package/dist/src/web/nextui/_next/static/chunks/main-app-581ccf0003955b21.js +1 -0
  70. package/dist/src/web/nextui/_next/static/chunks/pages/_app-52924524f99094ab.js +1 -0
  71. package/dist/src/web/nextui/_next/static/chunks/pages/_error-c92d5c4bb2b49926.js +1 -0
  72. package/dist/src/web/nextui/_next/static/chunks/webpack-55c264ce2fd85eb7.js +1 -0
  73. package/dist/src/web/nextui/_next/static/css/4d399fceacd06992.css +1 -0
  74. package/dist/src/web/nextui/eval/index.html +1 -1
  75. package/dist/src/web/nextui/eval/index.txt +6 -6
  76. package/dist/src/web/nextui/index.html +1 -1
  77. package/dist/src/web/nextui/index.txt +5 -5
  78. package/dist/src/web/nextui/setup/index.html +27 -1
  79. package/dist/src/web/nextui/setup/index.txt +9 -9
  80. package/dist/src/web/server.d.ts.map +1 -1
  81. package/dist/src/web/server.js +9 -5
  82. package/dist/src/web/server.js.map +1 -1
  83. package/package.json +4 -4
  84. package/dist/src/web/nextui/_next/static/US6gOx8LHTX_Hzm9aYNrC/_buildManifest.js +0 -1
  85. package/dist/src/web/nextui/_next/static/chunks/339-4fc8a80fa840e771.js +0 -1
  86. package/dist/src/web/nextui/_next/static/chunks/373-8a280796c0f2d1af.js +0 -1
  87. package/dist/src/web/nextui/_next/static/chunks/583-125d32af505e9bc4.js +0 -1
  88. package/dist/src/web/nextui/_next/static/chunks/596-07e4a23a5c6cdf04.js +0 -25
  89. package/dist/src/web/nextui/_next/static/chunks/658-a62210d07dc4dcb6.js +0 -15
  90. package/dist/src/web/nextui/_next/static/chunks/707-699cbd84b259c37b.js +0 -37
  91. package/dist/src/web/nextui/_next/static/chunks/858-ceb6fa22e614492b.js +0 -125
  92. package/dist/src/web/nextui/_next/static/chunks/891-3000ea7c0a292558.js +0 -1
  93. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/not-found-50e40614fa05600e.js +0 -1
  94. package/dist/src/web/nextui/_next/static/chunks/app/eval/[id]/page-c19c44ed1b2dfb58.js +0 -1
  95. package/dist/src/web/nextui/_next/static/chunks/app/eval/page-d4a1813b2f8c4532.js +0 -1
  96. package/dist/src/web/nextui/_next/static/chunks/app/layout-664a8d716d2d24b1.js +0 -1
  97. package/dist/src/web/nextui/_next/static/chunks/app/page-1f8ef6a00a2355f0.js +0 -1
  98. package/dist/src/web/nextui/_next/static/chunks/app/setup/page-182018a3c6397345.js +0 -1
  99. package/dist/src/web/nextui/_next/static/chunks/framework-43665103d101a22d.js +0 -25
  100. package/dist/src/web/nextui/_next/static/chunks/main-50cc0a98559591ce.js +0 -1
  101. package/dist/src/web/nextui/_next/static/chunks/main-app-c9dc13756d166550.js +0 -1
  102. package/dist/src/web/nextui/_next/static/chunks/pages/_app-6b79a29ad0d63b21.js +0 -1
  103. package/dist/src/web/nextui/_next/static/chunks/pages/_error-9aeb3e4d490fe4b8.js +0 -1
  104. package/dist/src/web/nextui/_next/static/chunks/webpack-6e474e42be502dd7.js +0 -1
  105. package/dist/src/web/nextui/_next/static/css/a35c840ac696f161.css +0 -1
  106. package/dist/src/web/nextui/api +0 -1
  107. package/src/__mocks__/esm.ts +0 -3
  108. package/src/assertions.ts +0 -580
  109. package/src/cache.ts +0 -109
  110. package/src/esm.ts +0 -13
  111. package/src/evaluator.ts +0 -500
  112. package/src/index.ts +0 -52
  113. package/src/logger.ts +0 -46
  114. package/src/main.ts +0 -442
  115. package/src/matchers.ts +0 -120
  116. package/src/onboarding.ts +0 -69
  117. package/src/prompts.ts +0 -39
  118. package/src/providers/anthropic.ts +0 -88
  119. package/src/providers/azureopenai.ts +0 -299
  120. package/src/providers/llama.ts +0 -95
  121. package/src/providers/localai.ts +0 -111
  122. package/src/providers/ollama.ts +0 -89
  123. package/src/providers/openai.ts +0 -337
  124. package/src/providers/replicate.ts +0 -99
  125. package/src/providers/scriptCompletion.ts +0 -35
  126. package/src/providers/shared.ts +0 -34
  127. package/src/providers.ts +0 -192
  128. package/src/share.ts +0 -27
  129. package/src/suggestions.ts +0 -63
  130. package/src/table.ts +0 -43
  131. package/src/tableOutput.html +0 -52
  132. package/src/telemetry.ts +0 -70
  133. package/src/types.ts +0 -299
  134. package/src/updates.ts +0 -46
  135. package/src/util.ts +0 -543
  136. package/src/web/nextui/.eslintrc.json +0 -3
  137. package/src/web/nextui/next.config.js +0 -14
  138. package/src/web/nextui/package-lock.json +0 -4644
  139. package/src/web/nextui/package.json +0 -47
  140. package/src/web/nextui/public/favicon.ico +0 -0
  141. package/src/web/nextui/public/logo.svg +0 -30
  142. package/src/web/nextui/src/app/Home.css +0 -3
  143. package/src/web/nextui/src/app/api/route.ts +0 -6
  144. package/src/web/nextui/src/app/components/DarkMode.css +0 -22
  145. package/src/web/nextui/src/app/components/DarkMode.tsx +0 -17
  146. package/src/web/nextui/src/app/components/Logo.css +0 -32
  147. package/src/web/nextui/src/app/components/Logo.tsx +0 -11
  148. package/src/web/nextui/src/app/components/PageShell.css +0 -33
  149. package/src/web/nextui/src/app/components/PageShell.tsx +0 -87
  150. package/src/web/nextui/src/app/eval/ConfigModal.tsx +0 -84
  151. package/src/web/nextui/src/app/eval/Eval.css +0 -13
  152. package/src/web/nextui/src/app/eval/Eval.tsx +0 -79
  153. package/src/web/nextui/src/app/eval/EvalOutputPromptDialog.tsx +0 -127
  154. package/src/web/nextui/src/app/eval/ResultsCharts.tsx +0 -355
  155. package/src/web/nextui/src/app/eval/ResultsTable.css +0 -179
  156. package/src/web/nextui/src/app/eval/ResultsTable.tsx +0 -503
  157. package/src/web/nextui/src/app/eval/ResultsView.tsx +0 -301
  158. package/src/web/nextui/src/app/eval/ShareModal.tsx +0 -70
  159. package/src/web/nextui/src/app/eval/[id]/not-found.tsx +0 -5
  160. package/src/web/nextui/src/app/eval/[id]/page.css +0 -9
  161. package/src/web/nextui/src/app/eval/[id]/page.tsx +0 -20
  162. package/src/web/nextui/src/app/eval/index.css +0 -0
  163. package/src/web/nextui/src/app/eval/page.tsx +0 -8
  164. package/src/web/nextui/src/app/eval/store.ts +0 -18
  165. package/src/web/nextui/src/app/eval/types.ts +0 -20
  166. package/src/web/nextui/src/app/globals.css +0 -58
  167. package/src/web/nextui/src/app/layout.tsx +0 -25
  168. package/src/web/nextui/src/app/page.tsx +0 -7
  169. package/src/web/nextui/src/app/setup/AssertsForm.tsx +0 -118
  170. package/src/web/nextui/src/app/setup/PromptDialog.tsx +0 -77
  171. package/src/web/nextui/src/app/setup/PromptsSection.tsx +0 -190
  172. package/src/web/nextui/src/app/setup/ProviderConfigDialog.tsx +0 -99
  173. package/src/web/nextui/src/app/setup/ProviderSelector.tsx +0 -149
  174. package/src/web/nextui/src/app/setup/RunTestSuiteButton.tsx +0 -88
  175. package/src/web/nextui/src/app/setup/TestCaseDialog.tsx +0 -108
  176. package/src/web/nextui/src/app/setup/TestCasesSection.tsx +0 -154
  177. package/src/web/nextui/src/app/setup/VarsForm.tsx +0 -57
  178. package/src/web/nextui/src/app/setup/page.css +0 -3
  179. package/src/web/nextui/src/app/setup/page.tsx +0 -160
  180. package/src/web/nextui/src/util/api.ts +0 -1
  181. package/src/web/nextui/src/util/store.ts +0 -53
  182. package/src/web/nextui/tsconfig.json +0 -28
  183. package/src/web/server.ts +0 -151
  184. package/dist/src/web/nextui/_next/static/{US6gOx8LHTX_Hzm9aYNrC → Bl3o5lF4ON7Fjki46lPhr}/_ssgManifest.js +0 -0
package/src/share.ts DELETED
@@ -1,27 +0,0 @@
1
- import fetch from 'node-fetch';
2
-
3
- import type { EvaluateSummary, SharedResults, UnifiedConfig } from './types';
4
-
5
- export async function createShareableUrl(
6
- results: EvaluateSummary,
7
- config: Partial<UnifiedConfig>,
8
- ): Promise<string> {
9
- const sharedResults: SharedResults = {
10
- data: {
11
- version: 1,
12
- results,
13
- config,
14
- },
15
- };
16
-
17
- const response = await fetch('https://api.promptfoo.dev/eval', {
18
- method: 'POST',
19
- headers: {
20
- 'Content-Type': 'application/json',
21
- },
22
- body: JSON.stringify(sharedResults),
23
- });
24
-
25
- const { id } = (await response.json()) as { id: string };
26
- return `https://app.promptfoo.dev/eval/${id}`;
27
- }
package/src/suggestions.ts DELETED
@@ -1,63 +0,0 @@
1
- import { SUGGEST_PROMPTS_SYSTEM_MESSAGE } from './prompts';
2
- import { DefaultSuggestionsProvider } from './providers/openai';
3
-
4
- import type { TokenUsage } from './types';
5
-
6
- const DEFAULT_TEMPERATURE = 0.9;
7
-
8
- interface GeneratePromptsOutput {
9
- prompts?: string[];
10
- error?: string;
11
- tokensUsed: TokenUsage;
12
- }
13
-
14
- export async function generatePrompts(prompt: string, num: number): Promise<GeneratePromptsOutput> {
15
- const provider = DefaultSuggestionsProvider;
16
-
17
- const resp = await provider.callApi(
18
- JSON.stringify([
19
- SUGGEST_PROMPTS_SYSTEM_MESSAGE,
20
- {
21
- role: 'user',
22
- content: 'Generate a variant for the following prompt:',
23
- },
24
- {
25
- role: 'user',
26
- content: prompt,
27
- },
28
- ]),
29
- {
30
- temperature: DEFAULT_TEMPERATURE,
31
- },
32
- );
33
- if (resp.error || !resp.output) {
34
- return {
35
- error: resp.error || 'Unknown error',
36
- tokensUsed: {
37
- total: resp.tokenUsage?.total || 0,
38
- prompt: resp.tokenUsage?.prompt || 0,
39
- completion: resp.tokenUsage?.completion || 0,
40
- },
41
- };
42
- }
43
-
44
- try {
45
- return {
46
- prompts: [resp.output],
47
- tokensUsed: {
48
- total: resp.tokenUsage?.total || 0,
49
- prompt: resp.tokenUsage?.prompt || 0,
50
- completion: resp.tokenUsage?.completion || 0,
51
- },
52
- };
53
- } catch (err) {
54
- return {
55
- error: `Output is not valid JSON: ${resp.output}`,
56
- tokensUsed: {
57
- total: resp.tokenUsage?.total || 0,
58
- prompt: resp.tokenUsage?.prompt || 0,
59
- completion: resp.tokenUsage?.completion || 0,
60
- },
61
- };
62
- }
63
- }
package/src/table.ts DELETED
@@ -1,43 +0,0 @@
1
- import Table from 'cli-table3';
2
- import chalk from 'chalk';
3
- import type { EvaluateSummary } from './types';
4
-
5
- export function generateTable(summary: EvaluateSummary, tableCellMaxLength = 250, maxRows = 25) {
6
- const maxWidth = process.stdout.columns ? process.stdout.columns - 10 : 120;
7
- const head = summary.table.head;
8
- const headLength = head.prompts.length + head.vars.length;
9
- const table = new Table({
10
- head: [...head.vars, ...head.prompts.map((prompt) => prompt.display)],
11
- colWidths: Array(headLength).fill(Math.floor(maxWidth / headLength)),
12
- wordWrap: true,
13
- wrapOnWordBoundary: false,
14
- style: {
15
- head: ['blue', 'bold'],
16
- },
17
- });
18
- // Skip first row (header) and add the rest. Color PASS/FAIL
19
- for (const row of summary.table.body.slice(0, maxRows)) {
20
- table.push([
21
- ...row.vars,
22
- ...row.outputs.map(({ pass, score, text }) => {
23
- if (text.length > tableCellMaxLength) {
24
- text = text.slice(0, tableCellMaxLength) + '...';
25
- }
26
- if (pass) {
27
- return chalk.green('[PASS] ') + text;
28
- } else if (!pass) {
29
- // color everything red up until '---'
30
- return (
31
- chalk.red('[FAIL] ') +
32
- text
33
- .split('---')
34
- .map((c, idx) => (idx === 0 ? chalk.red.bold(c) : c))
35
- .join('---')
36
- );
37
- }
38
- return text;
39
- }),
40
- ]);
41
- }
42
- return table;
43
- }
package/src/tableOutput.html DELETED
@@ -1,52 +0,0 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>Table Output</title>
7
- <style>
8
- body {
9
- font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Helvetica, Arial,
10
- sans-serif;
11
- }
12
- table,
13
- th,
14
- td {
15
- border: 1px solid black;
16
- border-collapse: collapse;
17
- text-align: left;
18
- word-break: break-all;
19
- }
20
- th,
21
- td {
22
- padding: 5px;
23
- min-width: 200px;
24
- }
25
-
26
- tr > td[data-content^='[PASS]'] {
27
- color: green;
28
- }
29
- tr > td[data-content^='[FAIL]'] {
30
- color: #ad0000;
31
- }
32
- </style>
33
- </head>
34
- <body>
35
- <table>
36
- <thead>
37
- {% for header in table[0] %}
38
- <th>{{ header }}</th>
39
- {% endfor %}
40
- </thead>
41
- <tbody>
42
- {% for row in table.slice(1) %}
43
- <tr>
44
- {% for cell in row %}
45
- <td data-content="{{cell}}">{{ cell }}</td>
46
- {% endfor %}
47
- </tr>
48
- {% endfor %}
49
- </tbody>
50
- </table>
51
- </body>
52
- </html>
package/src/telemetry.ts DELETED
@@ -1,70 +0,0 @@
1
- import chalk from 'chalk';
2
-
3
- import packageJson from '../package.json';
4
- import logger from './logger';
5
- import { fetchWithTimeout, maybeRecordFirstRun } from './util';
6
-
7
- type TelemetryEvent = {
8
- event: string;
9
- packageVersion: string;
10
- properties: Record<string, string | number>;
11
- };
12
-
13
- type TelemetryEventTypes = 'eval_ran' | 'assertion_used' | 'command_used';
14
-
15
- const TELEMETRY_ENDPOINT = 'https://api.promptfoo.dev/telemetry';
16
-
17
- const TELEMETRY_TIMEOUT_MS = 1000;
18
-
19
- export class Telemetry {
20
- private events: TelemetryEvent[] = [];
21
-
22
- get disabled() {
23
- return process.env.PROMPTFOO_DISABLE_TELEMETRY === '1';
24
- }
25
-
26
- record(eventName: TelemetryEventTypes, properties: Record<string, string | number>): void {
27
- if (!this.disabled) {
28
- this.events.push({
29
- event: eventName,
30
- packageVersion: packageJson.version,
31
- properties,
32
- });
33
- }
34
- }
35
-
36
- maybeShowNotice(): void {
37
- if (maybeRecordFirstRun()) {
38
- logger.info(
39
- chalk.gray(
40
- 'Anonymous telemetry is enabled. For more info, see https://www.promptfoo.dev/docs/configuration/telemetry',
41
- ),
42
- );
43
- }
44
- }
45
-
46
- async send(): Promise<void> {
47
- if (!this.disabled && this.events.length > 0) {
48
- try {
49
- const response = await fetchWithTimeout(
50
- TELEMETRY_ENDPOINT,
51
- {
52
- method: 'POST',
53
- headers: {
54
- 'Content-Type': 'application/json',
55
- },
56
- body: JSON.stringify(this.events),
57
- },
58
- TELEMETRY_TIMEOUT_MS,
59
- );
60
-
61
- if (response.ok) {
62
- this.events = [];
63
- }
64
- } catch (err) {}
65
- }
66
- }
67
- }
68
-
69
- const telemetry = new Telemetry();
70
- export default telemetry;
package/src/types.ts DELETED
@@ -1,299 +0,0 @@
1
- export interface CommandLineOptions {
2
- // Shared with TestSuite
3
- prompts: string[];
4
- providers: string[];
5
- output: string;
6
-
7
- // Shared with EvaluateOptions
8
- maxConcurrency: string;
9
- repeat: string;
10
-
11
- // Command line only
12
- vars?: string;
13
- tests?: string;
14
- config?: string;
15
- verbose?: boolean;
16
- grader?: string;
17
- view?: string;
18
- tableCellMaxLength?: string;
19
- write?: boolean;
20
- cache?: boolean;
21
- table?: boolean;
22
- share?: boolean;
23
- progressBar?: boolean;
24
-
25
- generateSuggestions?: boolean;
26
- promptPrefix?: string;
27
- promptSuffix?: string;
28
- }
29
-
30
- export interface ProviderOptions {
31
- id?: ProviderId;
32
- config?: any;
33
- prompts?: string[]; // List of prompt display strings
34
- }
35
-
36
- export interface ApiProvider {
37
- id: () => string;
38
- callApi: (prompt: string) => Promise<ProviderResponse>;
39
- }
40
-
41
- export interface TokenUsage {
42
- total: number;
43
- prompt: number;
44
- completion: number;
45
- cached?: number;
46
- }
47
-
48
- export interface ProviderResponse {
49
- error?: string;
50
- output?: string;
51
- tokenUsage?: Partial<TokenUsage>;
52
- }
53
-
54
- export interface ProviderEmbeddingResponse {
55
- error?: string;
56
- embedding?: number[];
57
- tokenUsage?: Partial<TokenUsage>;
58
- }
59
-
60
- export interface CsvRow {
61
- [key: string]: string;
62
- }
63
-
64
- export type VarMapping = Record<string, string>;
65
-
66
- export interface GradingConfig {
67
- rubricPrompt?: string;
68
- provider?: string | ApiProvider;
69
- }
70
-
71
- export interface PromptConfig {
72
- prefix?: string;
73
- suffix?: string;
74
- }
75
-
76
- export interface OutputConfig {
77
- postprocess?: string;
78
- }
79
-
80
- export interface EvaluateOptions {
81
- maxConcurrency?: number;
82
- showProgressBar?: boolean;
83
- progressCallback?: (progress: number, total: number) => void;
84
- generateSuggestions?: boolean;
85
- repeat?: number;
86
- }
87
-
88
- export interface Prompt {
89
- raw: string;
90
- display: string;
91
- }
92
-
93
- export interface EvaluateResult {
94
- prompt: Prompt;
95
- vars: Record<string, string | object>;
96
- response?: ProviderResponse;
97
- error?: string;
98
- success: boolean;
99
- score: number;
100
- latencyMs: number;
101
- gradingResult?: GradingResult;
102
- }
103
-
104
- export interface EvaluateTableOutput {
105
- pass: boolean;
106
- score: number;
107
- text: string;
108
- prompt: string;
109
- latencyMs: number;
110
- tokenUsage?: Partial<TokenUsage>;
111
- gradingResult?: GradingResult;
112
- }
113
-
114
- export interface EvaluateTable {
115
- head: {
116
- prompts: Prompt[];
117
- vars: string[];
118
- };
119
-
120
- body: {
121
- outputs: EvaluateTableOutput[];
122
- vars: string[];
123
- }[];
124
- }
125
-
126
- export interface EvaluateStats {
127
- successes: number;
128
- failures: number;
129
- tokenUsage: Required<TokenUsage>;
130
- }
131
-
132
- export interface EvaluateSummary {
133
- version: number;
134
- results: EvaluateResult[];
135
- table: EvaluateTable;
136
- stats: EvaluateStats;
137
- }
138
-
139
- export interface GradingResult {
140
- pass: boolean;
141
- score: number;
142
- reason: string;
143
- tokensUsed?: TokenUsage;
144
- componentResults?: GradingResult[];
145
- assertion: Assertion | null;
146
- }
147
-
148
- type BaseAssertionTypes =
149
- | 'equals'
150
- | 'contains'
151
- | 'icontains'
152
- | 'contains-all'
153
- | 'contains-any'
154
- | 'starts-with'
155
- | 'regex'
156
- | 'is-json'
157
- | 'contains-json'
158
- | 'javascript'
159
- | 'python'
160
- | 'similar'
161
- | 'llm-rubric'
162
- | 'webhook'
163
- | 'rouge-n'
164
- | 'rouge-s'
165
- | 'rouge-l'
166
- | 'levenshtein';
167
-
168
- type NotPrefixed<T extends string> = `not-${T}`;
169
-
170
- export type AssertionType = BaseAssertionTypes | NotPrefixed<BaseAssertionTypes>;
171
-
172
- // TODO(ian): maybe Assertion should support {type: config} to make the yaml cleaner
173
- export interface Assertion {
174
- // Type of assertion
175
- type: AssertionType;
176
-
177
- // The expected value, if applicable
178
- value?:
179
- | string
180
- | string[]
181
- | object
182
- | ((output: string, testCase: AtomicTestCase, assertion: Assertion) => Promise<GradingResult>);
183
-
184
- // The threshold value, only applicable for similarity (cosine distance)
185
- threshold?: number;
186
-
187
- // The weight of this assertion compared to other assertions in the test case. Defaults to 1.
188
- weight?: number;
189
-
190
- // Some assertions (similarity, llm-rubric) require an LLM provider
191
- provider?: GradingConfig['provider'];
192
- }
193
-
194
- // Each test case is graded pass/fail. A test case represents a unique input to the LLM after substituting `vars` in the prompt.
195
- export interface TestCase {
196
- // Optional description of what you're testing
197
- description?: string;
198
-
199
- // Key-value pairs to substitute in the prompt
200
- vars?: Record<string, string | string[] | object>;
201
-
202
- // Optional list of automatic checks to run on the LLM output
203
- assert?: Assertion[];
204
-
205
- // Additional configuration settings for the prompt
206
- options?: PromptConfig & OutputConfig & GradingConfig;
207
- }
208
-
209
- export interface Scenario {
210
- // Optional description of what you're testing
211
- description?: string;
212
-
213
- // Default test case config
214
- config: Partial<TestCase>[];
215
-
216
- // Optional list of automatic checks to run on the LLM output
217
- tests: TestCase[];
218
- }
219
-
220
- // Same as a TestCase, except the `vars` object has been flattened into its final form.
221
- export interface AtomicTestCase extends TestCase {
222
- vars?: Record<string, string | object>;
223
- }
224
-
225
- // The test suite defines the "knobs" that we are tuning in prompt engineering: providers and prompts
226
- export interface TestSuite {
227
- // Optional description of what your LLM is trying to do
228
- description?: string;
229
-
230
- // One or more LLM APIs to use
231
- providers: ApiProvider[];
232
-
233
- // One or more prompt strings
234
- prompts: Prompt[];
235
-
236
- // Optional mapping of provider to prompt display strings. If not provided,
237
- // all prompts are used for all providers.
238
- providerPromptMap?: Record<string, string[]>;
239
-
240
- // Test cases
241
- tests?: TestCase[];
242
-
243
- // scenarios
244
- scenarios?: Scenario[];
245
-
246
- // Default test case config
247
- defaultTest?: Partial<TestCase>;
248
- }
249
-
250
- export type ProviderId = string;
251
-
252
- export type ProviderFunction = (prompt: string) => Promise<ProviderResponse>;
253
-
254
- export type ProviderOptionsMap = Record<ProviderId, ProviderOptions>;
255
-
256
- // TestSuiteConfig = Test Suite, but before everything is parsed and resolved. Providers are just strings, prompts are filepaths, tests can be filepath or inline.
257
- export interface TestSuiteConfig {
258
- // Optional description of what your LLM is trying to do
259
- description?: string;
260
-
261
- // One or more LLM APIs to use, for example: openai:gpt-3.5-turbo, openai:gpt-4, localai:chat:vicuna
262
- providers:
263
- | ProviderId
264
- | ProviderId[]
265
- | ProviderOptionsMap[]
266
- | ProviderOptions[]
267
- | ProviderFunction;
268
-
269
- // One or more prompt files to load
270
- prompts: string | string[];
271
-
272
- // Path to a test file, OR list of LLM prompt variations (aka "test case")
273
- tests: string | string[] | TestCase[];
274
-
275
- // Scenarios, groupings of data and tests to be evaluated
276
- scenarios?: Scenario[];
277
-
278
- // Sets the default properties for each test case. Useful for setting an assertion, on all test cases, for example.
279
- defaultTest?: Omit<TestCase, 'description'>;
280
-
281
- // Path to write output. Writes to console/web viewer if not set.
282
- outputPath?: string;
283
-
284
- // Determines whether or not sharing is enabled.
285
- sharing?: boolean;
286
- }
287
-
288
- export type UnifiedConfig = TestSuiteConfig & {
289
- evaluateOptions: EvaluateOptions;
290
- commandLineOptions: Partial<CommandLineOptions>;
291
- };
292
-
293
- export interface SharedResults {
294
- data: {
295
- version: number;
296
- results: EvaluateSummary;
297
- config: Partial<UnifiedConfig>;
298
- };
299
- }
package/src/updates.ts DELETED
@@ -1,46 +0,0 @@
1
- import chalk from 'chalk';
2
- import semverGt from 'semver/functions/gt';
3
-
4
- import logger from './logger';
5
- import { fetchWithTimeout } from './util';
6
- import packageJson from '../package.json';
7
-
8
- const VERSION = packageJson.version;
9
-
10
- export async function getLatestVersion(packageName: string) {
11
- const response = await fetchWithTimeout(`https://registry.npmjs.org/${packageName}`, {}, 1000);
12
- if (!response.ok) {
13
- throw new Error(`Failed to fetch package information for ${packageName}`);
14
- }
15
- const data = await response.json();
16
- return data['dist-tags'].latest;
17
- }
18
-
19
- export async function checkForUpdates(): Promise<boolean> {
20
- if (process.env.PROMPTFOO_DISABLE_UPDATE) {
21
- return false;
22
- }
23
-
24
- let latestVersion: string;
25
- try {
26
- latestVersion = await getLatestVersion('promptfoo');
27
- } catch {
28
- return false;
29
- }
30
- if (semverGt(latestVersion, VERSION)) {
31
- const border = '='.repeat(process.stdout.columns - 10);
32
- logger.info(
33
- `\n${border}
34
- ${chalk.yellow('⚠️')} The current version of promptfoo ${chalk.yellow(
35
- VERSION,
36
- )} is lower than the latest available version ${chalk.green(latestVersion)}.
37
-
38
- Please run ${chalk.green('npx promptfoo@latest')} or ${chalk.green(
39
- 'npm install -g promptfoo@latest',
40
- )} to update.
41
- ${border}\n`,
42
- );
43
- return true;
44
- }
45
- return false;
46
- }