promptfoo 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -5
- package/dist/assertions.d.ts +1 -1
- package/dist/assertions.d.ts.map +1 -1
- package/dist/assertions.js +10 -10
- package/dist/assertions.js.map +1 -1
- package/dist/cache.js +9 -9
- package/dist/cache.js.map +1 -1
- package/dist/evaluator.d.ts +1 -1
- package/dist/evaluator.d.ts.map +1 -1
- package/dist/evaluator.js +29 -22
- package/dist/evaluator.js.map +1 -1
- package/dist/index.d.ts +10 -10
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +18 -14
- package/dist/index.js.map +1 -1
- package/dist/main.js +41 -40
- package/dist/main.js.map +1 -1
- package/dist/providers/localai.js +11 -11
- package/dist/providers/localai.js.map +1 -1
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +30 -21
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers.d.ts +3 -3
- package/dist/providers.d.ts.map +1 -1
- package/dist/providers.js +15 -15
- package/dist/providers.js.map +1 -1
- package/dist/types.d.ts +1 -1
- package/dist/util.d.ts +2 -2
- package/dist/util.d.ts.map +1 -1
- package/dist/util.js +43 -15
- package/dist/util.js.map +1 -1
- package/dist/web/client/assets/index-9a9ba400.css +1 -0
- package/dist/web/client/assets/{index-8751749f.js → index-b72d3ca9.js} +12 -12
- package/dist/web/client/index.html +2 -2
- package/dist/web/server.js +9 -9
- package/dist/web/server.js.map +1 -1
- package/package.json +1 -1
- package/src/assertions.ts +5 -5
- package/src/cache.ts +2 -2
- package/src/evaluator.ts +24 -17
- package/src/index.ts +13 -8
- package/src/main.ts +10 -9
- package/src/providers/localai.ts +3 -3
- package/src/providers/openai.ts +16 -8
- package/src/providers.ts +3 -3
- package/src/types.ts +1 -1
- package/src/util.ts +42 -14
- package/src/web/client/package-lock.json +5729 -0
- package/src/web/client/src/ResultsTable.css +19 -0
- package/src/web/client/src/ResultsTable.tsx +51 -37
- package/src/web/client/src/ResultsView.tsx +7 -7
- package/src/web/server.ts +3 -3
- package/dist/web/client/assets/index-207192fc.css +0 -1
|
@@ -5,8 +5,8 @@
|
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="favicon.ico" />
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
7
7
|
<title>promptfoo web viewer</title>
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-8751749f.js"></script>
|
|
9
|
-
<link rel="stylesheet" href="/assets/index-207192fc.css">
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-b72d3ca9.js"></script>
|
|
9
|
+
<link rel="stylesheet" href="/assets/index-9a9ba400.css">
|
|
10
10
|
</head>
|
|
11
11
|
<body>
|
|
12
12
|
<div id="root"></div>
|
package/dist/web/server.js
CHANGED
|
@@ -13,12 +13,12 @@ const express_1 = __importDefault(require("express"));
|
|
|
13
13
|
const cors_1 = __importDefault(require("cors"));
|
|
14
14
|
const opener_1 = __importDefault(require("opener"));
|
|
15
15
|
const socket_io_1 = require("socket.io");
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
const
|
|
16
|
+
const logger_1 = __importDefault(require("../logger"));
|
|
17
|
+
const esm_1 = require("../esm");
|
|
18
|
+
const util_1 = require("../util");
|
|
19
19
|
function init(port = 15500) {
|
|
20
20
|
const app = (0, express_1.default)();
|
|
21
|
-
const staticDir = node_path_1.default.join((0,
|
|
21
|
+
const staticDir = node_path_1.default.join((0, esm_1.getDirectory)(), 'web', 'client');
|
|
22
22
|
app.use((0, cors_1.default)());
|
|
23
23
|
app.use(express_1.default.json());
|
|
24
24
|
app.use(express_1.default.static(staticDir));
|
|
@@ -28,7 +28,7 @@ function init(port = 15500) {
|
|
|
28
28
|
origin: '*',
|
|
29
29
|
},
|
|
30
30
|
});
|
|
31
|
-
const latestJsonPath = (0,
|
|
31
|
+
const latestJsonPath = (0, util_1.getLatestResultsPath)();
|
|
32
32
|
const readLatestJson = () => {
|
|
33
33
|
const data = fs_1.default.readFileSync(latestJsonPath, 'utf8');
|
|
34
34
|
const jsonData = JSON.parse(data);
|
|
@@ -46,7 +46,7 @@ function init(port = 15500) {
|
|
|
46
46
|
});
|
|
47
47
|
httpServer.listen(port, () => {
|
|
48
48
|
const url = `http://localhost:${port}`;
|
|
49
|
-
|
|
49
|
+
logger_1.default.info(`Server listening at ${url}`);
|
|
50
50
|
const rl = node_readline_1.default.createInterface({
|
|
51
51
|
input: process.stdin,
|
|
52
52
|
output: process.stdout,
|
|
@@ -55,14 +55,14 @@ function init(port = 15500) {
|
|
|
55
55
|
if (answer.toLowerCase().startsWith('y')) {
|
|
56
56
|
try {
|
|
57
57
|
await (0, opener_1.default)(url);
|
|
58
|
-
|
|
58
|
+
logger_1.default.info(`Opening browser to: ${url}`);
|
|
59
59
|
}
|
|
60
60
|
catch (err) {
|
|
61
|
-
|
|
61
|
+
logger_1.default.error(`Failed to open browser: ${String(err)}`);
|
|
62
62
|
}
|
|
63
63
|
}
|
|
64
64
|
rl.close();
|
|
65
|
-
|
|
65
|
+
logger_1.default.info('Press Ctrl+C to stop the server');
|
|
66
66
|
});
|
|
67
67
|
});
|
|
68
68
|
}
|
package/dist/web/server.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"server.js","sourceRoot":"","sources":["../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAGrD,
|
|
1
|
+
{"version":3,"file":"server.js","sourceRoot":"","sources":["../../src/web/server.ts"],"names":[],"mappings":";;;;;;AAAA,4CAAoB;AACpB,0DAA6B;AAC7B,kEAAqC;AACrC,0DAA6B;AAE7B,wDAAgC;AAChC,sDAA8B;AAC9B,gDAAwB;AACxB,oDAA4B;AAC5B,yCAAqD;AAGrD,uDAA+B;AAC/B,gCAAsC;AACtC,kCAA+C;AAI/C,SAAgB,IAAI,CAAC,IAAI,GAAG,KAAK;IAC/B,MAAM,GAAG,GAAG,IAAA,iBAAO,GAAE,CAAC;IAEtB,MAAM,SAAS,GAAG,mBAAI,CAAC,IAAI,CAAC,IAAA,kBAAY,GAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC;IAE7D,GAAG,CAAC,GAAG,CAAC,IAAA,cAAI,GAAE,CAAC,CAAC;IAChB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACxB,GAAG,CAAC,GAAG,CAAC,iBAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC;IAEnC,MAAM,UAAU,GAAG,mBAAI,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC;IAC1C,MAAM,EAAE,GAAG,IAAI,kBAAc,CAAC,UAAU,EAAE;QACxC,IAAI,EAAE;YACJ,MAAM,EAAE,GAAG;SACZ;KACF,CAAC,CAAC;IAEH,MAAM,cAAc,GAAG,IAAA,2BAAoB,GAAE,CAAC;IAC9C,MAAM,cAAc,GAAG,GAAG,EAAE;QAC1B,MAAM,IAAI,GAAG,YAAE,CAAC,YAAY,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;QACrD,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,QAAQ,CAAC,KAAK,CAAC;IACxB,CAAC,CAAC;IAEF,EAAE,CAAC,EAAE,CAAC,YAAY,EAAE,CAAC,MAAM,EAAE,EAAE;QAC7B,qDAAqD;QACrD,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,CAAC,CAAC;QAEjD,6DAA6D;QAC7D,YAAE,CAAC,KAAK,CACN,cAAc,EACd,IAAA,kBAAQ,EAAC,CAAC,KAAa,EAAE,EAAE;YACzB,IAAI,KAAK,KAAK,QAAQ,EAAE;gBACtB,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE,EAAE,CAAC,CAAC;aACpD;QACH,CAAC,EAAE,GAAG,CAAC,CACR,CAAC;IACJ,CAAC,CAAC,CAAC;IAEH,UAAU,CAAC,MAAM,CAAC,IAAI,EAAE,GAAG,EAAE;QAC3B,MAAM,GAAG,GAAG,oBAAoB,IAAI,EAAE,CAAC;QACvC,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;QAE1C,MAAM,EAAE,GAAG,uBAAQ,CAAC,eAAe,CAAC;YAClC,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,EAAE,CAAC,QAAQ,CAAC,qDAAqD,EAAE,KAAK,EAAE,MAAM,EAAE,EAAE;YAClF,IAAI,MAAM,CAAC,WAAW,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;gBACxC,IAAI;oBACF,MAAM,IAAA,gBAAM,EAAC,GAAG,CAAC,CAAC;oBAClB,gBAAM,CAAC,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;iBAC3C;gBAAC,OAAO,GAAG,EAAE;oBACZ,gBAAM,CAAC,KAAK,CAAC,2BAA2B,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CA
AC;iBACxD;aACF;YACD,EAAE,CAAC,KAAK,EAAE,CAAC;YACX,gBAAM,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AA3DD,oBA2DC"}
|
package/package.json
CHANGED
package/src/assertions.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
import invariant from 'tiny-invariant';
|
|
2
2
|
import nunjucks from 'nunjucks';
|
|
3
3
|
|
|
4
|
-
import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai
|
|
5
|
-
import { cosineSimilarity } from './util
|
|
6
|
-
import { loadApiProvider } from './providers
|
|
7
|
-
import { DEFAULT_GRADING_PROMPT } from './prompts
|
|
4
|
+
import { DefaultEmbeddingProvider, DefaultGradingProvider } from './providers/openai';
|
|
5
|
+
import { cosineSimilarity } from './util';
|
|
6
|
+
import { loadApiProvider } from './providers';
|
|
7
|
+
import { DEFAULT_GRADING_PROMPT } from './prompts';
|
|
8
8
|
|
|
9
|
-
import type { Assertion, GradingConfig, TestCase, GradingResult, AtomicTestCase } from './types
|
|
9
|
+
import type { Assertion, GradingConfig, TestCase, GradingResult, AtomicTestCase } from './types';
|
|
10
10
|
|
|
11
11
|
const SIMILAR_REGEX = /similar(?::|\((\d+(\.\d+)?)\):)/;
|
|
12
12
|
|
package/src/cache.ts
CHANGED
|
@@ -4,8 +4,8 @@ import path from 'node:path';
|
|
|
4
4
|
import cacheManager from 'cache-manager';
|
|
5
5
|
import fsStore from 'cache-manager-fs-hash';
|
|
6
6
|
|
|
7
|
-
import logger from './logger
|
|
8
|
-
import { getConfigDirectoryPath, fetchWithTimeout } from './util
|
|
7
|
+
import logger from './logger';
|
|
8
|
+
import { getConfigDirectoryPath, fetchWithTimeout } from './util';
|
|
9
9
|
|
|
10
10
|
import type { Cache } from 'cache-manager';
|
|
11
11
|
import type { RequestInfo, RequestInit } from 'node-fetch';
|
package/src/evaluator.ts
CHANGED
|
@@ -4,8 +4,8 @@ import async from 'async';
|
|
|
4
4
|
import chalk from 'chalk';
|
|
5
5
|
import nunjucks from 'nunjucks';
|
|
6
6
|
|
|
7
|
-
import logger from './logger
|
|
8
|
-
import { runAssertions } from './assertions
|
|
7
|
+
import logger from './logger';
|
|
8
|
+
import { runAssertions } from './assertions';
|
|
9
9
|
|
|
10
10
|
import type { SingleBar } from 'cli-progress';
|
|
11
11
|
import type {
|
|
@@ -19,12 +19,12 @@ import type {
|
|
|
19
19
|
Prompt,
|
|
20
20
|
TestCase,
|
|
21
21
|
AtomicTestCase,
|
|
22
|
-
} from './types
|
|
23
|
-
import { generatePrompts } from './suggestions
|
|
22
|
+
} from './types';
|
|
23
|
+
import { generatePrompts } from './suggestions';
|
|
24
24
|
|
|
25
25
|
interface RunEvalOptions {
|
|
26
26
|
provider: ApiProvider;
|
|
27
|
-
prompt:
|
|
27
|
+
prompt: Prompt;
|
|
28
28
|
|
|
29
29
|
test: AtomicTestCase;
|
|
30
30
|
|
|
@@ -86,10 +86,13 @@ class Evaluator {
|
|
|
86
86
|
includeProviderId,
|
|
87
87
|
}: RunEvalOptions): Promise<EvaluateResult> {
|
|
88
88
|
const vars = test.vars || {};
|
|
89
|
-
const renderedPrompt = nunjucks.renderString(prompt, vars);
|
|
89
|
+
const renderedPrompt = nunjucks.renderString(prompt.raw, vars);
|
|
90
90
|
|
|
91
91
|
// Note that we're using original prompt, not renderedPrompt
|
|
92
|
-
|
|
92
|
+
let promptDisplay = prompt.display;
|
|
93
|
+
if (includeProviderId) {
|
|
94
|
+
promptDisplay = `[${provider.id()}] ${promptDisplay}`;
|
|
95
|
+
}
|
|
93
96
|
|
|
94
97
|
const setup = {
|
|
95
98
|
prompt: {
|
|
@@ -155,7 +158,7 @@ class Evaluator {
|
|
|
155
158
|
if (options.generateSuggestions) {
|
|
156
159
|
// TODO(ian): Move this into its own command/file
|
|
157
160
|
logger.info(`Generating prompt variations...`);
|
|
158
|
-
const { prompts: newPrompts, error } = await generatePrompts(testSuite.prompts[0], 1);
|
|
161
|
+
const { prompts: newPrompts, error } = await generatePrompts(testSuite.prompts[0].raw, 1);
|
|
159
162
|
if (error || !newPrompts) {
|
|
160
163
|
throw new Error(`Failed to generate prompts: ${error}`);
|
|
161
164
|
}
|
|
@@ -178,7 +181,7 @@ class Evaluator {
|
|
|
178
181
|
async (answer) => {
|
|
179
182
|
rl.close();
|
|
180
183
|
if (answer.toLowerCase().startsWith('y')) {
|
|
181
|
-
testSuite.prompts.push(prompt);
|
|
184
|
+
testSuite.prompts.push({ raw: prompt, display: prompt });
|
|
182
185
|
numAdded++;
|
|
183
186
|
} else {
|
|
184
187
|
logger.info('Skipping this prompt.');
|
|
@@ -196,13 +199,13 @@ class Evaluator {
|
|
|
196
199
|
}
|
|
197
200
|
|
|
198
201
|
// Split prompts by provider
|
|
199
|
-
for (const
|
|
202
|
+
for (const prompt of testSuite.prompts) {
|
|
200
203
|
for (const provider of testSuite.providers) {
|
|
201
|
-
const
|
|
202
|
-
testSuite.providers.length > 1 ? `[${provider.id()}] ${
|
|
204
|
+
const updatedDisplay =
|
|
205
|
+
testSuite.providers.length > 1 ? `[${provider.id()}] ${prompt.display}` : prompt.display;
|
|
203
206
|
prompts.push({
|
|
204
|
-
|
|
205
|
-
display,
|
|
207
|
+
...prompt,
|
|
208
|
+
display: updatedDisplay,
|
|
206
209
|
});
|
|
207
210
|
}
|
|
208
211
|
}
|
|
@@ -248,6 +251,7 @@ class Evaluator {
|
|
|
248
251
|
// And progress bar...
|
|
249
252
|
let progressbar: SingleBar | undefined;
|
|
250
253
|
if (options.showProgressBar) {
|
|
254
|
+
// FIXME(ian): Add var combinations too
|
|
251
255
|
const totalNumRuns =
|
|
252
256
|
testSuite.prompts.length * testSuite.providers.length * (tests.length || 1);
|
|
253
257
|
const cliProgress = await import('cli-progress');
|
|
@@ -284,11 +288,14 @@ class Evaluator {
|
|
|
284
288
|
const varCombinations = generateVarCombinations(testCase.vars || {});
|
|
285
289
|
for (const vars of varCombinations) {
|
|
286
290
|
let colIndex = 0;
|
|
287
|
-
for (const
|
|
291
|
+
for (const prompt of testSuite.prompts) {
|
|
288
292
|
for (const provider of testSuite.providers) {
|
|
289
293
|
runEvalOptions.push({
|
|
290
294
|
provider,
|
|
291
|
-
prompt:
|
|
295
|
+
prompt: {
|
|
296
|
+
...prompt,
|
|
297
|
+
raw: prependToPrompt + prompt.raw + appendToPrompt,
|
|
298
|
+
},
|
|
292
299
|
test: { ...testCase, vars },
|
|
293
300
|
includeProviderId: testSuite.providers.length > 1,
|
|
294
301
|
rowIndex,
|
|
@@ -314,7 +321,7 @@ class Evaluator {
|
|
|
314
321
|
if (progressbar) {
|
|
315
322
|
progressbar.increment({
|
|
316
323
|
provider: options.provider.id(),
|
|
317
|
-
prompt: options.prompt.slice(0, 10),
|
|
324
|
+
prompt: options.prompt.raw.slice(0, 10),
|
|
318
325
|
vars: Object.entries(options.test.vars || {})
|
|
319
326
|
.map(([k, v]) => `${k}=${v}`)
|
|
320
327
|
.join(' ')
|
package/src/index.ts
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import { evaluate as doEvaluate } from './evaluator
|
|
2
|
-
import { loadApiProviders } from './providers
|
|
3
|
-
import assertions from './assertions
|
|
4
|
-
import providers from './providers
|
|
1
|
+
import { evaluate as doEvaluate } from './evaluator';
|
|
2
|
+
import { loadApiProviders } from './providers';
|
|
3
|
+
import assertions from './assertions';
|
|
4
|
+
import providers from './providers';
|
|
5
5
|
|
|
6
|
-
import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types
|
|
7
|
-
import { readTests } from './util
|
|
6
|
+
import type { EvaluateOptions, TestSuite, TestSuiteConfig } from './types';
|
|
7
|
+
import { readTests } from './util';
|
|
8
8
|
|
|
9
|
-
export * from './types
|
|
9
|
+
export * from './types';
|
|
10
10
|
|
|
11
11
|
interface EvaluateTestSuite extends TestSuiteConfig {
|
|
12
12
|
prompts: string[];
|
|
@@ -15,9 +15,14 @@ interface EvaluateTestSuite extends TestSuiteConfig {
|
|
|
15
15
|
async function evaluate(testSuite: EvaluateTestSuite, options: EvaluateOptions = {}) {
|
|
16
16
|
const constructedTestSuite: TestSuite = {
|
|
17
17
|
...testSuite,
|
|
18
|
-
prompts: testSuite.prompts, // raw prompts expected
|
|
19
18
|
providers: await loadApiProviders(testSuite.providers),
|
|
20
19
|
tests: await readTests(testSuite.tests),
|
|
20
|
+
|
|
21
|
+
// Full prompts expected (not filepaths)
|
|
22
|
+
prompts: testSuite.prompts.map((promptContent) => ({
|
|
23
|
+
raw: promptContent,
|
|
24
|
+
display: promptContent,
|
|
25
|
+
})),
|
|
21
26
|
};
|
|
22
27
|
return doEvaluate(constructedTestSuite, options);
|
|
23
28
|
}
|
package/src/main.ts
CHANGED
|
@@ -6,9 +6,9 @@ import Table from 'cli-table3';
|
|
|
6
6
|
import chalk from 'chalk';
|
|
7
7
|
import { Command } from 'commander';
|
|
8
8
|
|
|
9
|
-
import logger, { setLogLevel } from './logger
|
|
10
|
-
import { loadApiProvider, loadApiProviders } from './providers
|
|
11
|
-
import { evaluate } from './evaluator
|
|
9
|
+
import logger, { setLogLevel } from './logger';
|
|
10
|
+
import { loadApiProvider, loadApiProviders } from './providers';
|
|
11
|
+
import { evaluate } from './evaluator';
|
|
12
12
|
import {
|
|
13
13
|
maybeReadConfig,
|
|
14
14
|
readConfig,
|
|
@@ -16,10 +16,10 @@ import {
|
|
|
16
16
|
readTests,
|
|
17
17
|
writeLatestResults,
|
|
18
18
|
writeOutput,
|
|
19
|
-
} from './util
|
|
20
|
-
import { getDirectory } from './esm
|
|
21
|
-
import { init } from './web/server
|
|
22
|
-
import { disableCache } from './cache
|
|
19
|
+
} from './util';
|
|
20
|
+
import { getDirectory } from './esm';
|
|
21
|
+
import { init } from './web/server';
|
|
22
|
+
import { disableCache } from './cache';
|
|
23
23
|
|
|
24
24
|
import type {
|
|
25
25
|
CommandLineOptions,
|
|
@@ -27,8 +27,8 @@ import type {
|
|
|
27
27
|
TestCase,
|
|
28
28
|
TestSuite,
|
|
29
29
|
UnifiedConfig,
|
|
30
|
-
} from './types
|
|
31
|
-
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding
|
|
30
|
+
} from './types';
|
|
31
|
+
import { DEFAULT_README, DEFAULT_YAML_CONFIG, DEFAULT_PROMPTS } from './onboarding';
|
|
32
32
|
|
|
33
33
|
function createDummyFiles(directory: string | null) {
|
|
34
34
|
if (directory) {
|
|
@@ -178,6 +178,7 @@ async function main() {
|
|
|
178
178
|
prompts: cmdObj.prompts || config.prompts,
|
|
179
179
|
providers: cmdObj.providers || config.providers,
|
|
180
180
|
tests: cmdObj.tests || cmdObj.vars || config.tests,
|
|
181
|
+
defaultTest: config.defaultTest,
|
|
181
182
|
};
|
|
182
183
|
}
|
|
183
184
|
|
package/src/providers/localai.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import logger from '../logger
|
|
2
|
-
import { fetchJsonWithCache } from '../cache
|
|
3
|
-
import { REQUEST_TIMEOUT_MS } from './shared
|
|
1
|
+
import logger from '../logger';
|
|
2
|
+
import { fetchJsonWithCache } from '../cache';
|
|
3
|
+
import { REQUEST_TIMEOUT_MS } from './shared';
|
|
4
4
|
|
|
5
5
|
import type { ApiProvider, ProviderResponse } from '../types.js';
|
|
6
6
|
|
package/src/providers/openai.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
import logger from '../logger
|
|
2
|
-
import { fetchJsonWithCache } from '../cache
|
|
3
|
-
import { REQUEST_TIMEOUT_MS } from './shared
|
|
1
|
+
import logger from '../logger';
|
|
2
|
+
import { fetchJsonWithCache } from '../cache';
|
|
3
|
+
import { REQUEST_TIMEOUT_MS } from './shared';
|
|
4
4
|
|
|
5
5
|
import type { ApiProvider, ProviderEmbeddingResponse, ProviderResponse } from '../types.js';
|
|
6
6
|
|
|
@@ -126,12 +126,20 @@ export class OpenAiCompletionProvider extends OpenAiGenericProvider {
|
|
|
126
126
|
);
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
let stop: string;
|
|
130
|
+
try {
|
|
131
|
+
stop = process.env.OPENAI_STOP
|
|
132
|
+
? JSON.parse(process.env.OPENAI_STOP)
|
|
133
|
+
: ['<|im_end|>', '<|endoftext|>'];
|
|
134
|
+
} catch (err) {
|
|
135
|
+
throw new Error(`OPENAI_STOP is not a valid JSON string: ${err}`);
|
|
136
|
+
}
|
|
129
137
|
const body = {
|
|
130
138
|
model: this.modelName,
|
|
131
139
|
prompt,
|
|
132
|
-
max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
|
|
133
|
-
temperature: options?.temperature ?? (process.env.
|
|
134
|
-
stop
|
|
140
|
+
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
141
|
+
temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
142
|
+
stop,
|
|
135
143
|
};
|
|
136
144
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
137
145
|
let data,
|
|
@@ -210,8 +218,8 @@ export class OpenAiChatCompletionProvider extends OpenAiGenericProvider {
|
|
|
210
218
|
const body = {
|
|
211
219
|
model: this.modelName,
|
|
212
220
|
messages: messages,
|
|
213
|
-
max_tokens: process.env.OPENAI_MAX_TOKENS || 1024,
|
|
214
|
-
temperature: options?.temperature ?? (process.env.
|
|
221
|
+
max_tokens: parseInt(process.env.OPENAI_MAX_TOKENS || '1024'),
|
|
222
|
+
temperature: options?.temperature ?? parseFloat(process.env.OPENAI_TEMPERATURE || '0'),
|
|
215
223
|
};
|
|
216
224
|
logger.debug(`Calling OpenAI API: ${JSON.stringify(body)}`);
|
|
217
225
|
|
package/src/providers.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
2
|
|
|
3
|
-
import { ApiProvider } from './types
|
|
3
|
+
import { ApiProvider } from './types';
|
|
4
4
|
|
|
5
|
-
import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai
|
|
6
|
-
import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai
|
|
5
|
+
import { OpenAiCompletionProvider, OpenAiChatCompletionProvider } from './providers/openai';
|
|
6
|
+
import { LocalAiCompletionProvider, LocalAiChatProvider } from './providers/localai';
|
|
7
7
|
|
|
8
8
|
export async function loadApiProviders(providerPaths: string | string[]): Promise<ApiProvider[]> {
|
|
9
9
|
if (typeof providerPaths === 'string') {
|
package/src/types.ts
CHANGED
package/src/util.ts
CHANGED
|
@@ -10,13 +10,13 @@ import { parse as parsePath } from 'path';
|
|
|
10
10
|
import { parse as parseCsv } from 'csv-parse/sync';
|
|
11
11
|
import { stringify } from 'csv-stringify/sync';
|
|
12
12
|
|
|
13
|
-
import logger from './logger
|
|
14
|
-
import { getDirectory } from './esm
|
|
13
|
+
import logger from './logger';
|
|
14
|
+
import { getDirectory } from './esm';
|
|
15
15
|
|
|
16
16
|
import type { RequestInfo, RequestInit, Response } from 'node-fetch';
|
|
17
17
|
|
|
18
|
-
import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase } from './types
|
|
19
|
-
import { assertionFromString } from './assertions
|
|
18
|
+
import type { Assertion, CsvRow, EvaluateSummary, UnifiedConfig, TestCase, Prompt } from './types';
|
|
19
|
+
import { assertionFromString } from './assertions';
|
|
20
20
|
|
|
21
21
|
const PROMPT_DELIMITER = '---';
|
|
22
22
|
|
|
@@ -44,17 +44,36 @@ export function readConfig(configPath: string): UnifiedConfig {
|
|
|
44
44
|
case '.js':
|
|
45
45
|
return require(configPath) as UnifiedConfig;
|
|
46
46
|
case '.yaml':
|
|
47
|
+
case '.yml':
|
|
47
48
|
return yaml.load(fs.readFileSync(configPath, 'utf-8')) as UnifiedConfig;
|
|
48
49
|
default:
|
|
49
50
|
throw new Error(`Unsupported configuration file format: ${ext}`);
|
|
50
51
|
}
|
|
51
52
|
}
|
|
52
53
|
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
54
|
+
enum PromptInputType {
|
|
55
|
+
STRING = 1,
|
|
56
|
+
ARRAY = 2,
|
|
57
|
+
NAMED = 3,
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export function readPrompts(
|
|
61
|
+
promptPathOrGlobs: string | string[] | Record<string, string>,
|
|
62
|
+
): Prompt[] {
|
|
63
|
+
let promptPaths: string[] = [];
|
|
64
|
+
let promptContents: Prompt[] = [];
|
|
65
|
+
|
|
66
|
+
let inputType: PromptInputType | undefined;
|
|
67
|
+
if (typeof promptPathOrGlobs === 'string') {
|
|
68
|
+
promptPaths = [promptPathOrGlobs];
|
|
69
|
+
inputType = PromptInputType.STRING;
|
|
70
|
+
} else if (Array.isArray(promptPathOrGlobs)) {
|
|
71
|
+
promptPaths = promptPathOrGlobs.flatMap((pathOrGlob) => globSync(pathOrGlob));
|
|
72
|
+
inputType = PromptInputType.ARRAY;
|
|
73
|
+
} else if (typeof promptPathOrGlobs === 'object') {
|
|
74
|
+
promptPaths = Object.keys(promptPathOrGlobs);
|
|
75
|
+
inputType = PromptInputType.NAMED;
|
|
76
|
+
}
|
|
58
77
|
|
|
59
78
|
for (const promptPath of promptPaths) {
|
|
60
79
|
const stat = fs.statSync(promptPath);
|
|
@@ -63,18 +82,27 @@ export function readPrompts(promptPathsOrGlobs: string | string[]): string[] {
|
|
|
63
82
|
const fileContents = filesInDirectory.map((fileName) =>
|
|
64
83
|
fs.readFileSync(path.join(promptPath, fileName), 'utf-8'),
|
|
65
84
|
);
|
|
66
|
-
promptContents.push(...fileContents);
|
|
85
|
+
promptContents.push(...fileContents.map((content) => ({ raw: content, display: content })));
|
|
67
86
|
} else {
|
|
68
87
|
const fileContent = fs.readFileSync(promptPath, 'utf-8');
|
|
69
|
-
|
|
88
|
+
let display;
|
|
89
|
+
if (inputType === PromptInputType.NAMED) {
|
|
90
|
+
display = (promptPathOrGlobs as Record<string, string>)[promptPath];
|
|
91
|
+
} else {
|
|
92
|
+
display = fileContent.length > 200 ? promptPath : fileContent;
|
|
93
|
+
}
|
|
94
|
+
promptContents.push({ raw: fileContent, display });
|
|
70
95
|
}
|
|
71
96
|
}
|
|
72
97
|
|
|
73
|
-
if (promptContents.length === 1) {
|
|
74
|
-
|
|
98
|
+
if (promptContents.length === 1 && inputType !== PromptInputType.NAMED) {
|
|
99
|
+
const content = promptContents[0].raw;
|
|
100
|
+
promptContents = content
|
|
101
|
+
.split(PROMPT_DELIMITER)
|
|
102
|
+
.map((p) => ({ raw: p.trim(), display: p.trim() }));
|
|
75
103
|
}
|
|
76
104
|
if (promptContents.length === 0) {
|
|
77
|
-
throw new Error(`There are no prompts in ${
|
|
105
|
+
throw new Error(`There are no prompts in ${JSON.stringify(promptPathOrGlobs)}`);
|
|
78
106
|
}
|
|
79
107
|
return promptContents;
|
|
80
108
|
}
|