promptfoo 0.103.2 → 0.103.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +9 -9
- package/dist/src/app/assets/index-BR1tgrAf.css +1 -0
- package/dist/src/app/assets/{index-ziw_4_A9.js → index-Br_ykdEV.js} +231 -231
- package/dist/src/app/assets/{index.es-XehlSHxK.js → index.es-lNQS-wgf.js} +1 -1
- package/dist/src/app/assets/{sync-DDIaa9Ut.js → sync-CRhW4wge.js} +1 -1
- package/dist/src/app/index.html +2 -2
- package/dist/src/assertions/index.d.ts +2 -1
- package/dist/src/assertions/index.d.ts.map +1 -1
- package/dist/src/assertions/index.js +29 -5
- package/dist/src/assertions/index.js.map +1 -1
- package/dist/src/assertions/utils.d.ts +1 -0
- package/dist/src/assertions/utils.d.ts.map +1 -1
- package/dist/src/commands/debug.d.ts.map +1 -1
- package/dist/src/commands/debug.js +6 -0
- package/dist/src/commands/debug.js.map +1 -1
- package/dist/src/commands/eval/filterErrorTests.d.ts +5 -0
- package/dist/src/commands/eval/filterErrorTests.d.ts.map +1 -0
- package/dist/src/commands/eval/filterErrorTests.js +19 -0
- package/dist/src/commands/eval/filterErrorTests.js.map +1 -0
- package/dist/src/commands/eval/filterTests.d.ts +1 -0
- package/dist/src/commands/eval/filterTests.d.ts.map +1 -1
- package/dist/src/commands/eval/filterTests.js +4 -0
- package/dist/src/commands/eval/filterTests.js.map +1 -1
- package/dist/src/commands/eval.d.ts.map +1 -1
- package/dist/src/commands/eval.js +1 -0
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/database/tables.d.ts +6 -0
- package/dist/src/database/tables.d.ts.map +1 -1
- package/dist/src/envars.d.ts +2 -0
- package/dist/src/envars.d.ts.map +1 -1
- package/dist/src/envars.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +2 -1
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/models/evalResult.d.ts.map +1 -1
- package/dist/src/models/evalResult.js +9 -1
- package/dist/src/models/evalResult.js.map +1 -1
- package/dist/src/providers/llama.d.ts.map +1 -1
- package/dist/src/providers/llama.js +8 -1
- package/dist/src/providers/llama.js.map +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +6 -13
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/vertex.d.ts +3 -0
- package/dist/src/providers/vertex.d.ts.map +1 -1
- package/dist/src/providers/vertex.js +23 -9
- package/dist/src/providers/vertex.js.map +1 -1
- package/dist/src/providers/watsonx.d.ts.map +1 -1
- package/dist/src/providers/watsonx.js +9 -0
- package/dist/src/providers/watsonx.js.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +15 -0
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/commands/generate.d.ts.map +1 -1
- package/dist/src/redteam/commands/generate.js +4 -0
- package/dist/src/redteam/commands/generate.js.map +1 -1
- package/dist/src/redteam/commands/run.d.ts.map +1 -1
- package/dist/src/redteam/commands/run.js +5 -4
- package/dist/src/redteam/commands/run.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +6 -2
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +78 -4
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/index.d.ts.map +1 -1
- package/dist/src/redteam/index.js +2 -3
- package/dist/src/redteam/index.js.map +1 -1
- package/dist/src/redteam/plugins/cyberseceval.d.ts +9 -0
- package/dist/src/redteam/plugins/cyberseceval.d.ts.map +1 -0
- package/dist/src/redteam/plugins/cyberseceval.js +86 -0
- package/dist/src/redteam/plugins/cyberseceval.js.map +1 -0
- package/dist/src/redteam/plugins/index.d.ts.map +1 -1
- package/dist/src/redteam/plugins/index.js +10 -7
- package/dist/src/redteam/plugins/index.js.map +1 -1
- package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterative.js +59 -5
- package/dist/src/redteam/providers/iterative.js.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.d.ts +6 -2
- package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.js +322 -131
- package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.d.ts +37 -26
- package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.js +193 -85
- package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
- package/dist/src/redteam/util.d.ts.map +1 -1
- package/dist/src/redteam/util.js +14 -0
- package/dist/src/redteam/util.js.map +1 -1
- package/dist/src/server/routes/providers.js +11 -6
- package/dist/src/server/routes/providers.js.map +1 -1
- package/dist/src/types/env.d.ts +2 -0
- package/dist/src/types/env.d.ts.map +1 -1
- package/dist/src/types/index.d.ts +161 -9
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +4 -1
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/types/providers.d.ts +22 -0
- package/dist/src/types/providers.d.ts.map +1 -1
- package/dist/src/types/providers.js.map +1 -1
- package/dist/src/util/apiHealth.d.ts.map +1 -1
- package/dist/src/util/apiHealth.js +48 -3
- package/dist/src/util/apiHealth.js.map +1 -1
- package/dist/src/util/config/manage.d.ts +1 -1
- package/dist/src/util/config/manage.d.ts.map +1 -1
- package/dist/src/util/config/manage.js.map +1 -1
- package/dist/src/util/index.d.ts +2 -0
- package/dist/src/util/index.d.ts.map +1 -1
- package/dist/test/assertions/index.test.js +56 -475
- package/dist/test/assertions/index.test.js.map +1 -1
- package/dist/test/assertions/javascript.test.d.ts +2 -0
- package/dist/test/assertions/javascript.test.d.ts.map +1 -0
- package/dist/test/assertions/javascript.test.js +679 -0
- package/dist/test/assertions/javascript.test.js.map +1 -0
- package/dist/test/assertions/python.test.d.ts +2 -0
- package/dist/test/assertions/python.test.d.ts.map +1 -0
- package/dist/test/assertions/python.test.js +342 -0
- package/dist/test/assertions/python.test.js.map +1 -0
- package/dist/test/cache.test.js +297 -100
- package/dist/test/cache.test.js.map +1 -1
- package/dist/test/commands/eval/filterErrorTests.test.d.ts +2 -0
- package/dist/test/commands/eval/filterErrorTests.test.d.ts.map +1 -0
- package/dist/test/commands/eval/filterErrorTests.test.js +110 -0
- package/dist/test/commands/eval/filterErrorTests.test.js.map +1 -0
- package/dist/test/evaluator.test.js +10 -0
- package/dist/test/evaluator.test.js.map +1 -1
- package/dist/test/factories/evalFactory.d.ts +4 -0
- package/dist/test/factories/evalFactory.d.ts.map +1 -1
- package/dist/test/models/eval.test.js +12 -0
- package/dist/test/models/eval.test.js.map +1 -1
- package/dist/test/providers/index.test.js +6 -3
- package/dist/test/providers/index.test.js.map +1 -1
- package/dist/test/providers/mistral.test.js +28 -19
- package/dist/test/providers/mistral.test.js.map +1 -1
- package/dist/test/providers/watsonx.test.js +58 -0
- package/dist/test/providers/watsonx.test.js.map +1 -1
- package/dist/test/redteam/index.test.js +0 -11
- package/dist/test/redteam/index.test.js.map +1 -1
- package/dist/test/redteam/providers/iterativeTree.test.js +329 -98
- package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
- package/dist/test/server/providers.test.js +4 -4
- package/dist/test/server/providers.test.js.map +1 -1
- package/dist/test/util/config/main.test.js +3 -0
- package/dist/test/util/config/main.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +9 -9
- package/dist/src/app/assets/index-DdUNCsxz.css +0 -1
|
@@ -38,15 +38,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
39
|
const dedent_1 = __importDefault(require("dedent"));
|
|
40
40
|
const fs = __importStar(require("fs"));
|
|
41
|
-
const node_module_1 = require("node:module");
|
|
42
41
|
const path = __importStar(require("path"));
|
|
43
42
|
const assertions_1 = require("../../src/assertions");
|
|
44
43
|
const xml_1 = require("../../src/assertions/xml");
|
|
45
44
|
const fetch_1 = require("../../src/fetch");
|
|
46
45
|
const openai_1 = require("../../src/providers/openai");
|
|
47
46
|
const replicate_1 = require("../../src/providers/replicate");
|
|
48
|
-
const pythonUtils_1 = require("../../src/python/pythonUtils");
|
|
49
|
-
const wrapper_1 = require("../../src/python/wrapper");
|
|
50
47
|
const utils_1 = require("../util/utils");
|
|
51
48
|
jest.mock('../../src/redteam/remoteGeneration', () => ({
|
|
52
49
|
shouldGenerateRemote: jest.fn().mockReturnValue(false),
|
|
@@ -69,20 +66,6 @@ jest.mock('../../src/fetch', () => {
|
|
|
69
66
|
fetchWithRetries: jest.fn(actual.fetchWithRetries),
|
|
70
67
|
};
|
|
71
68
|
});
|
|
72
|
-
jest.mock('../../src/python/wrapper', () => {
|
|
73
|
-
const actual = jest.requireActual('../../src/python/wrapper');
|
|
74
|
-
return {
|
|
75
|
-
...actual,
|
|
76
|
-
runPythonCode: jest.fn(actual.runPythonCode),
|
|
77
|
-
};
|
|
78
|
-
});
|
|
79
|
-
jest.mock('../../src/python/pythonUtils', () => {
|
|
80
|
-
const actual = jest.requireActual('../../src/python/pythonUtils');
|
|
81
|
-
return {
|
|
82
|
-
...actual,
|
|
83
|
-
runPython: jest.fn(actual.runPython),
|
|
84
|
-
};
|
|
85
|
-
});
|
|
86
69
|
jest.mock('glob', () => ({
|
|
87
70
|
globSync: jest.fn(),
|
|
88
71
|
}));
|
|
@@ -551,22 +534,6 @@ describe('runAssertion', () => {
|
|
|
551
534
|
type: 'javascript',
|
|
552
535
|
value: 'output === "Expected output"',
|
|
553
536
|
};
|
|
554
|
-
const javascriptMultilineStringAssertion = {
|
|
555
|
-
type: 'javascript',
|
|
556
|
-
value: `
|
|
557
|
-
if (output === "Expected output") {
|
|
558
|
-
return {
|
|
559
|
-
pass: true,
|
|
560
|
-
score: 0.5,
|
|
561
|
-
reason: 'Assertion passed',
|
|
562
|
-
};
|
|
563
|
-
}
|
|
564
|
-
return {
|
|
565
|
-
pass: false,
|
|
566
|
-
score: 0,
|
|
567
|
-
reason: 'Assertion failed',
|
|
568
|
-
};`,
|
|
569
|
-
};
|
|
570
537
|
const javascriptStringAssertionWithNumber = {
|
|
571
538
|
type: 'javascript',
|
|
572
539
|
value: 'output.length * 10',
|
|
@@ -578,29 +545,15 @@ describe('runAssertion', () => {
|
|
|
578
545
|
maximumOutputSize: 20,
|
|
579
546
|
},
|
|
580
547
|
};
|
|
548
|
+
const javascriptBooleanAssertionWithoutConfig = {
|
|
549
|
+
type: 'javascript',
|
|
550
|
+
value: 'output.length <= context.config.maximumOutputSize',
|
|
551
|
+
};
|
|
581
552
|
const javascriptStringAssertionWithNumberAndThreshold = {
|
|
582
553
|
type: 'javascript',
|
|
583
554
|
value: 'output.length * 10',
|
|
584
555
|
threshold: 0.5,
|
|
585
556
|
};
|
|
586
|
-
const javascriptFunctionAssertion = {
|
|
587
|
-
type: 'javascript',
|
|
588
|
-
value: async (output) => ({
|
|
589
|
-
pass: true,
|
|
590
|
-
score: 0.5,
|
|
591
|
-
reason: 'Assertion passed',
|
|
592
|
-
assertion: null,
|
|
593
|
-
}),
|
|
594
|
-
};
|
|
595
|
-
const javascriptFunctionFailAssertion = {
|
|
596
|
-
type: 'javascript',
|
|
597
|
-
value: async (output) => ({
|
|
598
|
-
pass: false,
|
|
599
|
-
score: 0.5,
|
|
600
|
-
reason: 'Assertion failed',
|
|
601
|
-
assertion: null,
|
|
602
|
-
}),
|
|
603
|
-
};
|
|
604
557
|
it('should pass when the equality assertion passes', async () => {
|
|
605
558
|
const output = 'Expected output';
|
|
606
559
|
const result = await (0, assertions_1.runAssertion)({
|
|
@@ -1419,6 +1372,58 @@ describe('runAssertion', () => {
|
|
|
1419
1372
|
reason: 'Assertion passed',
|
|
1420
1373
|
});
|
|
1421
1374
|
});
|
|
1375
|
+
it('should disregard invalid inputs for assert index', async () => {
|
|
1376
|
+
const output = 'Expected output';
|
|
1377
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1378
|
+
prompt: 'Some prompt',
|
|
1379
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1380
|
+
assertion: javascriptBooleanAssertionWithConfig,
|
|
1381
|
+
test: {
|
|
1382
|
+
assert: [
|
|
1383
|
+
{
|
|
1384
|
+
type: 'javascript',
|
|
1385
|
+
value: 'output.length <= context.config.maximumOutputSize',
|
|
1386
|
+
config: {
|
|
1387
|
+
maximumOutputSize: 1,
|
|
1388
|
+
},
|
|
1389
|
+
},
|
|
1390
|
+
],
|
|
1391
|
+
},
|
|
1392
|
+
providerResponse: { output },
|
|
1393
|
+
assertIndex: 45,
|
|
1394
|
+
});
|
|
1395
|
+
expect(result).toMatchObject({
|
|
1396
|
+
pass: true,
|
|
1397
|
+
score: 1.0,
|
|
1398
|
+
reason: 'Assertion passed',
|
|
1399
|
+
});
|
|
1400
|
+
});
|
|
1401
|
+
it('should correctly set configuration from the test case when assert index is valid', async () => {
|
|
1402
|
+
const output = 'Expected output';
|
|
1403
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1404
|
+
prompt: 'Some prompt',
|
|
1405
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1406
|
+
assertion: javascriptBooleanAssertionWithoutConfig,
|
|
1407
|
+
test: {
|
|
1408
|
+
assert: [
|
|
1409
|
+
{
|
|
1410
|
+
type: 'javascript',
|
|
1411
|
+
value: 'output.length <= context.config.maximumOutputSize',
|
|
1412
|
+
config: {
|
|
1413
|
+
maximumOutputSize: 50,
|
|
1414
|
+
},
|
|
1415
|
+
},
|
|
1416
|
+
],
|
|
1417
|
+
},
|
|
1418
|
+
providerResponse: { output },
|
|
1419
|
+
assertIndex: 0,
|
|
1420
|
+
});
|
|
1421
|
+
expect(result).toMatchObject({
|
|
1422
|
+
pass: true,
|
|
1423
|
+
score: 1.0,
|
|
1424
|
+
reason: 'Assertion passed',
|
|
1425
|
+
});
|
|
1426
|
+
});
|
|
1422
1427
|
it('should fail when javascript returns an output string that is larger than the maximum size threshold', async () => {
|
|
1423
1428
|
const output = 'Expected output with some extra characters';
|
|
1424
1429
|
const result = await (0, assertions_1.runAssertion)({
|
|
@@ -1515,100 +1520,6 @@ describe('runAssertion', () => {
|
|
|
1515
1520
|
reason: 'Assertion passed',
|
|
1516
1521
|
});
|
|
1517
1522
|
});
|
|
1518
|
-
it('should pass when javascript function assertion passes - with vars', async () => {
|
|
1519
|
-
const output = 'Expected output';
|
|
1520
|
-
const javascriptStringAssertionWithVars = {
|
|
1521
|
-
type: 'javascript',
|
|
1522
|
-
value: 'output === "Expected output" && context.vars.foo === "bar"',
|
|
1523
|
-
};
|
|
1524
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1525
|
-
prompt: 'Some prompt',
|
|
1526
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1527
|
-
assertion: javascriptStringAssertionWithVars,
|
|
1528
|
-
test: { vars: { foo: 'bar' } },
|
|
1529
|
-
providerResponse: { output },
|
|
1530
|
-
});
|
|
1531
|
-
expect(result).toMatchObject({
|
|
1532
|
-
pass: true,
|
|
1533
|
-
reason: 'Assertion passed',
|
|
1534
|
-
});
|
|
1535
|
-
});
|
|
1536
|
-
it('should fail when the javascript does not match vars', async () => {
|
|
1537
|
-
const output = 'Expected output';
|
|
1538
|
-
const javascriptStringAssertionWithVars = {
|
|
1539
|
-
type: 'javascript',
|
|
1540
|
-
value: 'output === "Expected output" && context.vars.foo === "something else"',
|
|
1541
|
-
};
|
|
1542
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1543
|
-
prompt: 'Some prompt',
|
|
1544
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1545
|
-
assertion: javascriptStringAssertionWithVars,
|
|
1546
|
-
test: { vars: { foo: 'bar' } },
|
|
1547
|
-
providerResponse: { output },
|
|
1548
|
-
});
|
|
1549
|
-
expect(result).toMatchObject({
|
|
1550
|
-
pass: false,
|
|
1551
|
-
reason: 'Custom function returned false\noutput === "Expected output" && context.vars.foo === "something else"',
|
|
1552
|
-
});
|
|
1553
|
-
});
|
|
1554
|
-
it('should pass when the function returns pass', async () => {
|
|
1555
|
-
const output = 'Expected output';
|
|
1556
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1557
|
-
prompt: 'Some prompt',
|
|
1558
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1559
|
-
assertion: javascriptFunctionAssertion,
|
|
1560
|
-
test: {},
|
|
1561
|
-
providerResponse: { output },
|
|
1562
|
-
});
|
|
1563
|
-
expect(result).toMatchObject({
|
|
1564
|
-
pass: true,
|
|
1565
|
-
score: 0.5,
|
|
1566
|
-
reason: 'Assertion passed',
|
|
1567
|
-
});
|
|
1568
|
-
});
|
|
1569
|
-
it('should fail when the function returns fail', async () => {
|
|
1570
|
-
const output = 'Expected output';
|
|
1571
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1572
|
-
prompt: 'Some prompt',
|
|
1573
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1574
|
-
assertion: javascriptFunctionFailAssertion,
|
|
1575
|
-
test: {},
|
|
1576
|
-
providerResponse: { output },
|
|
1577
|
-
});
|
|
1578
|
-
expect(result).toMatchObject({
|
|
1579
|
-
pass: false,
|
|
1580
|
-
score: 0.5,
|
|
1581
|
-
reason: 'Assertion failed',
|
|
1582
|
-
});
|
|
1583
|
-
});
|
|
1584
|
-
it('should pass when the multiline javascript assertion passes', async () => {
|
|
1585
|
-
const output = 'Expected output';
|
|
1586
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1587
|
-
prompt: 'Some prompt',
|
|
1588
|
-
assertion: javascriptMultilineStringAssertion,
|
|
1589
|
-
test: {},
|
|
1590
|
-
providerResponse: { output },
|
|
1591
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1592
|
-
});
|
|
1593
|
-
expect(result).toMatchObject({
|
|
1594
|
-
pass: true,
|
|
1595
|
-
reason: 'Assertion passed',
|
|
1596
|
-
});
|
|
1597
|
-
});
|
|
1598
|
-
it('should pass when the multiline javascript assertion fails', async () => {
|
|
1599
|
-
const output = 'Not the expected output';
|
|
1600
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1601
|
-
prompt: 'Some prompt',
|
|
1602
|
-
assertion: javascriptMultilineStringAssertion,
|
|
1603
|
-
test: {},
|
|
1604
|
-
providerResponse: { output },
|
|
1605
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1606
|
-
});
|
|
1607
|
-
expect(result).toMatchObject({
|
|
1608
|
-
pass: false,
|
|
1609
|
-
reason: 'Assertion failed',
|
|
1610
|
-
});
|
|
1611
|
-
});
|
|
1612
1523
|
const notContainsAssertion = {
|
|
1613
1524
|
type: 'not-contains',
|
|
1614
1525
|
value: 'Unexpected output',
|
|
@@ -2103,336 +2014,6 @@ describe('runAssertion', () => {
|
|
|
2103
2014
|
reason: 'Levenshtein distance 8 is greater than threshold 5',
|
|
2104
2015
|
});
|
|
2105
2016
|
});
|
|
2106
|
-
it.each([
|
|
2107
|
-
[
|
|
2108
|
-
'boolean',
|
|
2109
|
-
jest.fn((output) => output === 'Expected output'),
|
|
2110
|
-
true,
|
|
2111
|
-
'Assertion passed',
|
|
2112
|
-
],
|
|
2113
|
-
['number', jest.fn((output) => output.length), true, 'Assertion passed'],
|
|
2114
|
-
[
|
|
2115
|
-
'GradingResult',
|
|
2116
|
-
jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
|
|
2117
|
-
true,
|
|
2118
|
-
'Custom reason',
|
|
2119
|
-
],
|
|
2120
|
-
[
|
|
2121
|
-
'boolean',
|
|
2122
|
-
jest.fn((output) => output !== 'Expected output'),
|
|
2123
|
-
false,
|
|
2124
|
-
'Custom function returned false',
|
|
2125
|
-
],
|
|
2126
|
-
['number', jest.fn((output) => 0), false, 'Custom function returned false'],
|
|
2127
|
-
[
|
|
2128
|
-
'GradingResult',
|
|
2129
|
-
jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
|
|
2130
|
-
false,
|
|
2131
|
-
'Custom reason',
|
|
2132
|
-
],
|
|
2133
|
-
[
|
|
2134
|
-
'boolean Promise',
|
|
2135
|
-
jest.fn((output) => Promise.resolve(true)),
|
|
2136
|
-
true,
|
|
2137
|
-
'Assertion passed',
|
|
2138
|
-
],
|
|
2139
|
-
])('should pass when the file:// assertion with .js file returns a %s', async (type, mockFn, expectedPass, expectedReason) => {
|
|
2140
|
-
const output = 'Expected output';
|
|
2141
|
-
jest.doMock(path.resolve('/path/to/assert.js'), () => mockFn, { virtual: true });
|
|
2142
|
-
const fileAssertion = {
|
|
2143
|
-
type: 'javascript',
|
|
2144
|
-
value: 'file:///path/to/assert.js',
|
|
2145
|
-
};
|
|
2146
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2147
|
-
const providerResponse = { output };
|
|
2148
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2149
|
-
prompt: 'Some prompt',
|
|
2150
|
-
provider,
|
|
2151
|
-
assertion: fileAssertion,
|
|
2152
|
-
test: {},
|
|
2153
|
-
providerResponse,
|
|
2154
|
-
});
|
|
2155
|
-
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2156
|
-
prompt: 'Some prompt',
|
|
2157
|
-
vars: {},
|
|
2158
|
-
test: {},
|
|
2159
|
-
provider,
|
|
2160
|
-
providerResponse,
|
|
2161
|
-
});
|
|
2162
|
-
expect(result).toMatchObject({
|
|
2163
|
-
pass: expectedPass,
|
|
2164
|
-
reason: expect.stringContaining(expectedReason),
|
|
2165
|
-
});
|
|
2166
|
-
});
|
|
2167
|
-
it.each([
|
|
2168
|
-
[
|
|
2169
|
-
'boolean',
|
|
2170
|
-
jest.fn((output) => output === 'Expected output'),
|
|
2171
|
-
true,
|
|
2172
|
-
'Assertion passed',
|
|
2173
|
-
],
|
|
2174
|
-
['number', jest.fn((output) => output.length), true, 'Assertion passed'],
|
|
2175
|
-
[
|
|
2176
|
-
'GradingResult',
|
|
2177
|
-
jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
|
|
2178
|
-
true,
|
|
2179
|
-
'Custom reason',
|
|
2180
|
-
],
|
|
2181
|
-
[
|
|
2182
|
-
'boolean',
|
|
2183
|
-
jest.fn((output) => output !== 'Expected output'),
|
|
2184
|
-
false,
|
|
2185
|
-
'Custom function returned false',
|
|
2186
|
-
],
|
|
2187
|
-
['number', jest.fn((output) => 0), false, 'Custom function returned false'],
|
|
2188
|
-
[
|
|
2189
|
-
'GradingResult',
|
|
2190
|
-
jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
|
|
2191
|
-
false,
|
|
2192
|
-
'Custom reason',
|
|
2193
|
-
],
|
|
2194
|
-
[
|
|
2195
|
-
'boolean Promise',
|
|
2196
|
-
jest.fn((output) => Promise.resolve(true)),
|
|
2197
|
-
true,
|
|
2198
|
-
'Assertion passed',
|
|
2199
|
-
],
|
|
2200
|
-
])('should pass when assertion is a package path', async (type, mockFn, expectedPass, expectedReason) => {
|
|
2201
|
-
const output = 'Expected output';
|
|
2202
|
-
const require = (0, node_module_1.createRequire)('');
|
|
2203
|
-
jest.spyOn(require, 'resolve').mockReturnValueOnce('/node_modules/@promptfoo/fake/index.js');
|
|
2204
|
-
jest.doMock(path.resolve('/node_modules/@promptfoo/fake/index.js'), () => {
|
|
2205
|
-
return {
|
|
2206
|
-
assertionFunction: mockFn,
|
|
2207
|
-
};
|
|
2208
|
-
}, { virtual: true });
|
|
2209
|
-
const fileAssertion = {
|
|
2210
|
-
type: 'javascript',
|
|
2211
|
-
value: 'package:@promptfoo/fake:assertionFunction',
|
|
2212
|
-
};
|
|
2213
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2214
|
-
const providerResponse = { output };
|
|
2215
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2216
|
-
prompt: 'Some prompt',
|
|
2217
|
-
provider,
|
|
2218
|
-
assertion: fileAssertion,
|
|
2219
|
-
test: {},
|
|
2220
|
-
providerResponse,
|
|
2221
|
-
});
|
|
2222
|
-
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2223
|
-
prompt: 'Some prompt',
|
|
2224
|
-
vars: {},
|
|
2225
|
-
test: {},
|
|
2226
|
-
provider,
|
|
2227
|
-
providerResponse,
|
|
2228
|
-
});
|
|
2229
|
-
expect(result).toMatchObject({
|
|
2230
|
-
pass: expectedPass,
|
|
2231
|
-
reason: expect.stringContaining(expectedReason),
|
|
2232
|
-
});
|
|
2233
|
-
});
|
|
2234
|
-
it('should resolve js paths relative to the configuration file', async () => {
|
|
2235
|
-
const output = 'Expected output';
|
|
2236
|
-
const mockFn = jest.fn((output) => output === 'Expected output');
|
|
2237
|
-
jest.doMock(path.resolve('/base/path/path/to/assert.js'), () => mockFn, { virtual: true });
|
|
2238
|
-
const fileAssertion = {
|
|
2239
|
-
type: 'javascript',
|
|
2240
|
-
value: 'file://./path/to/assert.js',
|
|
2241
|
-
};
|
|
2242
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2243
|
-
const providerResponse = { output };
|
|
2244
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2245
|
-
prompt: 'Some prompt',
|
|
2246
|
-
provider,
|
|
2247
|
-
assertion: fileAssertion,
|
|
2248
|
-
test: {},
|
|
2249
|
-
providerResponse,
|
|
2250
|
-
});
|
|
2251
|
-
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2252
|
-
prompt: 'Some prompt',
|
|
2253
|
-
vars: {},
|
|
2254
|
-
test: {},
|
|
2255
|
-
provider,
|
|
2256
|
-
providerResponse,
|
|
2257
|
-
});
|
|
2258
|
-
expect(result).toMatchObject({
|
|
2259
|
-
pass: true,
|
|
2260
|
-
reason: 'Assertion passed',
|
|
2261
|
-
});
|
|
2262
|
-
});
|
|
2263
|
-
it('should handle output strings with both single and double quotes correctly in python assertion', async () => {
|
|
2264
|
-
const expectedPythonValue = '0.5';
|
|
2265
|
-
jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(expectedPythonValue);
|
|
2266
|
-
const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
|
|
2267
|
-
const pythonAssertion = {
|
|
2268
|
-
type: 'python',
|
|
2269
|
-
value: expectedPythonValue,
|
|
2270
|
-
};
|
|
2271
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2272
|
-
const providerResponse = { output };
|
|
2273
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2274
|
-
prompt: 'Some prompt',
|
|
2275
|
-
provider,
|
|
2276
|
-
assertion: pythonAssertion,
|
|
2277
|
-
test: {},
|
|
2278
|
-
providerResponse,
|
|
2279
|
-
});
|
|
2280
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
|
|
2281
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
|
|
2282
|
-
output,
|
|
2283
|
-
{ prompt: 'Some prompt', test: {}, vars: {}, provider, providerResponse },
|
|
2284
|
-
]);
|
|
2285
|
-
expect(result).toMatchObject({
|
|
2286
|
-
pass: true,
|
|
2287
|
-
reason: 'Assertion passed',
|
|
2288
|
-
score: Number(expectedPythonValue),
|
|
2289
|
-
});
|
|
2290
|
-
});
|
|
2291
|
-
it.each([
|
|
2292
|
-
['boolean', false, 0, 'Python code returned false', false, undefined],
|
|
2293
|
-
['number', 0, 0, 'Python code returned false', false, undefined],
|
|
2294
|
-
[
|
|
2295
|
-
'GradingResult',
|
|
2296
|
-
`{"pass": false, "score": 0, "reason": "Custom error"}`,
|
|
2297
|
-
0,
|
|
2298
|
-
'Custom error',
|
|
2299
|
-
false,
|
|
2300
|
-
undefined,
|
|
2301
|
-
],
|
|
2302
|
-
['boolean', true, 1, 'Assertion passed', true, undefined],
|
|
2303
|
-
['number', 1, 1, 'Assertion passed', true, undefined],
|
|
2304
|
-
[
|
|
2305
|
-
'GradingResult',
|
|
2306
|
-
`{"pass": true, "score": 1, "reason": "Custom success"}`,
|
|
2307
|
-
1,
|
|
2308
|
-
'Custom success',
|
|
2309
|
-
true,
|
|
2310
|
-
undefined,
|
|
2311
|
-
],
|
|
2312
|
-
[
|
|
2313
|
-
'GradingResult',
|
|
2314
|
-
// This score is less than the assertion threshold in the test
|
|
2315
|
-
`{"pass": true, "score": 0.4, "reason": "Foo bar"}`,
|
|
2316
|
-
0.4,
|
|
2317
|
-
'Python score 0.4 is less than threshold 0.5',
|
|
2318
|
-
false,
|
|
2319
|
-
0.5,
|
|
2320
|
-
],
|
|
2321
|
-
])('should handle inline return type %s with return value: %p', async (type, returnValue, expectedScore, expectedReason, expectedPass, threshold) => {
|
|
2322
|
-
const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
|
|
2323
|
-
let resolvedValue;
|
|
2324
|
-
if (type === 'GradingResult') {
|
|
2325
|
-
resolvedValue = JSON.parse(returnValue);
|
|
2326
|
-
}
|
|
2327
|
-
else {
|
|
2328
|
-
resolvedValue = returnValue;
|
|
2329
|
-
}
|
|
2330
|
-
const pythonAssertion = {
|
|
2331
|
-
type: 'python',
|
|
2332
|
-
value: returnValue.toString(),
|
|
2333
|
-
threshold,
|
|
2334
|
-
};
|
|
2335
|
-
jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(resolvedValue);
|
|
2336
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2337
|
-
const providerResponse = { output };
|
|
2338
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2339
|
-
prompt: 'Some prompt',
|
|
2340
|
-
provider,
|
|
2341
|
-
assertion: pythonAssertion,
|
|
2342
|
-
test: {},
|
|
2343
|
-
providerResponse,
|
|
2344
|
-
});
|
|
2345
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
|
|
2346
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
|
|
2347
|
-
output,
|
|
2348
|
-
{ prompt: 'Some prompt', test: {}, vars: {}, provider, providerResponse },
|
|
2349
|
-
]);
|
|
2350
|
-
expect(result).toMatchObject({
|
|
2351
|
-
pass: expectedPass,
|
|
2352
|
-
reason: expect.stringMatching(expectedReason),
|
|
2353
|
-
score: expectedScore,
|
|
2354
|
-
});
|
|
2355
|
-
});
|
|
2356
|
-
it.each([
|
|
2357
|
-
['boolean', 'True', true, 'Assertion passed'],
|
|
2358
|
-
['number', '0.5', true, 'Assertion passed'],
|
|
2359
|
-
['boolean', true, true, 'Assertion passed'],
|
|
2360
|
-
['number', 0.5, true, 'Assertion passed'],
|
|
2361
|
-
[
|
|
2362
|
-
'GradingResult',
|
|
2363
|
-
'{"pass": true, "score": 1, "reason": "Custom reason"}',
|
|
2364
|
-
true,
|
|
2365
|
-
'Custom reason',
|
|
2366
|
-
],
|
|
2367
|
-
['boolean', 'False', false, 'Python code returned false'],
|
|
2368
|
-
['number', '0', false, 'Python code returned false'],
|
|
2369
|
-
[
|
|
2370
|
-
'GradingResult',
|
|
2371
|
-
'{"pass": false, "score": 0, "reason": "Custom reason"}',
|
|
2372
|
-
false,
|
|
2373
|
-
'Custom reason',
|
|
2374
|
-
],
|
|
2375
|
-
])('should handle when the file:// assertion with .py file returns a %s', async (type, pythonOutput, expectedPass, expectedReason) => {
|
|
2376
|
-
const output = 'Expected output';
|
|
2377
|
-
jest.mocked(pythonUtils_1.runPython).mockResolvedValueOnce(pythonOutput);
|
|
2378
|
-
const fileAssertion = {
|
|
2379
|
-
type: 'python',
|
|
2380
|
-
value: 'file:///path/to/assert.py',
|
|
2381
|
-
};
|
|
2382
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2383
|
-
const providerResponse = { output };
|
|
2384
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2385
|
-
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2386
|
-
provider,
|
|
2387
|
-
assertion: fileAssertion,
|
|
2388
|
-
test: {},
|
|
2389
|
-
providerResponse,
|
|
2390
|
-
});
|
|
2391
|
-
expect(pythonUtils_1.runPython).toHaveBeenCalledWith(path.resolve('/path/to/assert.py'), 'get_assert', [
|
|
2392
|
-
output,
|
|
2393
|
-
{
|
|
2394
|
-
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2395
|
-
vars: {},
|
|
2396
|
-
test: {},
|
|
2397
|
-
provider,
|
|
2398
|
-
providerResponse,
|
|
2399
|
-
},
|
|
2400
|
-
]);
|
|
2401
|
-
expect(result).toMatchObject({
|
|
2402
|
-
pass: expectedPass,
|
|
2403
|
-
reason: expect.stringContaining(expectedReason),
|
|
2404
|
-
});
|
|
2405
|
-
expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
|
|
2406
|
-
});
|
|
2407
|
-
it('should handle when python file assertions throw an error', async () => {
|
|
2408
|
-
const output = 'Expected output';
|
|
2409
|
-
jest
|
|
2410
|
-
.mocked(pythonUtils_1.runPython)
|
|
2411
|
-
.mockRejectedValue(new Error('The Python script `call_api` function must return a dict with an `output`'));
|
|
2412
|
-
const fileAssertion = {
|
|
2413
|
-
type: 'python',
|
|
2414
|
-
value: 'file:///path/to/assert.py',
|
|
2415
|
-
};
|
|
2416
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2417
|
-
const providerResponse = { output };
|
|
2418
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2419
|
-
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2420
|
-
provider,
|
|
2421
|
-
assertion: fileAssertion,
|
|
2422
|
-
test: {},
|
|
2423
|
-
providerResponse,
|
|
2424
|
-
});
|
|
2425
|
-
expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
|
|
2426
|
-
expect(result).toEqual({
|
|
2427
|
-
assertion: {
|
|
2428
|
-
type: 'python',
|
|
2429
|
-
value: 'file:///path/to/assert.py',
|
|
2430
|
-
},
|
|
2431
|
-
pass: false,
|
|
2432
|
-
reason: 'The Python script `call_api` function must return a dict with an `output`',
|
|
2433
|
-
score: 0,
|
|
2434
|
-
});
|
|
2435
|
-
});
|
|
2436
2017
|
describe('latency assertion', () => {
|
|
2437
2018
|
it('should pass when the latency assertion passes', async () => {
|
|
2438
2019
|
const output = 'Expected output';
|