promptfoo 0.103.3 → 0.103.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +10 -1
- package/dist/package.json +13 -13
- package/dist/src/app/assets/index-BR1tgrAf.css +1 -0
- package/dist/src/app/assets/{index-XXoiz61D.js → index-CmPQAxfe.js} +276 -276
- package/dist/src/app/assets/{index.es-DTKpmNcZ.js → index.es-DfqJ7zdu.js} +1 -1
- package/dist/src/app/assets/{sync-ClbHj3jr.js → sync-C-aW1Mpw.js} +1 -1
- package/dist/src/app/index.html +2 -2
- package/dist/src/assertions/index.d.ts +3 -2
- package/dist/src/assertions/index.d.ts.map +1 -1
- package/dist/src/assertions/index.js +21 -6
- package/dist/src/assertions/index.js.map +1 -1
- package/dist/src/assertions/utils.d.ts +6 -2
- package/dist/src/assertions/utils.d.ts.map +1 -1
- package/dist/src/commands/eval/filterErrorTests.d.ts +5 -0
- package/dist/src/commands/eval/filterErrorTests.d.ts.map +1 -0
- package/dist/src/commands/eval/filterErrorTests.js +19 -0
- package/dist/src/commands/eval/filterErrorTests.js.map +1 -0
- package/dist/src/commands/eval/filterTests.d.ts +1 -0
- package/dist/src/commands/eval/filterTests.d.ts.map +1 -1
- package/dist/src/commands/eval/filterTests.js +4 -0
- package/dist/src/commands/eval/filterTests.js.map +1 -1
- package/dist/src/commands/eval.d.ts.map +1 -1
- package/dist/src/commands/eval.js +1 -0
- package/dist/src/commands/eval.js.map +1 -1
- package/dist/src/database/tables.d.ts +51 -12
- package/dist/src/database/tables.d.ts.map +1 -1
- package/dist/src/envars.d.ts +1 -0
- package/dist/src/envars.d.ts.map +1 -1
- package/dist/src/envars.js.map +1 -1
- package/dist/src/evaluator.d.ts.map +1 -1
- package/dist/src/evaluator.js +1 -0
- package/dist/src/evaluator.js.map +1 -1
- package/dist/src/fetch.d.ts.map +1 -1
- package/dist/src/fetch.js +20 -3
- package/dist/src/fetch.js.map +1 -1
- package/dist/src/models/evalResult.d.ts.map +1 -1
- package/dist/src/models/evalResult.js +9 -1
- package/dist/src/models/evalResult.js.map +1 -1
- package/dist/src/providers/browser.js +1 -1
- package/dist/src/providers/browser.js.map +1 -1
- package/dist/src/providers/defaults.d.ts +1 -0
- package/dist/src/providers/defaults.d.ts.map +1 -1
- package/dist/src/providers/defaults.js +11 -0
- package/dist/src/providers/defaults.js.map +1 -1
- package/dist/src/providers/http.d.ts.map +1 -1
- package/dist/src/providers/http.js +39 -63
- package/dist/src/providers/http.js.map +1 -1
- package/dist/src/providers/llama.d.ts.map +1 -1
- package/dist/src/providers/llama.js +8 -1
- package/dist/src/providers/llama.js.map +1 -1
- package/dist/src/providers/openai.d.ts.map +1 -1
- package/dist/src/providers/openai.js +6 -13
- package/dist/src/providers/openai.js.map +1 -1
- package/dist/src/providers/watsonx.d.ts.map +1 -1
- package/dist/src/providers/watsonx.js +9 -0
- package/dist/src/providers/watsonx.js.map +1 -1
- package/dist/src/providers.d.ts.map +1 -1
- package/dist/src/providers.js +15 -0
- package/dist/src/providers.js.map +1 -1
- package/dist/src/redteam/commands/generate.d.ts.map +1 -1
- package/dist/src/redteam/commands/generate.js +4 -0
- package/dist/src/redteam/commands/generate.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +4 -2
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +11 -7
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/plugins/base.d.ts.map +1 -1
- package/dist/src/redteam/plugins/base.js +3 -0
- package/dist/src/redteam/plugins/base.js.map +1 -1
- package/dist/src/redteam/plugins/cyberseceval.d.ts.map +1 -1
- package/dist/src/redteam/plugins/cyberseceval.js +13 -3
- package/dist/src/redteam/plugins/cyberseceval.js.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.d.ts +1 -0
- package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
- package/dist/src/redteam/providers/crescendo/index.js +58 -3
- package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
- package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterative.js +59 -5
- package/dist/src/redteam/providers/iterative.js.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.d.ts +6 -2
- package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeImage.js +322 -131
- package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.d.ts +37 -26
- package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
- package/dist/src/redteam/providers/iterativeTree.js +193 -85
- package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
- package/dist/src/redteam/shared.d.ts.map +1 -1
- package/dist/src/redteam/shared.js +4 -1
- package/dist/src/redteam/shared.js.map +1 -1
- package/dist/src/server/routes/providers.js +11 -6
- package/dist/src/server/routes/providers.js.map +1 -1
- package/dist/src/types/env.d.ts +3 -0
- package/dist/src/types/env.d.ts.map +1 -1
- package/dist/src/types/index.d.ts +1376 -351
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/index.js +4 -1
- package/dist/src/types/index.js.map +1 -1
- package/dist/src/types/providers.d.ts +22 -0
- package/dist/src/types/providers.d.ts.map +1 -1
- package/dist/src/types/providers.js.map +1 -1
- package/dist/src/util/config/manage.d.ts +1 -1
- package/dist/src/util/config/manage.d.ts.map +1 -1
- package/dist/src/util/config/manage.js.map +1 -1
- package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -1
- package/dist/src/util/convertEvalResultsToTable.js +14 -0
- package/dist/src/util/convertEvalResultsToTable.js.map +1 -1
- package/dist/src/util/index.d.ts +12 -4
- package/dist/src/util/index.d.ts.map +1 -1
- package/dist/src/validators/providers.d.ts +71 -2
- package/dist/src/validators/providers.d.ts.map +1 -1
- package/dist/src/validators/providers.js +3 -0
- package/dist/src/validators/providers.js.map +1 -1
- package/dist/src/validators/redteam.d.ts +24 -0
- package/dist/src/validators/redteam.d.ts.map +1 -1
- package/dist/test/assertions/index.test.js +26 -475
- package/dist/test/assertions/index.test.js.map +1 -1
- package/dist/test/assertions/javascript.test.d.ts +2 -0
- package/dist/test/assertions/javascript.test.d.ts.map +1 -0
- package/dist/test/assertions/javascript.test.js +679 -0
- package/dist/test/assertions/javascript.test.js.map +1 -0
- package/dist/test/assertions/python.test.d.ts +2 -0
- package/dist/test/assertions/python.test.d.ts.map +1 -0
- package/dist/test/assertions/python.test.js +377 -0
- package/dist/test/assertions/python.test.js.map +1 -0
- package/dist/test/cache.test.js +297 -100
- package/dist/test/cache.test.js.map +1 -1
- package/dist/test/commands/eval/filterErrorTests.test.d.ts +2 -0
- package/dist/test/commands/eval/filterErrorTests.test.d.ts.map +1 -0
- package/dist/test/commands/eval/filterErrorTests.test.js +110 -0
- package/dist/test/commands/eval/filterErrorTests.test.js.map +1 -0
- package/dist/test/evaluator.test.js +10 -0
- package/dist/test/evaluator.test.js.map +1 -1
- package/dist/test/factories/evalFactory.d.ts +39 -8
- package/dist/test/factories/evalFactory.d.ts.map +1 -1
- package/dist/test/fetch.test.js +147 -19
- package/dist/test/fetch.test.js.map +1 -1
- package/dist/test/models/eval.test.js +12 -0
- package/dist/test/models/eval.test.js.map +1 -1
- package/dist/test/providers/defaults.test.d.ts +2 -0
- package/dist/test/providers/defaults.test.d.ts.map +1 -0
- package/dist/test/providers/defaults.test.js +77 -0
- package/dist/test/providers/defaults.test.js.map +1 -0
- package/dist/test/providers/http.test.js +65 -9
- package/dist/test/providers/http.test.js.map +1 -1
- package/dist/test/providers/index.test.js +6 -3
- package/dist/test/providers/index.test.js.map +1 -1
- package/dist/test/providers/mistral.test.js +28 -19
- package/dist/test/providers/mistral.test.js.map +1 -1
- package/dist/test/providers/watsonx.test.js +58 -0
- package/dist/test/providers/watsonx.test.js.map +1 -1
- package/dist/test/redteam/providers/iterativeTree.test.js +329 -98
- package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
- package/dist/test/server/providers.test.js +4 -4
- package/dist/test/server/providers.test.js.map +1 -1
- package/dist/test/util/config/main.test.js +3 -0
- package/dist/test/util/config/main.test.js.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +13 -13
- package/dist/src/app/assets/index-DdUNCsxz.css +0 -1
|
@@ -38,15 +38,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
|
38
38
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
39
|
const dedent_1 = __importDefault(require("dedent"));
|
|
40
40
|
const fs = __importStar(require("fs"));
|
|
41
|
-
const node_module_1 = require("node:module");
|
|
42
41
|
const path = __importStar(require("path"));
|
|
43
42
|
const assertions_1 = require("../../src/assertions");
|
|
44
43
|
const xml_1 = require("../../src/assertions/xml");
|
|
45
44
|
const fetch_1 = require("../../src/fetch");
|
|
46
45
|
const openai_1 = require("../../src/providers/openai");
|
|
47
46
|
const replicate_1 = require("../../src/providers/replicate");
|
|
48
|
-
const pythonUtils_1 = require("../../src/python/pythonUtils");
|
|
49
|
-
const wrapper_1 = require("../../src/python/wrapper");
|
|
50
47
|
const utils_1 = require("../util/utils");
|
|
51
48
|
jest.mock('../../src/redteam/remoteGeneration', () => ({
|
|
52
49
|
shouldGenerateRemote: jest.fn().mockReturnValue(false),
|
|
@@ -69,20 +66,6 @@ jest.mock('../../src/fetch', () => {
|
|
|
69
66
|
fetchWithRetries: jest.fn(actual.fetchWithRetries),
|
|
70
67
|
};
|
|
71
68
|
});
|
|
72
|
-
jest.mock('../../src/python/wrapper', () => {
|
|
73
|
-
const actual = jest.requireActual('../../src/python/wrapper');
|
|
74
|
-
return {
|
|
75
|
-
...actual,
|
|
76
|
-
runPythonCode: jest.fn(actual.runPythonCode),
|
|
77
|
-
};
|
|
78
|
-
});
|
|
79
|
-
jest.mock('../../src/python/pythonUtils', () => {
|
|
80
|
-
const actual = jest.requireActual('../../src/python/pythonUtils');
|
|
81
|
-
return {
|
|
82
|
-
...actual,
|
|
83
|
-
runPython: jest.fn(actual.runPython),
|
|
84
|
-
};
|
|
85
|
-
});
|
|
86
69
|
jest.mock('glob', () => ({
|
|
87
70
|
globSync: jest.fn(),
|
|
88
71
|
}));
|
|
@@ -551,22 +534,6 @@ describe('runAssertion', () => {
|
|
|
551
534
|
type: 'javascript',
|
|
552
535
|
value: 'output === "Expected output"',
|
|
553
536
|
};
|
|
554
|
-
const javascriptMultilineStringAssertion = {
|
|
555
|
-
type: 'javascript',
|
|
556
|
-
value: `
|
|
557
|
-
if (output === "Expected output") {
|
|
558
|
-
return {
|
|
559
|
-
pass: true,
|
|
560
|
-
score: 0.5,
|
|
561
|
-
reason: 'Assertion passed',
|
|
562
|
-
};
|
|
563
|
-
}
|
|
564
|
-
return {
|
|
565
|
-
pass: false,
|
|
566
|
-
score: 0,
|
|
567
|
-
reason: 'Assertion failed',
|
|
568
|
-
};`,
|
|
569
|
-
};
|
|
570
537
|
const javascriptStringAssertionWithNumber = {
|
|
571
538
|
type: 'javascript',
|
|
572
539
|
value: 'output.length * 10',
|
|
@@ -583,24 +550,6 @@ describe('runAssertion', () => {
|
|
|
583
550
|
value: 'output.length * 10',
|
|
584
551
|
threshold: 0.5,
|
|
585
552
|
};
|
|
586
|
-
const javascriptFunctionAssertion = {
|
|
587
|
-
type: 'javascript',
|
|
588
|
-
value: async (output) => ({
|
|
589
|
-
pass: true,
|
|
590
|
-
score: 0.5,
|
|
591
|
-
reason: 'Assertion passed',
|
|
592
|
-
assertion: null,
|
|
593
|
-
}),
|
|
594
|
-
};
|
|
595
|
-
const javascriptFunctionFailAssertion = {
|
|
596
|
-
type: 'javascript',
|
|
597
|
-
value: async (output) => ({
|
|
598
|
-
pass: false,
|
|
599
|
-
score: 0.5,
|
|
600
|
-
reason: 'Assertion failed',
|
|
601
|
-
assertion: null,
|
|
602
|
-
}),
|
|
603
|
-
};
|
|
604
553
|
it('should pass when the equality assertion passes', async () => {
|
|
605
554
|
const output = 'Expected output';
|
|
606
555
|
const result = await (0, assertions_1.runAssertion)({
|
|
@@ -1419,6 +1368,32 @@ describe('runAssertion', () => {
|
|
|
1419
1368
|
reason: 'Assertion passed',
|
|
1420
1369
|
});
|
|
1421
1370
|
});
|
|
1371
|
+
it('should disregard invalid inputs for assert index', async () => {
|
|
1372
|
+
const output = 'Expected output';
|
|
1373
|
+
const result = await (0, assertions_1.runAssertion)({
|
|
1374
|
+
prompt: 'Some prompt',
|
|
1375
|
+
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1376
|
+
assertion: javascriptBooleanAssertionWithConfig,
|
|
1377
|
+
test: {
|
|
1378
|
+
assert: [
|
|
1379
|
+
{
|
|
1380
|
+
type: 'javascript',
|
|
1381
|
+
value: 'output.length <= context.config.maximumOutputSize',
|
|
1382
|
+
config: {
|
|
1383
|
+
maximumOutputSize: 1,
|
|
1384
|
+
},
|
|
1385
|
+
},
|
|
1386
|
+
],
|
|
1387
|
+
},
|
|
1388
|
+
providerResponse: { output },
|
|
1389
|
+
assertIndex: 45,
|
|
1390
|
+
});
|
|
1391
|
+
expect(result).toMatchObject({
|
|
1392
|
+
pass: true,
|
|
1393
|
+
score: 1.0,
|
|
1394
|
+
reason: 'Assertion passed',
|
|
1395
|
+
});
|
|
1396
|
+
});
|
|
1422
1397
|
it('should fail when javascript returns an output string that is larger than the maximum size threshold', async () => {
|
|
1423
1398
|
const output = 'Expected output with some extra characters';
|
|
1424
1399
|
const result = await (0, assertions_1.runAssertion)({
|
|
@@ -1515,100 +1490,6 @@ describe('runAssertion', () => {
|
|
|
1515
1490
|
reason: 'Assertion passed',
|
|
1516
1491
|
});
|
|
1517
1492
|
});
|
|
1518
|
-
it('should pass when javascript function assertion passes - with vars', async () => {
|
|
1519
|
-
const output = 'Expected output';
|
|
1520
|
-
const javascriptStringAssertionWithVars = {
|
|
1521
|
-
type: 'javascript',
|
|
1522
|
-
value: 'output === "Expected output" && context.vars.foo === "bar"',
|
|
1523
|
-
};
|
|
1524
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1525
|
-
prompt: 'Some prompt',
|
|
1526
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1527
|
-
assertion: javascriptStringAssertionWithVars,
|
|
1528
|
-
test: { vars: { foo: 'bar' } },
|
|
1529
|
-
providerResponse: { output },
|
|
1530
|
-
});
|
|
1531
|
-
expect(result).toMatchObject({
|
|
1532
|
-
pass: true,
|
|
1533
|
-
reason: 'Assertion passed',
|
|
1534
|
-
});
|
|
1535
|
-
});
|
|
1536
|
-
it('should fail when the javascript does not match vars', async () => {
|
|
1537
|
-
const output = 'Expected output';
|
|
1538
|
-
const javascriptStringAssertionWithVars = {
|
|
1539
|
-
type: 'javascript',
|
|
1540
|
-
value: 'output === "Expected output" && context.vars.foo === "something else"',
|
|
1541
|
-
};
|
|
1542
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1543
|
-
prompt: 'Some prompt',
|
|
1544
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1545
|
-
assertion: javascriptStringAssertionWithVars,
|
|
1546
|
-
test: { vars: { foo: 'bar' } },
|
|
1547
|
-
providerResponse: { output },
|
|
1548
|
-
});
|
|
1549
|
-
expect(result).toMatchObject({
|
|
1550
|
-
pass: false,
|
|
1551
|
-
reason: 'Custom function returned false\noutput === "Expected output" && context.vars.foo === "something else"',
|
|
1552
|
-
});
|
|
1553
|
-
});
|
|
1554
|
-
it('should pass when the function returns pass', async () => {
|
|
1555
|
-
const output = 'Expected output';
|
|
1556
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1557
|
-
prompt: 'Some prompt',
|
|
1558
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1559
|
-
assertion: javascriptFunctionAssertion,
|
|
1560
|
-
test: {},
|
|
1561
|
-
providerResponse: { output },
|
|
1562
|
-
});
|
|
1563
|
-
expect(result).toMatchObject({
|
|
1564
|
-
pass: true,
|
|
1565
|
-
score: 0.5,
|
|
1566
|
-
reason: 'Assertion passed',
|
|
1567
|
-
});
|
|
1568
|
-
});
|
|
1569
|
-
it('should fail when the function returns fail', async () => {
|
|
1570
|
-
const output = 'Expected output';
|
|
1571
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1572
|
-
prompt: 'Some prompt',
|
|
1573
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1574
|
-
assertion: javascriptFunctionFailAssertion,
|
|
1575
|
-
test: {},
|
|
1576
|
-
providerResponse: { output },
|
|
1577
|
-
});
|
|
1578
|
-
expect(result).toMatchObject({
|
|
1579
|
-
pass: false,
|
|
1580
|
-
score: 0.5,
|
|
1581
|
-
reason: 'Assertion failed',
|
|
1582
|
-
});
|
|
1583
|
-
});
|
|
1584
|
-
it('should pass when the multiline javascript assertion passes', async () => {
|
|
1585
|
-
const output = 'Expected output';
|
|
1586
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1587
|
-
prompt: 'Some prompt',
|
|
1588
|
-
assertion: javascriptMultilineStringAssertion,
|
|
1589
|
-
test: {},
|
|
1590
|
-
providerResponse: { output },
|
|
1591
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1592
|
-
});
|
|
1593
|
-
expect(result).toMatchObject({
|
|
1594
|
-
pass: true,
|
|
1595
|
-
reason: 'Assertion passed',
|
|
1596
|
-
});
|
|
1597
|
-
});
|
|
1598
|
-
it('should pass when the multiline javascript assertion fails', async () => {
|
|
1599
|
-
const output = 'Not the expected output';
|
|
1600
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
1601
|
-
prompt: 'Some prompt',
|
|
1602
|
-
assertion: javascriptMultilineStringAssertion,
|
|
1603
|
-
test: {},
|
|
1604
|
-
providerResponse: { output },
|
|
1605
|
-
provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
|
|
1606
|
-
});
|
|
1607
|
-
expect(result).toMatchObject({
|
|
1608
|
-
pass: false,
|
|
1609
|
-
reason: 'Assertion failed',
|
|
1610
|
-
});
|
|
1611
|
-
});
|
|
1612
1493
|
const notContainsAssertion = {
|
|
1613
1494
|
type: 'not-contains',
|
|
1614
1495
|
value: 'Unexpected output',
|
|
@@ -2103,336 +1984,6 @@ describe('runAssertion', () => {
|
|
|
2103
1984
|
reason: 'Levenshtein distance 8 is greater than threshold 5',
|
|
2104
1985
|
});
|
|
2105
1986
|
});
|
|
2106
|
-
it.each([
|
|
2107
|
-
[
|
|
2108
|
-
'boolean',
|
|
2109
|
-
jest.fn((output) => output === 'Expected output'),
|
|
2110
|
-
true,
|
|
2111
|
-
'Assertion passed',
|
|
2112
|
-
],
|
|
2113
|
-
['number', jest.fn((output) => output.length), true, 'Assertion passed'],
|
|
2114
|
-
[
|
|
2115
|
-
'GradingResult',
|
|
2116
|
-
jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
|
|
2117
|
-
true,
|
|
2118
|
-
'Custom reason',
|
|
2119
|
-
],
|
|
2120
|
-
[
|
|
2121
|
-
'boolean',
|
|
2122
|
-
jest.fn((output) => output !== 'Expected output'),
|
|
2123
|
-
false,
|
|
2124
|
-
'Custom function returned false',
|
|
2125
|
-
],
|
|
2126
|
-
['number', jest.fn((output) => 0), false, 'Custom function returned false'],
|
|
2127
|
-
[
|
|
2128
|
-
'GradingResult',
|
|
2129
|
-
jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
|
|
2130
|
-
false,
|
|
2131
|
-
'Custom reason',
|
|
2132
|
-
],
|
|
2133
|
-
[
|
|
2134
|
-
'boolean Promise',
|
|
2135
|
-
jest.fn((output) => Promise.resolve(true)),
|
|
2136
|
-
true,
|
|
2137
|
-
'Assertion passed',
|
|
2138
|
-
],
|
|
2139
|
-
])('should pass when the file:// assertion with .js file returns a %s', async (type, mockFn, expectedPass, expectedReason) => {
|
|
2140
|
-
const output = 'Expected output';
|
|
2141
|
-
jest.doMock(path.resolve('/path/to/assert.js'), () => mockFn, { virtual: true });
|
|
2142
|
-
const fileAssertion = {
|
|
2143
|
-
type: 'javascript',
|
|
2144
|
-
value: 'file:///path/to/assert.js',
|
|
2145
|
-
};
|
|
2146
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2147
|
-
const providerResponse = { output };
|
|
2148
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2149
|
-
prompt: 'Some prompt',
|
|
2150
|
-
provider,
|
|
2151
|
-
assertion: fileAssertion,
|
|
2152
|
-
test: {},
|
|
2153
|
-
providerResponse,
|
|
2154
|
-
});
|
|
2155
|
-
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2156
|
-
prompt: 'Some prompt',
|
|
2157
|
-
vars: {},
|
|
2158
|
-
test: {},
|
|
2159
|
-
provider,
|
|
2160
|
-
providerResponse,
|
|
2161
|
-
});
|
|
2162
|
-
expect(result).toMatchObject({
|
|
2163
|
-
pass: expectedPass,
|
|
2164
|
-
reason: expect.stringContaining(expectedReason),
|
|
2165
|
-
});
|
|
2166
|
-
});
|
|
2167
|
-
it.each([
|
|
2168
|
-
[
|
|
2169
|
-
'boolean',
|
|
2170
|
-
jest.fn((output) => output === 'Expected output'),
|
|
2171
|
-
true,
|
|
2172
|
-
'Assertion passed',
|
|
2173
|
-
],
|
|
2174
|
-
['number', jest.fn((output) => output.length), true, 'Assertion passed'],
|
|
2175
|
-
[
|
|
2176
|
-
'GradingResult',
|
|
2177
|
-
jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
|
|
2178
|
-
true,
|
|
2179
|
-
'Custom reason',
|
|
2180
|
-
],
|
|
2181
|
-
[
|
|
2182
|
-
'boolean',
|
|
2183
|
-
jest.fn((output) => output !== 'Expected output'),
|
|
2184
|
-
false,
|
|
2185
|
-
'Custom function returned false',
|
|
2186
|
-
],
|
|
2187
|
-
['number', jest.fn((output) => 0), false, 'Custom function returned false'],
|
|
2188
|
-
[
|
|
2189
|
-
'GradingResult',
|
|
2190
|
-
jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
|
|
2191
|
-
false,
|
|
2192
|
-
'Custom reason',
|
|
2193
|
-
],
|
|
2194
|
-
[
|
|
2195
|
-
'boolean Promise',
|
|
2196
|
-
jest.fn((output) => Promise.resolve(true)),
|
|
2197
|
-
true,
|
|
2198
|
-
'Assertion passed',
|
|
2199
|
-
],
|
|
2200
|
-
])('should pass when assertion is a package path', async (type, mockFn, expectedPass, expectedReason) => {
|
|
2201
|
-
const output = 'Expected output';
|
|
2202
|
-
const require = (0, node_module_1.createRequire)('');
|
|
2203
|
-
jest.spyOn(require, 'resolve').mockReturnValueOnce('/node_modules/@promptfoo/fake/index.js');
|
|
2204
|
-
jest.doMock(path.resolve('/node_modules/@promptfoo/fake/index.js'), () => {
|
|
2205
|
-
return {
|
|
2206
|
-
assertionFunction: mockFn,
|
|
2207
|
-
};
|
|
2208
|
-
}, { virtual: true });
|
|
2209
|
-
const fileAssertion = {
|
|
2210
|
-
type: 'javascript',
|
|
2211
|
-
value: 'package:@promptfoo/fake:assertionFunction',
|
|
2212
|
-
};
|
|
2213
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2214
|
-
const providerResponse = { output };
|
|
2215
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2216
|
-
prompt: 'Some prompt',
|
|
2217
|
-
provider,
|
|
2218
|
-
assertion: fileAssertion,
|
|
2219
|
-
test: {},
|
|
2220
|
-
providerResponse,
|
|
2221
|
-
});
|
|
2222
|
-
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2223
|
-
prompt: 'Some prompt',
|
|
2224
|
-
vars: {},
|
|
2225
|
-
test: {},
|
|
2226
|
-
provider,
|
|
2227
|
-
providerResponse,
|
|
2228
|
-
});
|
|
2229
|
-
expect(result).toMatchObject({
|
|
2230
|
-
pass: expectedPass,
|
|
2231
|
-
reason: expect.stringContaining(expectedReason),
|
|
2232
|
-
});
|
|
2233
|
-
});
|
|
2234
|
-
it('should resolve js paths relative to the configuration file', async () => {
|
|
2235
|
-
const output = 'Expected output';
|
|
2236
|
-
const mockFn = jest.fn((output) => output === 'Expected output');
|
|
2237
|
-
jest.doMock(path.resolve('/base/path/path/to/assert.js'), () => mockFn, { virtual: true });
|
|
2238
|
-
const fileAssertion = {
|
|
2239
|
-
type: 'javascript',
|
|
2240
|
-
value: 'file://./path/to/assert.js',
|
|
2241
|
-
};
|
|
2242
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2243
|
-
const providerResponse = { output };
|
|
2244
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2245
|
-
prompt: 'Some prompt',
|
|
2246
|
-
provider,
|
|
2247
|
-
assertion: fileAssertion,
|
|
2248
|
-
test: {},
|
|
2249
|
-
providerResponse,
|
|
2250
|
-
});
|
|
2251
|
-
expect(mockFn).toHaveBeenCalledWith('Expected output', {
|
|
2252
|
-
prompt: 'Some prompt',
|
|
2253
|
-
vars: {},
|
|
2254
|
-
test: {},
|
|
2255
|
-
provider,
|
|
2256
|
-
providerResponse,
|
|
2257
|
-
});
|
|
2258
|
-
expect(result).toMatchObject({
|
|
2259
|
-
pass: true,
|
|
2260
|
-
reason: 'Assertion passed',
|
|
2261
|
-
});
|
|
2262
|
-
});
|
|
2263
|
-
it('should handle output strings with both single and double quotes correctly in python assertion', async () => {
|
|
2264
|
-
const expectedPythonValue = '0.5';
|
|
2265
|
-
jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(expectedPythonValue);
|
|
2266
|
-
const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
|
|
2267
|
-
const pythonAssertion = {
|
|
2268
|
-
type: 'python',
|
|
2269
|
-
value: expectedPythonValue,
|
|
2270
|
-
};
|
|
2271
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2272
|
-
const providerResponse = { output };
|
|
2273
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2274
|
-
prompt: 'Some prompt',
|
|
2275
|
-
provider,
|
|
2276
|
-
assertion: pythonAssertion,
|
|
2277
|
-
test: {},
|
|
2278
|
-
providerResponse,
|
|
2279
|
-
});
|
|
2280
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
|
|
2281
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
|
|
2282
|
-
output,
|
|
2283
|
-
{ prompt: 'Some prompt', test: {}, vars: {}, provider, providerResponse },
|
|
2284
|
-
]);
|
|
2285
|
-
expect(result).toMatchObject({
|
|
2286
|
-
pass: true,
|
|
2287
|
-
reason: 'Assertion passed',
|
|
2288
|
-
score: Number(expectedPythonValue),
|
|
2289
|
-
});
|
|
2290
|
-
});
|
|
2291
|
-
it.each([
|
|
2292
|
-
['boolean', false, 0, 'Python code returned false', false, undefined],
|
|
2293
|
-
['number', 0, 0, 'Python code returned false', false, undefined],
|
|
2294
|
-
[
|
|
2295
|
-
'GradingResult',
|
|
2296
|
-
`{"pass": false, "score": 0, "reason": "Custom error"}`,
|
|
2297
|
-
0,
|
|
2298
|
-
'Custom error',
|
|
2299
|
-
false,
|
|
2300
|
-
undefined,
|
|
2301
|
-
],
|
|
2302
|
-
['boolean', true, 1, 'Assertion passed', true, undefined],
|
|
2303
|
-
['number', 1, 1, 'Assertion passed', true, undefined],
|
|
2304
|
-
[
|
|
2305
|
-
'GradingResult',
|
|
2306
|
-
`{"pass": true, "score": 1, "reason": "Custom success"}`,
|
|
2307
|
-
1,
|
|
2308
|
-
'Custom success',
|
|
2309
|
-
true,
|
|
2310
|
-
undefined,
|
|
2311
|
-
],
|
|
2312
|
-
[
|
|
2313
|
-
'GradingResult',
|
|
2314
|
-
// This score is less than the assertion threshold in the test
|
|
2315
|
-
`{"pass": true, "score": 0.4, "reason": "Foo bar"}`,
|
|
2316
|
-
0.4,
|
|
2317
|
-
'Python score 0.4 is less than threshold 0.5',
|
|
2318
|
-
false,
|
|
2319
|
-
0.5,
|
|
2320
|
-
],
|
|
2321
|
-
])('should handle inline return type %s with return value: %p', async (type, returnValue, expectedScore, expectedReason, expectedPass, threshold) => {
|
|
2322
|
-
const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
|
|
2323
|
-
let resolvedValue;
|
|
2324
|
-
if (type === 'GradingResult') {
|
|
2325
|
-
resolvedValue = JSON.parse(returnValue);
|
|
2326
|
-
}
|
|
2327
|
-
else {
|
|
2328
|
-
resolvedValue = returnValue;
|
|
2329
|
-
}
|
|
2330
|
-
const pythonAssertion = {
|
|
2331
|
-
type: 'python',
|
|
2332
|
-
value: returnValue.toString(),
|
|
2333
|
-
threshold,
|
|
2334
|
-
};
|
|
2335
|
-
jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(resolvedValue);
|
|
2336
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2337
|
-
const providerResponse = { output };
|
|
2338
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2339
|
-
prompt: 'Some prompt',
|
|
2340
|
-
provider,
|
|
2341
|
-
assertion: pythonAssertion,
|
|
2342
|
-
test: {},
|
|
2343
|
-
providerResponse,
|
|
2344
|
-
});
|
|
2345
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
|
|
2346
|
-
expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
|
|
2347
|
-
output,
|
|
2348
|
-
{ prompt: 'Some prompt', test: {}, vars: {}, provider, providerResponse },
|
|
2349
|
-
]);
|
|
2350
|
-
expect(result).toMatchObject({
|
|
2351
|
-
pass: expectedPass,
|
|
2352
|
-
reason: expect.stringMatching(expectedReason),
|
|
2353
|
-
score: expectedScore,
|
|
2354
|
-
});
|
|
2355
|
-
});
|
|
2356
|
-
it.each([
|
|
2357
|
-
['boolean', 'True', true, 'Assertion passed'],
|
|
2358
|
-
['number', '0.5', true, 'Assertion passed'],
|
|
2359
|
-
['boolean', true, true, 'Assertion passed'],
|
|
2360
|
-
['number', 0.5, true, 'Assertion passed'],
|
|
2361
|
-
[
|
|
2362
|
-
'GradingResult',
|
|
2363
|
-
'{"pass": true, "score": 1, "reason": "Custom reason"}',
|
|
2364
|
-
true,
|
|
2365
|
-
'Custom reason',
|
|
2366
|
-
],
|
|
2367
|
-
['boolean', 'False', false, 'Python code returned false'],
|
|
2368
|
-
['number', '0', false, 'Python code returned false'],
|
|
2369
|
-
[
|
|
2370
|
-
'GradingResult',
|
|
2371
|
-
'{"pass": false, "score": 0, "reason": "Custom reason"}',
|
|
2372
|
-
false,
|
|
2373
|
-
'Custom reason',
|
|
2374
|
-
],
|
|
2375
|
-
])('should handle when the file:// assertion with .py file returns a %s', async (type, pythonOutput, expectedPass, expectedReason) => {
|
|
2376
|
-
const output = 'Expected output';
|
|
2377
|
-
jest.mocked(pythonUtils_1.runPython).mockResolvedValueOnce(pythonOutput);
|
|
2378
|
-
const fileAssertion = {
|
|
2379
|
-
type: 'python',
|
|
2380
|
-
value: 'file:///path/to/assert.py',
|
|
2381
|
-
};
|
|
2382
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2383
|
-
const providerResponse = { output };
|
|
2384
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2385
|
-
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2386
|
-
provider,
|
|
2387
|
-
assertion: fileAssertion,
|
|
2388
|
-
test: {},
|
|
2389
|
-
providerResponse,
|
|
2390
|
-
});
|
|
2391
|
-
expect(pythonUtils_1.runPython).toHaveBeenCalledWith(path.resolve('/path/to/assert.py'), 'get_assert', [
|
|
2392
|
-
output,
|
|
2393
|
-
{
|
|
2394
|
-
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2395
|
-
vars: {},
|
|
2396
|
-
test: {},
|
|
2397
|
-
provider,
|
|
2398
|
-
providerResponse,
|
|
2399
|
-
},
|
|
2400
|
-
]);
|
|
2401
|
-
expect(result).toMatchObject({
|
|
2402
|
-
pass: expectedPass,
|
|
2403
|
-
reason: expect.stringContaining(expectedReason),
|
|
2404
|
-
});
|
|
2405
|
-
expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
|
|
2406
|
-
});
|
|
2407
|
-
it('should handle when python file assertions throw an error', async () => {
|
|
2408
|
-
const output = 'Expected output';
|
|
2409
|
-
jest
|
|
2410
|
-
.mocked(pythonUtils_1.runPython)
|
|
2411
|
-
.mockRejectedValue(new Error('The Python script `call_api` function must return a dict with an `output`'));
|
|
2412
|
-
const fileAssertion = {
|
|
2413
|
-
type: 'python',
|
|
2414
|
-
value: 'file:///path/to/assert.py',
|
|
2415
|
-
};
|
|
2416
|
-
const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
|
|
2417
|
-
const providerResponse = { output };
|
|
2418
|
-
const result = await (0, assertions_1.runAssertion)({
|
|
2419
|
-
prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
|
|
2420
|
-
provider,
|
|
2421
|
-
assertion: fileAssertion,
|
|
2422
|
-
test: {},
|
|
2423
|
-
providerResponse,
|
|
2424
|
-
});
|
|
2425
|
-
expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
|
|
2426
|
-
expect(result).toEqual({
|
|
2427
|
-
assertion: {
|
|
2428
|
-
type: 'python',
|
|
2429
|
-
value: 'file:///path/to/assert.py',
|
|
2430
|
-
},
|
|
2431
|
-
pass: false,
|
|
2432
|
-
reason: 'The Python script `call_api` function must return a dict with an `output`',
|
|
2433
|
-
score: 0,
|
|
2434
|
-
});
|
|
2435
|
-
});
|
|
2436
1987
|
describe('latency assertion', () => {
|
|
2437
1988
|
it('should pass when the latency assertion passes', async () => {
|
|
2438
1989
|
const output = 'Expected output';
|