promptfoo 0.103.3 → 0.103.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. package/LICENSE +10 -1
  2. package/dist/package.json +13 -13
  3. package/dist/src/app/assets/index-BR1tgrAf.css +1 -0
  4. package/dist/src/app/assets/{index-XXoiz61D.js → index-CmPQAxfe.js} +276 -276
  5. package/dist/src/app/assets/{index.es-DTKpmNcZ.js → index.es-DfqJ7zdu.js} +1 -1
  6. package/dist/src/app/assets/{sync-ClbHj3jr.js → sync-C-aW1Mpw.js} +1 -1
  7. package/dist/src/app/index.html +2 -2
  8. package/dist/src/assertions/index.d.ts +3 -2
  9. package/dist/src/assertions/index.d.ts.map +1 -1
  10. package/dist/src/assertions/index.js +21 -6
  11. package/dist/src/assertions/index.js.map +1 -1
  12. package/dist/src/assertions/utils.d.ts +6 -2
  13. package/dist/src/assertions/utils.d.ts.map +1 -1
  14. package/dist/src/commands/eval/filterErrorTests.d.ts +5 -0
  15. package/dist/src/commands/eval/filterErrorTests.d.ts.map +1 -0
  16. package/dist/src/commands/eval/filterErrorTests.js +19 -0
  17. package/dist/src/commands/eval/filterErrorTests.js.map +1 -0
  18. package/dist/src/commands/eval/filterTests.d.ts +1 -0
  19. package/dist/src/commands/eval/filterTests.d.ts.map +1 -1
  20. package/dist/src/commands/eval/filterTests.js +4 -0
  21. package/dist/src/commands/eval/filterTests.js.map +1 -1
  22. package/dist/src/commands/eval.d.ts.map +1 -1
  23. package/dist/src/commands/eval.js +1 -0
  24. package/dist/src/commands/eval.js.map +1 -1
  25. package/dist/src/database/tables.d.ts +51 -12
  26. package/dist/src/database/tables.d.ts.map +1 -1
  27. package/dist/src/envars.d.ts +1 -0
  28. package/dist/src/envars.d.ts.map +1 -1
  29. package/dist/src/envars.js.map +1 -1
  30. package/dist/src/evaluator.d.ts.map +1 -1
  31. package/dist/src/evaluator.js +1 -0
  32. package/dist/src/evaluator.js.map +1 -1
  33. package/dist/src/fetch.d.ts.map +1 -1
  34. package/dist/src/fetch.js +20 -3
  35. package/dist/src/fetch.js.map +1 -1
  36. package/dist/src/models/evalResult.d.ts.map +1 -1
  37. package/dist/src/models/evalResult.js +9 -1
  38. package/dist/src/models/evalResult.js.map +1 -1
  39. package/dist/src/providers/browser.js +1 -1
  40. package/dist/src/providers/browser.js.map +1 -1
  41. package/dist/src/providers/defaults.d.ts +1 -0
  42. package/dist/src/providers/defaults.d.ts.map +1 -1
  43. package/dist/src/providers/defaults.js +11 -0
  44. package/dist/src/providers/defaults.js.map +1 -1
  45. package/dist/src/providers/http.d.ts.map +1 -1
  46. package/dist/src/providers/http.js +39 -63
  47. package/dist/src/providers/http.js.map +1 -1
  48. package/dist/src/providers/llama.d.ts.map +1 -1
  49. package/dist/src/providers/llama.js +8 -1
  50. package/dist/src/providers/llama.js.map +1 -1
  51. package/dist/src/providers/openai.d.ts.map +1 -1
  52. package/dist/src/providers/openai.js +6 -13
  53. package/dist/src/providers/openai.js.map +1 -1
  54. package/dist/src/providers/watsonx.d.ts.map +1 -1
  55. package/dist/src/providers/watsonx.js +9 -0
  56. package/dist/src/providers/watsonx.js.map +1 -1
  57. package/dist/src/providers.d.ts.map +1 -1
  58. package/dist/src/providers.js +15 -0
  59. package/dist/src/providers.js.map +1 -1
  60. package/dist/src/redteam/commands/generate.d.ts.map +1 -1
  61. package/dist/src/redteam/commands/generate.js +4 -0
  62. package/dist/src/redteam/commands/generate.js.map +1 -1
  63. package/dist/src/redteam/constants.d.ts +4 -2
  64. package/dist/src/redteam/constants.d.ts.map +1 -1
  65. package/dist/src/redteam/constants.js +11 -7
  66. package/dist/src/redteam/constants.js.map +1 -1
  67. package/dist/src/redteam/plugins/base.d.ts.map +1 -1
  68. package/dist/src/redteam/plugins/base.js +3 -0
  69. package/dist/src/redteam/plugins/base.js.map +1 -1
  70. package/dist/src/redteam/plugins/cyberseceval.d.ts.map +1 -1
  71. package/dist/src/redteam/plugins/cyberseceval.js +13 -3
  72. package/dist/src/redteam/plugins/cyberseceval.js.map +1 -1
  73. package/dist/src/redteam/providers/crescendo/index.d.ts +1 -0
  74. package/dist/src/redteam/providers/crescendo/index.d.ts.map +1 -1
  75. package/dist/src/redteam/providers/crescendo/index.js +58 -3
  76. package/dist/src/redteam/providers/crescendo/index.js.map +1 -1
  77. package/dist/src/redteam/providers/iterative.d.ts.map +1 -1
  78. package/dist/src/redteam/providers/iterative.js +59 -5
  79. package/dist/src/redteam/providers/iterative.js.map +1 -1
  80. package/dist/src/redteam/providers/iterativeImage.d.ts +6 -2
  81. package/dist/src/redteam/providers/iterativeImage.d.ts.map +1 -1
  82. package/dist/src/redteam/providers/iterativeImage.js +322 -131
  83. package/dist/src/redteam/providers/iterativeImage.js.map +1 -1
  84. package/dist/src/redteam/providers/iterativeTree.d.ts +37 -26
  85. package/dist/src/redteam/providers/iterativeTree.d.ts.map +1 -1
  86. package/dist/src/redteam/providers/iterativeTree.js +193 -85
  87. package/dist/src/redteam/providers/iterativeTree.js.map +1 -1
  88. package/dist/src/redteam/shared.d.ts.map +1 -1
  89. package/dist/src/redteam/shared.js +4 -1
  90. package/dist/src/redteam/shared.js.map +1 -1
  91. package/dist/src/server/routes/providers.js +11 -6
  92. package/dist/src/server/routes/providers.js.map +1 -1
  93. package/dist/src/types/env.d.ts +3 -0
  94. package/dist/src/types/env.d.ts.map +1 -1
  95. package/dist/src/types/index.d.ts +1376 -351
  96. package/dist/src/types/index.d.ts.map +1 -1
  97. package/dist/src/types/index.js +4 -1
  98. package/dist/src/types/index.js.map +1 -1
  99. package/dist/src/types/providers.d.ts +22 -0
  100. package/dist/src/types/providers.d.ts.map +1 -1
  101. package/dist/src/types/providers.js.map +1 -1
  102. package/dist/src/util/config/manage.d.ts +1 -1
  103. package/dist/src/util/config/manage.d.ts.map +1 -1
  104. package/dist/src/util/config/manage.js.map +1 -1
  105. package/dist/src/util/convertEvalResultsToTable.d.ts.map +1 -1
  106. package/dist/src/util/convertEvalResultsToTable.js +14 -0
  107. package/dist/src/util/convertEvalResultsToTable.js.map +1 -1
  108. package/dist/src/util/index.d.ts +12 -4
  109. package/dist/src/util/index.d.ts.map +1 -1
  110. package/dist/src/validators/providers.d.ts +71 -2
  111. package/dist/src/validators/providers.d.ts.map +1 -1
  112. package/dist/src/validators/providers.js +3 -0
  113. package/dist/src/validators/providers.js.map +1 -1
  114. package/dist/src/validators/redteam.d.ts +24 -0
  115. package/dist/src/validators/redteam.d.ts.map +1 -1
  116. package/dist/test/assertions/index.test.js +26 -475
  117. package/dist/test/assertions/index.test.js.map +1 -1
  118. package/dist/test/assertions/javascript.test.d.ts +2 -0
  119. package/dist/test/assertions/javascript.test.d.ts.map +1 -0
  120. package/dist/test/assertions/javascript.test.js +679 -0
  121. package/dist/test/assertions/javascript.test.js.map +1 -0
  122. package/dist/test/assertions/python.test.d.ts +2 -0
  123. package/dist/test/assertions/python.test.d.ts.map +1 -0
  124. package/dist/test/assertions/python.test.js +377 -0
  125. package/dist/test/assertions/python.test.js.map +1 -0
  126. package/dist/test/cache.test.js +297 -100
  127. package/dist/test/cache.test.js.map +1 -1
  128. package/dist/test/commands/eval/filterErrorTests.test.d.ts +2 -0
  129. package/dist/test/commands/eval/filterErrorTests.test.d.ts.map +1 -0
  130. package/dist/test/commands/eval/filterErrorTests.test.js +110 -0
  131. package/dist/test/commands/eval/filterErrorTests.test.js.map +1 -0
  132. package/dist/test/evaluator.test.js +10 -0
  133. package/dist/test/evaluator.test.js.map +1 -1
  134. package/dist/test/factories/evalFactory.d.ts +39 -8
  135. package/dist/test/factories/evalFactory.d.ts.map +1 -1
  136. package/dist/test/fetch.test.js +147 -19
  137. package/dist/test/fetch.test.js.map +1 -1
  138. package/dist/test/models/eval.test.js +12 -0
  139. package/dist/test/models/eval.test.js.map +1 -1
  140. package/dist/test/providers/defaults.test.d.ts +2 -0
  141. package/dist/test/providers/defaults.test.d.ts.map +1 -0
  142. package/dist/test/providers/defaults.test.js +77 -0
  143. package/dist/test/providers/defaults.test.js.map +1 -0
  144. package/dist/test/providers/http.test.js +65 -9
  145. package/dist/test/providers/http.test.js.map +1 -1
  146. package/dist/test/providers/index.test.js +6 -3
  147. package/dist/test/providers/index.test.js.map +1 -1
  148. package/dist/test/providers/mistral.test.js +28 -19
  149. package/dist/test/providers/mistral.test.js.map +1 -1
  150. package/dist/test/providers/watsonx.test.js +58 -0
  151. package/dist/test/providers/watsonx.test.js.map +1 -1
  152. package/dist/test/redteam/providers/iterativeTree.test.js +329 -98
  153. package/dist/test/redteam/providers/iterativeTree.test.js.map +1 -1
  154. package/dist/test/server/providers.test.js +4 -4
  155. package/dist/test/server/providers.test.js.map +1 -1
  156. package/dist/test/util/config/main.test.js +3 -0
  157. package/dist/test/util/config/main.test.js.map +1 -1
  158. package/dist/tsconfig.tsbuildinfo +1 -1
  159. package/package.json +13 -13
  160. package/dist/src/app/assets/index-DdUNCsxz.css +0 -1
@@ -38,15 +38,12 @@ var __importDefault = (this && this.__importDefault) || function (mod) {
38
38
  Object.defineProperty(exports, "__esModule", { value: true });
39
39
  const dedent_1 = __importDefault(require("dedent"));
40
40
  const fs = __importStar(require("fs"));
41
- const node_module_1 = require("node:module");
42
41
  const path = __importStar(require("path"));
43
42
  const assertions_1 = require("../../src/assertions");
44
43
  const xml_1 = require("../../src/assertions/xml");
45
44
  const fetch_1 = require("../../src/fetch");
46
45
  const openai_1 = require("../../src/providers/openai");
47
46
  const replicate_1 = require("../../src/providers/replicate");
48
- const pythonUtils_1 = require("../../src/python/pythonUtils");
49
- const wrapper_1 = require("../../src/python/wrapper");
50
47
  const utils_1 = require("../util/utils");
51
48
  jest.mock('../../src/redteam/remoteGeneration', () => ({
52
49
  shouldGenerateRemote: jest.fn().mockReturnValue(false),
@@ -69,20 +66,6 @@ jest.mock('../../src/fetch', () => {
69
66
  fetchWithRetries: jest.fn(actual.fetchWithRetries),
70
67
  };
71
68
  });
72
- jest.mock('../../src/python/wrapper', () => {
73
- const actual = jest.requireActual('../../src/python/wrapper');
74
- return {
75
- ...actual,
76
- runPythonCode: jest.fn(actual.runPythonCode),
77
- };
78
- });
79
- jest.mock('../../src/python/pythonUtils', () => {
80
- const actual = jest.requireActual('../../src/python/pythonUtils');
81
- return {
82
- ...actual,
83
- runPython: jest.fn(actual.runPython),
84
- };
85
- });
86
69
  jest.mock('glob', () => ({
87
70
  globSync: jest.fn(),
88
71
  }));
@@ -551,22 +534,6 @@ describe('runAssertion', () => {
551
534
  type: 'javascript',
552
535
  value: 'output === "Expected output"',
553
536
  };
554
- const javascriptMultilineStringAssertion = {
555
- type: 'javascript',
556
- value: `
557
- if (output === "Expected output") {
558
- return {
559
- pass: true,
560
- score: 0.5,
561
- reason: 'Assertion passed',
562
- };
563
- }
564
- return {
565
- pass: false,
566
- score: 0,
567
- reason: 'Assertion failed',
568
- };`,
569
- };
570
537
  const javascriptStringAssertionWithNumber = {
571
538
  type: 'javascript',
572
539
  value: 'output.length * 10',
@@ -583,24 +550,6 @@ describe('runAssertion', () => {
583
550
  value: 'output.length * 10',
584
551
  threshold: 0.5,
585
552
  };
586
- const javascriptFunctionAssertion = {
587
- type: 'javascript',
588
- value: async (output) => ({
589
- pass: true,
590
- score: 0.5,
591
- reason: 'Assertion passed',
592
- assertion: null,
593
- }),
594
- };
595
- const javascriptFunctionFailAssertion = {
596
- type: 'javascript',
597
- value: async (output) => ({
598
- pass: false,
599
- score: 0.5,
600
- reason: 'Assertion failed',
601
- assertion: null,
602
- }),
603
- };
604
553
  it('should pass when the equality assertion passes', async () => {
605
554
  const output = 'Expected output';
606
555
  const result = await (0, assertions_1.runAssertion)({
@@ -1419,6 +1368,32 @@ describe('runAssertion', () => {
1419
1368
  reason: 'Assertion passed',
1420
1369
  });
1421
1370
  });
1371
+ it('should disregard invalid inputs for assert index', async () => {
1372
+ const output = 'Expected output';
1373
+ const result = await (0, assertions_1.runAssertion)({
1374
+ prompt: 'Some prompt',
1375
+ provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1376
+ assertion: javascriptBooleanAssertionWithConfig,
1377
+ test: {
1378
+ assert: [
1379
+ {
1380
+ type: 'javascript',
1381
+ value: 'output.length <= context.config.maximumOutputSize',
1382
+ config: {
1383
+ maximumOutputSize: 1,
1384
+ },
1385
+ },
1386
+ ],
1387
+ },
1388
+ providerResponse: { output },
1389
+ assertIndex: 45,
1390
+ });
1391
+ expect(result).toMatchObject({
1392
+ pass: true,
1393
+ score: 1.0,
1394
+ reason: 'Assertion passed',
1395
+ });
1396
+ });
1422
1397
  it('should fail when javascript returns an output string that is larger than the maximum size threshold', async () => {
1423
1398
  const output = 'Expected output with some extra characters';
1424
1399
  const result = await (0, assertions_1.runAssertion)({
@@ -1515,100 +1490,6 @@ describe('runAssertion', () => {
1515
1490
  reason: 'Assertion passed',
1516
1491
  });
1517
1492
  });
1518
- it('should pass when javascript function assertion passes - with vars', async () => {
1519
- const output = 'Expected output';
1520
- const javascriptStringAssertionWithVars = {
1521
- type: 'javascript',
1522
- value: 'output === "Expected output" && context.vars.foo === "bar"',
1523
- };
1524
- const result = await (0, assertions_1.runAssertion)({
1525
- prompt: 'Some prompt',
1526
- provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1527
- assertion: javascriptStringAssertionWithVars,
1528
- test: { vars: { foo: 'bar' } },
1529
- providerResponse: { output },
1530
- });
1531
- expect(result).toMatchObject({
1532
- pass: true,
1533
- reason: 'Assertion passed',
1534
- });
1535
- });
1536
- it('should fail when the javascript does not match vars', async () => {
1537
- const output = 'Expected output';
1538
- const javascriptStringAssertionWithVars = {
1539
- type: 'javascript',
1540
- value: 'output === "Expected output" && context.vars.foo === "something else"',
1541
- };
1542
- const result = await (0, assertions_1.runAssertion)({
1543
- prompt: 'Some prompt',
1544
- provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1545
- assertion: javascriptStringAssertionWithVars,
1546
- test: { vars: { foo: 'bar' } },
1547
- providerResponse: { output },
1548
- });
1549
- expect(result).toMatchObject({
1550
- pass: false,
1551
- reason: 'Custom function returned false\noutput === "Expected output" && context.vars.foo === "something else"',
1552
- });
1553
- });
1554
- it('should pass when the function returns pass', async () => {
1555
- const output = 'Expected output';
1556
- const result = await (0, assertions_1.runAssertion)({
1557
- prompt: 'Some prompt',
1558
- provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1559
- assertion: javascriptFunctionAssertion,
1560
- test: {},
1561
- providerResponse: { output },
1562
- });
1563
- expect(result).toMatchObject({
1564
- pass: true,
1565
- score: 0.5,
1566
- reason: 'Assertion passed',
1567
- });
1568
- });
1569
- it('should fail when the function returns fail', async () => {
1570
- const output = 'Expected output';
1571
- const result = await (0, assertions_1.runAssertion)({
1572
- prompt: 'Some prompt',
1573
- provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1574
- assertion: javascriptFunctionFailAssertion,
1575
- test: {},
1576
- providerResponse: { output },
1577
- });
1578
- expect(result).toMatchObject({
1579
- pass: false,
1580
- score: 0.5,
1581
- reason: 'Assertion failed',
1582
- });
1583
- });
1584
- it('should pass when the multiline javascript assertion passes', async () => {
1585
- const output = 'Expected output';
1586
- const result = await (0, assertions_1.runAssertion)({
1587
- prompt: 'Some prompt',
1588
- assertion: javascriptMultilineStringAssertion,
1589
- test: {},
1590
- providerResponse: { output },
1591
- provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1592
- });
1593
- expect(result).toMatchObject({
1594
- pass: true,
1595
- reason: 'Assertion passed',
1596
- });
1597
- });
1598
- it('should pass when the multiline javascript assertion fails', async () => {
1599
- const output = 'Not the expected output';
1600
- const result = await (0, assertions_1.runAssertion)({
1601
- prompt: 'Some prompt',
1602
- assertion: javascriptMultilineStringAssertion,
1603
- test: {},
1604
- providerResponse: { output },
1605
- provider: new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini'),
1606
- });
1607
- expect(result).toMatchObject({
1608
- pass: false,
1609
- reason: 'Assertion failed',
1610
- });
1611
- });
1612
1493
  const notContainsAssertion = {
1613
1494
  type: 'not-contains',
1614
1495
  value: 'Unexpected output',
@@ -2103,336 +1984,6 @@ describe('runAssertion', () => {
2103
1984
  reason: 'Levenshtein distance 8 is greater than threshold 5',
2104
1985
  });
2105
1986
  });
2106
- it.each([
2107
- [
2108
- 'boolean',
2109
- jest.fn((output) => output === 'Expected output'),
2110
- true,
2111
- 'Assertion passed',
2112
- ],
2113
- ['number', jest.fn((output) => output.length), true, 'Assertion passed'],
2114
- [
2115
- 'GradingResult',
2116
- jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
2117
- true,
2118
- 'Custom reason',
2119
- ],
2120
- [
2121
- 'boolean',
2122
- jest.fn((output) => output !== 'Expected output'),
2123
- false,
2124
- 'Custom function returned false',
2125
- ],
2126
- ['number', jest.fn((output) => 0), false, 'Custom function returned false'],
2127
- [
2128
- 'GradingResult',
2129
- jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
2130
- false,
2131
- 'Custom reason',
2132
- ],
2133
- [
2134
- 'boolean Promise',
2135
- jest.fn((output) => Promise.resolve(true)),
2136
- true,
2137
- 'Assertion passed',
2138
- ],
2139
- ])('should pass when the file:// assertion with .js file returns a %s', async (type, mockFn, expectedPass, expectedReason) => {
2140
- const output = 'Expected output';
2141
- jest.doMock(path.resolve('/path/to/assert.js'), () => mockFn, { virtual: true });
2142
- const fileAssertion = {
2143
- type: 'javascript',
2144
- value: 'file:///path/to/assert.js',
2145
- };
2146
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2147
- const providerResponse = { output };
2148
- const result = await (0, assertions_1.runAssertion)({
2149
- prompt: 'Some prompt',
2150
- provider,
2151
- assertion: fileAssertion,
2152
- test: {},
2153
- providerResponse,
2154
- });
2155
- expect(mockFn).toHaveBeenCalledWith('Expected output', {
2156
- prompt: 'Some prompt',
2157
- vars: {},
2158
- test: {},
2159
- provider,
2160
- providerResponse,
2161
- });
2162
- expect(result).toMatchObject({
2163
- pass: expectedPass,
2164
- reason: expect.stringContaining(expectedReason),
2165
- });
2166
- });
2167
- it.each([
2168
- [
2169
- 'boolean',
2170
- jest.fn((output) => output === 'Expected output'),
2171
- true,
2172
- 'Assertion passed',
2173
- ],
2174
- ['number', jest.fn((output) => output.length), true, 'Assertion passed'],
2175
- [
2176
- 'GradingResult',
2177
- jest.fn((output) => ({ pass: true, score: 1, reason: 'Custom reason' })),
2178
- true,
2179
- 'Custom reason',
2180
- ],
2181
- [
2182
- 'boolean',
2183
- jest.fn((output) => output !== 'Expected output'),
2184
- false,
2185
- 'Custom function returned false',
2186
- ],
2187
- ['number', jest.fn((output) => 0), false, 'Custom function returned false'],
2188
- [
2189
- 'GradingResult',
2190
- jest.fn((output) => ({ pass: false, score: 0.1, reason: 'Custom reason' })),
2191
- false,
2192
- 'Custom reason',
2193
- ],
2194
- [
2195
- 'boolean Promise',
2196
- jest.fn((output) => Promise.resolve(true)),
2197
- true,
2198
- 'Assertion passed',
2199
- ],
2200
- ])('should pass when assertion is a package path', async (type, mockFn, expectedPass, expectedReason) => {
2201
- const output = 'Expected output';
2202
- const require = (0, node_module_1.createRequire)('');
2203
- jest.spyOn(require, 'resolve').mockReturnValueOnce('/node_modules/@promptfoo/fake/index.js');
2204
- jest.doMock(path.resolve('/node_modules/@promptfoo/fake/index.js'), () => {
2205
- return {
2206
- assertionFunction: mockFn,
2207
- };
2208
- }, { virtual: true });
2209
- const fileAssertion = {
2210
- type: 'javascript',
2211
- value: 'package:@promptfoo/fake:assertionFunction',
2212
- };
2213
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2214
- const providerResponse = { output };
2215
- const result = await (0, assertions_1.runAssertion)({
2216
- prompt: 'Some prompt',
2217
- provider,
2218
- assertion: fileAssertion,
2219
- test: {},
2220
- providerResponse,
2221
- });
2222
- expect(mockFn).toHaveBeenCalledWith('Expected output', {
2223
- prompt: 'Some prompt',
2224
- vars: {},
2225
- test: {},
2226
- provider,
2227
- providerResponse,
2228
- });
2229
- expect(result).toMatchObject({
2230
- pass: expectedPass,
2231
- reason: expect.stringContaining(expectedReason),
2232
- });
2233
- });
2234
- it('should resolve js paths relative to the configuration file', async () => {
2235
- const output = 'Expected output';
2236
- const mockFn = jest.fn((output) => output === 'Expected output');
2237
- jest.doMock(path.resolve('/base/path/path/to/assert.js'), () => mockFn, { virtual: true });
2238
- const fileAssertion = {
2239
- type: 'javascript',
2240
- value: 'file://./path/to/assert.js',
2241
- };
2242
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2243
- const providerResponse = { output };
2244
- const result = await (0, assertions_1.runAssertion)({
2245
- prompt: 'Some prompt',
2246
- provider,
2247
- assertion: fileAssertion,
2248
- test: {},
2249
- providerResponse,
2250
- });
2251
- expect(mockFn).toHaveBeenCalledWith('Expected output', {
2252
- prompt: 'Some prompt',
2253
- vars: {},
2254
- test: {},
2255
- provider,
2256
- providerResponse,
2257
- });
2258
- expect(result).toMatchObject({
2259
- pass: true,
2260
- reason: 'Assertion passed',
2261
- });
2262
- });
2263
- it('should handle output strings with both single and double quotes correctly in python assertion', async () => {
2264
- const expectedPythonValue = '0.5';
2265
- jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(expectedPythonValue);
2266
- const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
2267
- const pythonAssertion = {
2268
- type: 'python',
2269
- value: expectedPythonValue,
2270
- };
2271
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2272
- const providerResponse = { output };
2273
- const result = await (0, assertions_1.runAssertion)({
2274
- prompt: 'Some prompt',
2275
- provider,
2276
- assertion: pythonAssertion,
2277
- test: {},
2278
- providerResponse,
2279
- });
2280
- expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
2281
- expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
2282
- output,
2283
- { prompt: 'Some prompt', test: {}, vars: {}, provider, providerResponse },
2284
- ]);
2285
- expect(result).toMatchObject({
2286
- pass: true,
2287
- reason: 'Assertion passed',
2288
- score: Number(expectedPythonValue),
2289
- });
2290
- });
2291
- it.each([
2292
- ['boolean', false, 0, 'Python code returned false', false, undefined],
2293
- ['number', 0, 0, 'Python code returned false', false, undefined],
2294
- [
2295
- 'GradingResult',
2296
- `{"pass": false, "score": 0, "reason": "Custom error"}`,
2297
- 0,
2298
- 'Custom error',
2299
- false,
2300
- undefined,
2301
- ],
2302
- ['boolean', true, 1, 'Assertion passed', true, undefined],
2303
- ['number', 1, 1, 'Assertion passed', true, undefined],
2304
- [
2305
- 'GradingResult',
2306
- `{"pass": true, "score": 1, "reason": "Custom success"}`,
2307
- 1,
2308
- 'Custom success',
2309
- true,
2310
- undefined,
2311
- ],
2312
- [
2313
- 'GradingResult',
2314
- // This score is less than the assertion threshold in the test
2315
- `{"pass": true, "score": 0.4, "reason": "Foo bar"}`,
2316
- 0.4,
2317
- 'Python score 0.4 is less than threshold 0.5',
2318
- false,
2319
- 0.5,
2320
- ],
2321
- ])('should handle inline return type %s with return value: %p', async (type, returnValue, expectedScore, expectedReason, expectedPass, threshold) => {
2322
- const output = 'This is a string with "double quotes"\n and \'single quotes\' \n\n and some \n\t newlines.';
2323
- let resolvedValue;
2324
- if (type === 'GradingResult') {
2325
- resolvedValue = JSON.parse(returnValue);
2326
- }
2327
- else {
2328
- resolvedValue = returnValue;
2329
- }
2330
- const pythonAssertion = {
2331
- type: 'python',
2332
- value: returnValue.toString(),
2333
- threshold,
2334
- };
2335
- jest.mocked(wrapper_1.runPythonCode).mockResolvedValueOnce(resolvedValue);
2336
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2337
- const providerResponse = { output };
2338
- const result = await (0, assertions_1.runAssertion)({
2339
- prompt: 'Some prompt',
2340
- provider,
2341
- assertion: pythonAssertion,
2342
- test: {},
2343
- providerResponse,
2344
- });
2345
- expect(wrapper_1.runPythonCode).toHaveBeenCalledTimes(1);
2346
- expect(wrapper_1.runPythonCode).toHaveBeenCalledWith(expect.anything(), 'main', [
2347
- output,
2348
- { prompt: 'Some prompt', test: {}, vars: {}, provider, providerResponse },
2349
- ]);
2350
- expect(result).toMatchObject({
2351
- pass: expectedPass,
2352
- reason: expect.stringMatching(expectedReason),
2353
- score: expectedScore,
2354
- });
2355
- });
2356
- it.each([
2357
- ['boolean', 'True', true, 'Assertion passed'],
2358
- ['number', '0.5', true, 'Assertion passed'],
2359
- ['boolean', true, true, 'Assertion passed'],
2360
- ['number', 0.5, true, 'Assertion passed'],
2361
- [
2362
- 'GradingResult',
2363
- '{"pass": true, "score": 1, "reason": "Custom reason"}',
2364
- true,
2365
- 'Custom reason',
2366
- ],
2367
- ['boolean', 'False', false, 'Python code returned false'],
2368
- ['number', '0', false, 'Python code returned false'],
2369
- [
2370
- 'GradingResult',
2371
- '{"pass": false, "score": 0, "reason": "Custom reason"}',
2372
- false,
2373
- 'Custom reason',
2374
- ],
2375
- ])('should handle when the file:// assertion with .py file returns a %s', async (type, pythonOutput, expectedPass, expectedReason) => {
2376
- const output = 'Expected output';
2377
- jest.mocked(pythonUtils_1.runPython).mockResolvedValueOnce(pythonOutput);
2378
- const fileAssertion = {
2379
- type: 'python',
2380
- value: 'file:///path/to/assert.py',
2381
- };
2382
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2383
- const providerResponse = { output };
2384
- const result = await (0, assertions_1.runAssertion)({
2385
- prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
2386
- provider,
2387
- assertion: fileAssertion,
2388
- test: {},
2389
- providerResponse,
2390
- });
2391
- expect(pythonUtils_1.runPython).toHaveBeenCalledWith(path.resolve('/path/to/assert.py'), 'get_assert', [
2392
- output,
2393
- {
2394
- prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
2395
- vars: {},
2396
- test: {},
2397
- provider,
2398
- providerResponse,
2399
- },
2400
- ]);
2401
- expect(result).toMatchObject({
2402
- pass: expectedPass,
2403
- reason: expect.stringContaining(expectedReason),
2404
- });
2405
- expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
2406
- });
2407
- it('should handle when python file assertions throw an error', async () => {
2408
- const output = 'Expected output';
2409
- jest
2410
- .mocked(pythonUtils_1.runPython)
2411
- .mockRejectedValue(new Error('The Python script `call_api` function must return a dict with an `output`'));
2412
- const fileAssertion = {
2413
- type: 'python',
2414
- value: 'file:///path/to/assert.py',
2415
- };
2416
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
2417
- const providerResponse = { output };
2418
- const result = await (0, assertions_1.runAssertion)({
2419
- prompt: 'Some prompt that includes "double quotes" and \'single quotes\'',
2420
- provider,
2421
- assertion: fileAssertion,
2422
- test: {},
2423
- providerResponse,
2424
- });
2425
- expect(pythonUtils_1.runPython).toHaveBeenCalledTimes(1);
2426
- expect(result).toEqual({
2427
- assertion: {
2428
- type: 'python',
2429
- value: 'file:///path/to/assert.py',
2430
- },
2431
- pass: false,
2432
- reason: 'The Python script `call_api` function must return a dict with an `output`',
2433
- score: 0,
2434
- });
2435
- });
2436
1987
  describe('latency assertion', () => {
2437
1988
  it('should pass when the latency assertion passes', async () => {
2438
1989
  const output = 'Expected output';