promptfoo 0.103.13 → 0.103.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/dist/package.json +13 -13
  2. package/dist/src/app/assets/{index-CNZfEf8i.js → index-gCO1so_L.js} +233 -232
  3. package/dist/src/app/assets/{index.es-vrsSPryr.js → index.es-BebRurvf.js} +1 -1
  4. package/dist/src/app/assets/{sync-B_Y0n4tJ.js → sync-cefC8sLm.js} +1 -1
  5. package/dist/src/app/index.html +1 -1
  6. package/dist/src/fetch.d.ts +2 -0
  7. package/dist/src/fetch.d.ts.map +1 -1
  8. package/dist/src/fetch.js +2 -1
  9. package/dist/src/fetch.js.map +1 -1
  10. package/dist/src/providers/azure.d.ts.map +1 -1
  11. package/dist/src/providers/azure.js +10 -0
  12. package/dist/src/providers/azure.js.map +1 -1
  13. package/dist/src/providers/http.d.ts +5 -3
  14. package/dist/src/providers/http.d.ts.map +1 -1
  15. package/dist/src/providers/http.js +7 -6
  16. package/dist/src/providers/http.js.map +1 -1
  17. package/dist/src/redteam/commands/generate.d.ts.map +1 -1
  18. package/dist/src/redteam/commands/generate.js +30 -9
  19. package/dist/src/redteam/commands/generate.js.map +1 -1
  20. package/dist/src/redteam/graders.d.ts.map +1 -1
  21. package/dist/src/redteam/graders.js +13 -13
  22. package/dist/src/redteam/graders.js.map +1 -1
  23. package/dist/src/redteam/index.d.ts +1 -0
  24. package/dist/src/redteam/index.d.ts.map +1 -1
  25. package/dist/src/redteam/index.js +1 -1
  26. package/dist/src/redteam/index.js.map +1 -1
  27. package/dist/src/redteam/plugins/harmful/graders.d.ts +52 -0
  28. package/dist/src/redteam/plugins/harmful/graders.d.ts.map +1 -1
  29. package/dist/src/redteam/plugins/harmful/graders.js +577 -22
  30. package/dist/src/redteam/plugins/harmful/graders.js.map +1 -1
  31. package/dist/src/redteam/types.d.ts +1 -0
  32. package/dist/src/redteam/types.d.ts.map +1 -1
  33. package/dist/src/redteam/util.d.ts.map +1 -1
  34. package/dist/src/redteam/util.js +2 -0
  35. package/dist/src/redteam/util.js.map +1 -1
  36. package/dist/src/server/server.d.ts.map +1 -1
  37. package/dist/src/server/server.js +1 -7
  38. package/dist/src/server/server.js.map +1 -1
  39. package/dist/src/validators/redteam.d.ts +3 -0
  40. package/dist/src/validators/redteam.d.ts.map +1 -1
  41. package/dist/src/validators/redteam.js +2 -0
  42. package/dist/src/validators/redteam.js.map +1 -1
  43. package/dist/test/assertions/answerRelevance.test.d.ts +2 -0
  44. package/dist/test/assertions/answerRelevance.test.d.ts.map +1 -0
  45. package/dist/test/assertions/answerRelevance.test.js +177 -0
  46. package/dist/test/assertions/answerRelevance.test.js.map +1 -0
  47. package/dist/test/assertions/contextFaithfulness.test.d.ts +2 -0
  48. package/dist/test/assertions/contextFaithfulness.test.d.ts.map +1 -0
  49. package/dist/test/assertions/contextFaithfulness.test.js +226 -0
  50. package/dist/test/assertions/contextFaithfulness.test.js.map +1 -0
  51. package/dist/test/assertions/contextRecall.test.d.ts +2 -0
  52. package/dist/test/assertions/contextRecall.test.d.ts.map +1 -0
  53. package/dist/test/assertions/contextRecall.test.js +243 -0
  54. package/dist/test/assertions/contextRecall.test.js.map +1 -0
  55. package/dist/test/assertions/contextRelevance.test.d.ts +2 -0
  56. package/dist/test/assertions/contextRelevance.test.d.ts.map +1 -0
  57. package/dist/test/assertions/contextRelevance.test.js +238 -0
  58. package/dist/test/assertions/contextRelevance.test.js.map +1 -0
  59. package/dist/test/assertions/geval.test.d.ts +2 -0
  60. package/dist/test/assertions/geval.test.d.ts.map +1 -0
  61. package/dist/test/assertions/geval.test.js +222 -0
  62. package/dist/test/assertions/geval.test.js.map +1 -0
  63. package/dist/test/assertions/modelGradedClosedQa.test.d.ts +2 -0
  64. package/dist/test/assertions/modelGradedClosedQa.test.d.ts.map +1 -0
  65. package/dist/test/assertions/modelGradedClosedQa.test.js +200 -0
  66. package/dist/test/assertions/modelGradedClosedQa.test.js.map +1 -0
  67. package/dist/test/fetch.test.js +66 -18
  68. package/dist/test/fetch.test.js.map +1 -1
  69. package/dist/test/providers/azure.test.js +41 -11
  70. package/dist/test/providers/azure.test.js.map +1 -1
  71. package/dist/test/providers/http.test.js +70 -2
  72. package/dist/test/providers/http.test.js.map +1 -1
  73. package/dist/test/providers/index.test.js +0 -454
  74. package/dist/test/providers/index.test.js.map +1 -1
  75. package/dist/test/providers/openai.test.js +509 -0
  76. package/dist/test/providers/openai.test.js.map +1 -1
  77. package/dist/test/redteam/commands/generate.test.js +7 -0
  78. package/dist/test/redteam/commands/generate.test.js.map +1 -1
  79. package/dist/test/redteam/extraction/purpose.test.js +1 -0
  80. package/dist/test/redteam/extraction/purpose.test.js.map +1 -1
  81. package/dist/test/redteam/strategies/index.test.js +1 -0
  82. package/dist/test/redteam/strategies/index.test.js.map +1 -1
  83. package/dist/tsconfig.tsbuildinfo +1 -1
  84. package/package.json +13 -13
@@ -122,199 +122,6 @@ describe('call provider apis', () => {
122
122
  jest.clearAllMocks();
123
123
  await (0, cache_1.clearCache)();
124
124
  });
125
- it('OpenAiCompletionProvider callApi', async () => {
126
- const mockResponse = {
127
- ...defaultMockResponse,
128
- text: jest.fn().mockResolvedValue(JSON.stringify({
129
- choices: [{ text: 'Test output' }],
130
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
131
- })),
132
- };
133
- mockFetch.mockResolvedValue(mockResponse);
134
- const provider = new openai_1.OpenAiCompletionProvider('text-davinci-003');
135
- const result = await provider.callApi('Test prompt');
136
- expect(mockFetch).toHaveBeenCalledTimes(1);
137
- expect(result.output).toBe('Test output');
138
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
139
- });
140
- it('OpenAiChatCompletionProvider callApi', async () => {
141
- const mockResponse = {
142
- ...defaultMockResponse,
143
- text: jest.fn().mockResolvedValue(JSON.stringify({
144
- choices: [{ message: { content: 'Test output' } }],
145
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
146
- })),
147
- ok: true,
148
- };
149
- mockFetch.mockResolvedValue(mockResponse);
150
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
151
- const result = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Test prompt' }]));
152
- expect(mockFetch).toHaveBeenCalledTimes(1);
153
- expect(result.output).toBe('Test output');
154
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
155
- });
156
- it('OpenAiChatCompletionProvider callApi with caching', async () => {
157
- const mockResponse = {
158
- ...defaultMockResponse,
159
- text: jest.fn().mockResolvedValue(JSON.stringify({
160
- choices: [{ message: { content: 'Test output 2' } }],
161
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
162
- })),
163
- ok: true,
164
- };
165
- mockFetch.mockResolvedValue(mockResponse);
166
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
167
- const result = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Test prompt 2' }]));
168
- expect(mockFetch).toHaveBeenCalledTimes(1);
169
- expect(result.output).toBe('Test output 2');
170
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
171
- const result2 = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Test prompt 2' }]));
172
- expect(mockFetch).toHaveBeenCalledTimes(1);
173
- expect(result2.output).toBe('Test output 2');
174
- expect(result2.tokenUsage).toEqual({ total: 10, cached: 10 });
175
- });
176
- it('OpenAiChatCompletionProvider callApi with cache disabled', async () => {
177
- const mockResponse = {
178
- ...defaultMockResponse,
179
- text: jest.fn().mockResolvedValue(JSON.stringify({
180
- choices: [{ message: { content: 'Test output' } }],
181
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
182
- })),
183
- ok: true,
184
- };
185
- mockFetch.mockResolvedValue(mockResponse);
186
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
187
- const result = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Test prompt' }]));
188
- expect(mockFetch).toHaveBeenCalledTimes(1);
189
- expect(result.output).toBe('Test output');
190
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
191
- (0, cache_1.disableCache)();
192
- const result2 = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Test prompt' }]));
193
- expect(mockFetch).toHaveBeenCalledTimes(2);
194
- expect(result2.output).toBe('Test output');
195
- expect(result2.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
196
- (0, cache_1.enableCache)();
197
- });
198
- it('OpenAiChatCompletionProvider constructor with config', async () => {
199
- const config = {
200
- temperature: 3.1415926,
201
- max_tokens: 201,
202
- };
203
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', { config });
204
- const prompt = 'Test prompt';
205
- await provider.callApi(prompt);
206
- expect(mockFetch).toHaveBeenCalledWith(expect.any(String), expect.objectContaining({
207
- body: expect.stringMatching(`temperature\":3.1415926`),
208
- }));
209
- expect(provider.config.temperature).toBe(config.temperature);
210
- expect(provider.config.max_tokens).toBe(config.max_tokens);
211
- });
212
- it('OpenAiChatCompletionProvider callApi with structured output', async () => {
213
- const mockResponse = {
214
- ...defaultMockResponse,
215
- text: jest.fn().mockResolvedValue(JSON.stringify({
216
- choices: [{ message: { content: '{"name": "John", "age": 30}' } }],
217
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
218
- })),
219
- ok: true,
220
- };
221
- mockFetch.mockResolvedValue(mockResponse);
222
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
223
- config: {
224
- response_format: {
225
- type: 'json_schema',
226
- json_schema: {
227
- name: 'person',
228
- strict: true,
229
- schema: {
230
- type: 'object',
231
- properties: {
232
- name: { type: 'string' },
233
- age: { type: 'number' },
234
- },
235
- required: ['name', 'age'],
236
- additionalProperties: false,
237
- },
238
- },
239
- },
240
- },
241
- });
242
- const result = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Get me a person' }]));
243
- expect(mockFetch).toHaveBeenCalledTimes(1);
244
- expect(result.output).toEqual({ name: 'John', age: 30 });
245
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
246
- });
247
- it('OpenAiChatCompletionProvider callApi handles model refusals', async () => {
248
- const mockResponse = {
249
- ...defaultMockResponse,
250
- text: jest.fn().mockResolvedValue(JSON.stringify({
251
- choices: [{ message: { refusal: 'Content policy violation' } }],
252
- usage: { total_tokens: 5, prompt_tokens: 5, completion_tokens: 0 },
253
- })),
254
- ok: true,
255
- };
256
- mockFetch.mockResolvedValue(mockResponse);
257
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini');
258
- const result = await provider.callApi(JSON.stringify([{ role: 'user', content: 'Generate inappropriate content' }]));
259
- expect(mockFetch).toHaveBeenCalledTimes(1);
260
- expect(result.output).toBe('Content policy violation');
261
- expect(result.tokenUsage).toEqual({ total: 5, prompt: 5, completion: 0 });
262
- expect(result.isRefusal).toBe(true);
263
- });
264
- it('OpenAiChatCompletionProvider callApi with function tool callbacks', async () => {
265
- const mockResponse = {
266
- ...defaultMockResponse,
267
- text: jest.fn().mockResolvedValue(JSON.stringify({
268
- choices: [
269
- {
270
- message: {
271
- content: null,
272
- tool_calls: [
273
- {
274
- function: {
275
- name: 'get_weather',
276
- arguments: '{"location":"New York"}',
277
- },
278
- },
279
- ],
280
- },
281
- },
282
- ],
283
- usage: { total_tokens: 15, prompt_tokens: 10, completion_tokens: 5 },
284
- })),
285
- ok: true,
286
- };
287
- mockFetch.mockResolvedValue(mockResponse);
288
- const mockWeatherFunction = jest.fn().mockResolvedValue('Sunny, 25°C');
289
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
290
- config: {
291
- tools: [
292
- {
293
- type: 'function',
294
- function: {
295
- name: 'get_weather',
296
- description: 'Get the weather for a location',
297
- parameters: {
298
- type: 'object',
299
- properties: {
300
- location: { type: 'string' },
301
- },
302
- required: ['location'],
303
- },
304
- },
305
- },
306
- ],
307
- functionToolCallbacks: {
308
- get_weather: mockWeatherFunction,
309
- },
310
- },
311
- });
312
- const result = await provider.callApi(JSON.stringify([{ role: 'user', content: "What's the weather in New York?" }]));
313
- expect(mockFetch).toHaveBeenCalledTimes(1);
314
- expect(mockWeatherFunction).toHaveBeenCalledWith('{"location":"New York"}');
315
- expect(result.output).toBe('Sunny, 25°C');
316
- expect(result.tokenUsage).toEqual({ total: 15, prompt: 10, completion: 5 });
317
- });
318
125
  it('AzureOpenAiCompletionProvider callApi', async () => {
319
126
  const mockResponse = {
320
127
  ...defaultMockResponse,
@@ -748,267 +555,6 @@ describe('call provider apis', () => {
748
555
  jest.restoreAllMocks();
749
556
  });
750
557
  });
751
- describe('OpenAiChatCompletionProvider with functionToolCallbacks', () => {
752
- it('should call function tool and return result', async () => {
753
- const mockResponse = {
754
- ...defaultMockResponse,
755
- text: jest.fn().mockResolvedValue(JSON.stringify({
756
- choices: [
757
- {
758
- message: {
759
- content: null,
760
- function_call: {
761
- name: 'addNumbers',
762
- arguments: '{"a":5,"b":6}',
763
- },
764
- },
765
- },
766
- ],
767
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
768
- })),
769
- ok: true,
770
- };
771
- mockFetch.mockResolvedValue(mockResponse);
772
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
773
- config: {
774
- tools: [
775
- {
776
- type: 'function',
777
- function: {
778
- name: 'addNumbers',
779
- description: 'Add two numbers together',
780
- parameters: {
781
- type: 'object',
782
- properties: {
783
- a: { type: 'number' },
784
- b: { type: 'number' },
785
- },
786
- required: ['a', 'b'],
787
- },
788
- },
789
- },
790
- ],
791
- functionToolCallbacks: {
792
- addNumbers: (parametersJsonString) => {
793
- const { a, b } = JSON.parse(parametersJsonString);
794
- return Promise.resolve(JSON.stringify(a + b));
795
- },
796
- },
797
- },
798
- });
799
- const result = await provider.callApi('Add 5 and 6');
800
- expect(mockFetch).toHaveBeenCalledTimes(1);
801
- expect(result.output).toBe('11');
802
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
803
- });
804
- it('should handle multiple function tool calls', async () => {
805
- const mockResponse = {
806
- ...defaultMockResponse,
807
- text: jest.fn().mockResolvedValue(JSON.stringify({
808
- choices: [
809
- {
810
- message: {
811
- content: null,
812
- tool_calls: [
813
- {
814
- function: {
815
- name: 'addNumbers',
816
- arguments: '{"a":5,"b":6}',
817
- },
818
- },
819
- {
820
- function: {
821
- name: 'multiplyNumbers',
822
- arguments: '{"x":2,"y":3}',
823
- },
824
- },
825
- ],
826
- },
827
- },
828
- ],
829
- usage: { total_tokens: 15, prompt_tokens: 7, completion_tokens: 8 },
830
- })),
831
- ok: true,
832
- };
833
- mockFetch.mockResolvedValue(mockResponse);
834
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
835
- config: {
836
- tools: [
837
- {
838
- type: 'function',
839
- function: {
840
- name: 'addNumbers',
841
- description: 'Add two numbers together',
842
- parameters: {
843
- type: 'object',
844
- properties: {
845
- a: { type: 'number' },
846
- b: { type: 'number' },
847
- },
848
- required: ['a', 'b'],
849
- },
850
- },
851
- },
852
- {
853
- type: 'function',
854
- function: {
855
- name: 'multiplyNumbers',
856
- description: 'Multiply two numbers',
857
- parameters: {
858
- type: 'object',
859
- properties: {
860
- x: { type: 'number' },
861
- y: { type: 'number' },
862
- },
863
- required: ['x', 'y'],
864
- },
865
- },
866
- },
867
- ],
868
- functionToolCallbacks: {
869
- addNumbers: (parametersJsonString) => {
870
- const { a, b } = JSON.parse(parametersJsonString);
871
- return Promise.resolve(JSON.stringify(a + b));
872
- },
873
- multiplyNumbers: (parametersJsonString) => {
874
- const { x, y } = JSON.parse(parametersJsonString);
875
- return Promise.resolve(JSON.stringify(x * y));
876
- },
877
- },
878
- },
879
- });
880
- const result = await provider.callApi('Add 5 and 6, then multiply 2 and 3');
881
- expect(mockFetch).toHaveBeenCalledTimes(1);
882
- expect(result.output).toBe('11\n6');
883
- expect(result.tokenUsage).toEqual({ total: 15, prompt: 7, completion: 8 });
884
- });
885
- it('should handle errors in function tool callbacks', async () => {
886
- const mockResponse = {
887
- ...defaultMockResponse,
888
- text: jest.fn().mockResolvedValue(JSON.stringify({
889
- choices: [
890
- {
891
- message: {
892
- content: null,
893
- function_call: {
894
- name: 'errorFunction',
895
- arguments: '{}',
896
- },
897
- },
898
- },
899
- ],
900
- usage: { total_tokens: 5, prompt_tokens: 2, completion_tokens: 3 },
901
- })),
902
- ok: true,
903
- };
904
- mockFetch.mockResolvedValue(mockResponse);
905
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
906
- config: {
907
- tools: [
908
- {
909
- type: 'function',
910
- function: {
911
- name: 'errorFunction',
912
- description: 'A function that always throws an error',
913
- parameters: {
914
- type: 'object',
915
- properties: {},
916
- },
917
- },
918
- },
919
- ],
920
- functionToolCallbacks: {
921
- errorFunction: () => {
922
- throw new Error('Test error');
923
- },
924
- },
925
- },
926
- });
927
- const result = await provider.callApi('Call the error function');
928
- expect(mockFetch).toHaveBeenCalledTimes(1);
929
- expect(result.output).toEqual({ arguments: '{}', name: 'errorFunction' });
930
- expect(result.tokenUsage).toEqual({ total: 5, prompt: 2, completion: 3 });
931
- });
932
- });
933
- describe('OpenAiChatCompletionProvider response_format', () => {
934
- it('should prioritize response_format from prompt config over provider config', async () => {
935
- const providerResponseFormat = {
936
- type: 'json_object',
937
- };
938
- const promptResponseFormat = {
939
- type: 'json_schema',
940
- json_schema: {
941
- name: 'test_schema',
942
- strict: true,
943
- schema: {
944
- type: 'object',
945
- properties: { key2: { type: 'string' } },
946
- additionalProperties: false,
947
- },
948
- },
949
- };
950
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
951
- config: {
952
- response_format: providerResponseFormat,
953
- },
954
- });
955
- const mockResponse = {
956
- ...defaultMockResponse,
957
- text: jest.fn().mockResolvedValue(JSON.stringify({
958
- choices: [{ message: { content: '{"key2": "value2"}' } }],
959
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
960
- })),
961
- };
962
- jest.mocked(fetch).mockResolvedValue(mockResponse);
963
- const result = await provider.callApi('Test prompt', {
964
- vars: {},
965
- prompt: {
966
- raw: 'Test prompt',
967
- label: 'Test prompt',
968
- config: {
969
- response_format: promptResponseFormat,
970
- },
971
- },
972
- });
973
- expect(fetch).toHaveBeenCalledTimes(1);
974
- const fetchArgs = jest.mocked(fetch).mock.calls[0];
975
- const requestBody = JSON.parse(fetchArgs[1]?.body);
976
- expect(requestBody.response_format).toEqual(promptResponseFormat);
977
- expect(result.output).toEqual({ key2: 'value2' });
978
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
979
- });
980
- it('should use provider config response_format when prompt config is not provided', async () => {
981
- const providerResponseFormat = {
982
- type: 'json_object',
983
- };
984
- const provider = new openai_1.OpenAiChatCompletionProvider('gpt-4o-mini', {
985
- config: {
986
- response_format: providerResponseFormat,
987
- },
988
- });
989
- const mockResponse = {
990
- ...defaultMockResponse,
991
- text: jest.fn().mockResolvedValue(JSON.stringify({
992
- choices: [{ message: { content: '{"key1": "value1"}' } }],
993
- usage: { total_tokens: 10, prompt_tokens: 5, completion_tokens: 5 },
994
- })),
995
- };
996
- jest.mocked(fetch).mockResolvedValue(mockResponse);
997
- const result = await provider.callApi('Test prompt', {
998
- vars: {},
999
- prompt: {
1000
- raw: 'Test prompt',
1001
- label: 'Test prompt',
1002
- },
1003
- });
1004
- expect(fetch).toHaveBeenCalledTimes(1);
1005
- const fetchArgs = jest.mocked(fetch).mock.calls[0];
1006
- const requestBody = JSON.parse(fetchArgs[1]?.body);
1007
- expect(requestBody.response_format).toEqual(providerResponseFormat);
1008
- expect(result.output).toBe('{"key1": "value1"}');
1009
- expect(result.tokenUsage).toEqual({ total: 10, prompt: 5, completion: 5 });
1010
- });
1011
- });
1012
558
  });
1013
559
  describe('loadApiProvider', () => {
1014
560
  beforeEach(() => {