promptfoo 0.107.0 → 0.107.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. package/dist/package.json +20 -17
  2. package/dist/src/app/assets/{index-aPpzVtDP.js → index-DmqkyVHC.js} +234 -234
  3. package/dist/src/app/assets/{index.es-BZ4ADnog.js → index.es-BSriX8RW.js} +1 -1
  4. package/dist/src/app/assets/{sync-ulUFOpow.js → sync-CAUebwzg.js} +1 -1
  5. package/dist/src/app/index.html +1 -1
  6. package/dist/src/assertions/answerRelevance.d.ts.map +1 -1
  7. package/dist/src/assertions/contains.d.ts.map +1 -1
  8. package/dist/src/assertions/contextRecall.d.ts.map +1 -1
  9. package/dist/src/assertions/contextRelevance.d.ts.map +1 -1
  10. package/dist/src/assertions/cost.d.ts.map +1 -1
  11. package/dist/src/assertions/equals.d.ts.map +1 -1
  12. package/dist/src/assertions/factuality.d.ts.map +1 -1
  13. package/dist/src/assertions/geval.d.ts.map +1 -1
  14. package/dist/src/assertions/guardrails.d.ts.map +1 -1
  15. package/dist/src/assertions/javascript.d.ts.map +1 -1
  16. package/dist/src/assertions/latency.d.ts.map +1 -1
  17. package/dist/src/assertions/llmRubric.d.ts.map +1 -1
  18. package/dist/src/assertions/modelGradedClosedQa.d.ts.map +1 -1
  19. package/dist/src/assertions/moderation.d.ts.map +1 -1
  20. package/dist/src/assertions/openai.d.ts.map +1 -1
  21. package/dist/src/assertions/python.d.ts.map +1 -1
  22. package/dist/src/assertions/redteam.d.ts.map +1 -1
  23. package/dist/src/assertions/regex.d.ts.map +1 -1
  24. package/dist/src/assertions/similar.d.ts.map +1 -1
  25. package/dist/src/assertions/sql.d.ts.map +1 -1
  26. package/dist/src/assertions/startsWith.d.ts.map +1 -1
  27. package/dist/src/assertions/xml.d.ts.map +1 -1
  28. package/dist/src/commands/auth.d.ts.map +1 -1
  29. package/dist/src/commands/auth.js +4 -5
  30. package/dist/src/commands/auth.js.map +1 -1
  31. package/dist/src/commands/init.d.ts.map +1 -1
  32. package/dist/src/commands/init.js +14 -2
  33. package/dist/src/commands/init.js.map +1 -1
  34. package/dist/src/database/tables.d.ts +22 -0
  35. package/dist/src/database/tables.d.ts.map +1 -1
  36. package/dist/src/fetch.js +1 -1
  37. package/dist/src/fetch.js.map +1 -1
  38. package/dist/src/globalConfig/cloud.d.ts +1 -0
  39. package/dist/src/globalConfig/cloud.d.ts.map +1 -1
  40. package/dist/src/globalConfig/cloud.js +11 -10
  41. package/dist/src/globalConfig/cloud.js.map +1 -1
  42. package/dist/src/matchers.d.ts.map +1 -1
  43. package/dist/src/matchers.js +79 -39
  44. package/dist/src/matchers.js.map +1 -1
  45. package/dist/src/onboarding.d.ts.map +1 -1
  46. package/dist/src/onboarding.js +5 -8
  47. package/dist/src/onboarding.js.map +1 -1
  48. package/dist/src/prompts/grading.d.ts +1 -10
  49. package/dist/src/prompts/grading.d.ts.map +1 -1
  50. package/dist/src/prompts/grading.js +37 -42
  51. package/dist/src/prompts/grading.js.map +1 -1
  52. package/dist/src/prompts/processors/javascript.d.ts.map +1 -1
  53. package/dist/src/prompts/processors/python.d.ts.map +1 -1
  54. package/dist/src/providers/adaline.gateway.d.ts.map +1 -1
  55. package/dist/src/providers/adaline.gateway.js +6 -5
  56. package/dist/src/providers/adaline.gateway.js.map +1 -1
  57. package/dist/src/providers/anthropic/completion.d.ts +15 -0
  58. package/dist/src/providers/anthropic/completion.d.ts.map +1 -0
  59. package/dist/src/providers/anthropic/completion.js +93 -0
  60. package/dist/src/providers/anthropic/completion.js.map +1 -0
  61. package/dist/src/providers/anthropic/defaults.d.ts +18 -0
  62. package/dist/src/providers/anthropic/defaults.d.ts.map +1 -0
  63. package/dist/src/providers/anthropic/defaults.js +105 -0
  64. package/dist/src/providers/anthropic/defaults.js.map +1 -0
  65. package/dist/src/providers/anthropic/generic.d.ts +37 -0
  66. package/dist/src/providers/anthropic/generic.d.ts.map +1 -0
  67. package/dist/src/providers/anthropic/generic.js +46 -0
  68. package/dist/src/providers/anthropic/generic.js.map +1 -0
  69. package/dist/src/providers/anthropic/messages.d.ts +23 -0
  70. package/dist/src/providers/anthropic/messages.d.ts.map +1 -0
  71. package/dist/src/providers/anthropic/messages.js +129 -0
  72. package/dist/src/providers/anthropic/messages.js.map +1 -0
  73. package/dist/src/providers/anthropic/types.d.ts +26 -0
  74. package/dist/src/providers/anthropic/types.d.ts.map +1 -0
  75. package/dist/src/providers/anthropic/types.js +3 -0
  76. package/dist/src/providers/anthropic/types.js.map +1 -0
  77. package/dist/src/providers/anthropic/util.d.ts +18 -0
  78. package/dist/src/providers/anthropic/util.d.ts.map +1 -0
  79. package/dist/src/providers/anthropic/util.js +192 -0
  80. package/dist/src/providers/anthropic/util.js.map +1 -0
  81. package/dist/src/providers/bedrock.d.ts +12 -12
  82. package/dist/src/providers/bedrock.d.ts.map +1 -1
  83. package/dist/src/providers/bedrock.js +14 -12
  84. package/dist/src/providers/bedrock.js.map +1 -1
  85. package/dist/src/providers/defaults.d.ts +1 -12
  86. package/dist/src/providers/defaults.d.ts.map +1 -1
  87. package/dist/src/providers/defaults.js +24 -22
  88. package/dist/src/providers/defaults.js.map +1 -1
  89. package/dist/src/providers/openai/realtime.d.ts +68 -0
  90. package/dist/src/providers/openai/realtime.d.ts.map +1 -0
  91. package/dist/src/providers/openai/realtime.js +919 -0
  92. package/dist/src/providers/openai/realtime.js.map +1 -0
  93. package/dist/src/providers/openai/util.d.ts +10 -0
  94. package/dist/src/providers/openai/util.d.ts.map +1 -1
  95. package/dist/src/providers/openai/util.js +32 -2
  96. package/dist/src/providers/openai/util.js.map +1 -1
  97. package/dist/src/providers/registry.d.ts.map +1 -1
  98. package/dist/src/providers/registry.js +30 -11
  99. package/dist/src/providers/registry.js.map +1 -1
  100. package/dist/src/redteam/constants.d.ts +1 -1
  101. package/dist/src/redteam/constants.d.ts.map +1 -1
  102. package/dist/src/redteam/constants.js +7 -0
  103. package/dist/src/redteam/constants.js.map +1 -1
  104. package/dist/src/redteam/extraction/entities.d.ts.map +1 -1
  105. package/dist/src/redteam/extraction/entities.js +2 -0
  106. package/dist/src/redteam/extraction/entities.js.map +1 -1
  107. package/dist/src/redteam/graders.d.ts.map +1 -1
  108. package/dist/src/redteam/graders.js +2 -0
  109. package/dist/src/redteam/graders.js.map +1 -1
  110. package/dist/src/redteam/plugins/index.d.ts.map +1 -1
  111. package/dist/src/redteam/plugins/index.js +2 -0
  112. package/dist/src/redteam/plugins/index.js.map +1 -1
  113. package/dist/src/redteam/plugins/toolDiscovery.d.ts +13 -0
  114. package/dist/src/redteam/plugins/toolDiscovery.d.ts.map +1 -0
  115. package/dist/src/redteam/plugins/toolDiscovery.js +102 -0
  116. package/dist/src/redteam/plugins/toolDiscovery.js.map +1 -0
  117. package/dist/src/redteam/providers/shared.d.ts.map +1 -1
  118. package/dist/src/redteam/strategies/iterative.js +1 -1
  119. package/dist/src/redteam/strategies/iterative.js.map +1 -1
  120. package/dist/src/types/env.d.ts +3 -0
  121. package/dist/src/types/env.d.ts.map +1 -1
  122. package/dist/src/types/env.js +1 -0
  123. package/dist/src/types/env.js.map +1 -1
  124. package/dist/src/types/index.d.ts +291 -0
  125. package/dist/src/types/index.d.ts.map +1 -1
  126. package/dist/src/types/providers.d.ts +13 -0
  127. package/dist/src/types/providers.d.ts.map +1 -1
  128. package/dist/src/util/database.d.ts +2 -0
  129. package/dist/src/util/database.d.ts.map +1 -1
  130. package/dist/src/util/objectUtils.d.ts.map +1 -1
  131. package/dist/src/util/time.d.ts.map +1 -1
  132. package/dist/src/validators/providers.d.ts +20 -0
  133. package/dist/src/validators/providers.d.ts.map +1 -1
  134. package/dist/src/validators/redteam.d.ts +8 -0
  135. package/dist/src/validators/redteam.d.ts.map +1 -1
  136. package/dist/tsconfig.tsbuildinfo +1 -1
  137. package/package.json +20 -17
  138. package/dist/src/providers/anthropic.d.ts +0 -84
  139. package/dist/src/providers/anthropic.d.ts.map +0 -1
  140. package/dist/src/providers/anthropic.js +0 -505
  141. package/dist/src/providers/anthropic.js.map +0 -1
@@ -0,0 +1,919 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.OpenAiRealtimeProvider = exports.OPENAI_REALTIME_MODELS = void 0;
7
+ const ws_1 = __importDefault(require("ws"));
8
+ const _1 = require(".");
9
+ const logger_1 = __importDefault(require("../../logger"));
10
+ const util_1 = require("../../util");
11
+ const util_2 = require("./util");
12
+ // Define supported Realtime models
13
+ var util_3 = require("./util");
14
+ Object.defineProperty(exports, "OPENAI_REALTIME_MODELS", { enumerable: true, get: function () { return util_3.OPENAI_REALTIME_MODELS; } });
15
+ class OpenAiRealtimeProvider extends _1.OpenAiGenericProvider {
16
+ constructor(modelName, options = {}) {
17
+ if (!OpenAiRealtimeProvider.OPENAI_REALTIME_MODEL_NAMES.includes(modelName)) {
18
+ logger_1.default.debug(`Using unknown OpenAI realtime model: ${modelName}`);
19
+ }
20
+ super(modelName, options);
21
+ this.config = options.config || {};
22
+ }
23
+ getRealtimeSessionBody() {
24
+ // Default values
25
+ const modalities = this.config.modalities || ['text', 'audio'];
26
+ const voice = this.config.voice || 'alloy';
27
+ const instructions = this.config.instructions || 'You are a helpful assistant.';
28
+ const inputAudioFormat = this.config.input_audio_format || 'pcm16';
29
+ const outputAudioFormat = this.config.output_audio_format || 'pcm16';
30
+ const temperature = this.config.temperature ?? 0.8;
31
+ const maxResponseOutputTokens = this.config.max_response_output_tokens || 'inf';
32
+ const body = {
33
+ model: this.modelName,
34
+ modalities,
35
+ instructions,
36
+ voice,
37
+ input_audio_format: inputAudioFormat,
38
+ output_audio_format: outputAudioFormat,
39
+ temperature,
40
+ max_response_output_tokens: maxResponseOutputTokens,
41
+ };
42
+ // Add optional configurations
43
+ if (this.config.input_audio_transcription !== undefined) {
44
+ body.input_audio_transcription = this.config.input_audio_transcription;
45
+ }
46
+ if (this.config.turn_detection !== undefined) {
47
+ body.turn_detection = this.config.turn_detection;
48
+ }
49
+ if (this.config.tools && this.config.tools.length > 0) {
50
+ body.tools = (0, util_1.renderVarsInObject)(this.config.tools);
51
+ // If tools are provided but no tool_choice, default to auto
52
+ if (this.config.tool_choice === undefined) {
53
+ body.tool_choice = 'auto';
54
+ }
55
+ }
56
+ if (this.config.tool_choice) {
57
+ body.tool_choice = this.config.tool_choice;
58
+ }
59
+ return body;
60
+ }
61
+ generateEventId() {
62
+ return `event_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`;
63
+ }
64
+ async webSocketRequest(clientSecret, prompt) {
65
+ return new Promise((resolve, reject) => {
66
+ logger_1.default.debug(`Attempting to connect to OpenAI WebSocket with client secret: ${clientSecret.slice(0, 5)}...`);
67
+ // The WebSocket URL needs to include the client secret
68
+ const wsUrl = `wss://api.openai.com/v1/realtime/socket?client_secret=${encodeURIComponent(clientSecret)}`;
69
+ logger_1.default.debug(`Connecting to WebSocket URL: ${wsUrl.slice(0, 60)}...`);
70
+ // Add WebSocket options to bypass potential network issues
71
+ const wsOptions = {
72
+ headers: {
73
+ 'User-Agent': 'promptfoo Realtime API Client',
74
+ Origin: 'https://api.openai.com',
75
+ },
76
+ handshakeTimeout: 10000,
77
+ perMessageDeflate: false,
78
+ };
79
+ const ws = new ws_1.default(wsUrl, wsOptions);
80
+ // Set a timeout for the WebSocket connection
81
+ const timeout = setTimeout(() => {
82
+ logger_1.default.error('WebSocket connection timed out after 30 seconds');
83
+ ws.close();
84
+ reject(new Error('WebSocket connection timed out'));
85
+ }, this.config.websocketTimeout || 30000); // Default 30 second timeout
86
+ // Accumulators for response text and errors
87
+ let responseText = '';
88
+ let responseError = '';
89
+ let responseDone = false;
90
+ let usage = null;
91
+ // Audio content accumulators
92
+ const audioContent = [];
93
+ let audioFormat = 'wav';
94
+ let hasAudioContent = false;
95
+ // Track message IDs and function call state
96
+ let messageId = '';
97
+ let responseId = '';
98
+ let pendingFunctionCalls = [];
99
+ let functionCallOccurred = false;
100
+ const functionCallResults = [];
101
+ const sendEvent = (event) => {
102
+ if (!event.event_id) {
103
+ event.event_id = this.generateEventId();
104
+ }
105
+ logger_1.default.debug(`Sending event: ${JSON.stringify(event)}`);
106
+ ws.send(JSON.stringify(event));
107
+ return event.event_id;
108
+ };
109
+ ws.on('open', () => {
110
+ logger_1.default.debug('WebSocket connection established successfully');
111
+ // Create a conversation item with the user's prompt - immediately after connection
112
+ // Don't send ping event as it's not supported
113
+ sendEvent({
114
+ type: 'conversation.item.create',
115
+ previous_item_id: null,
116
+ item: {
117
+ type: 'message',
118
+ role: 'user',
119
+ content: [
120
+ {
121
+ type: 'input_text',
122
+ text: prompt,
123
+ },
124
+ ],
125
+ },
126
+ });
127
+ });
128
+ ws.on('message', async (data) => {
129
+ try {
130
+ const message = JSON.parse(data.toString());
131
+ logger_1.default.debug(`Received WebSocket message: ${message.type}`);
132
+ // For better debugging, log the full message structure (without potentially large audio data)
133
+ const debugMessage = { ...message };
134
+ if (debugMessage.audio) {
135
+ debugMessage.audio = '[AUDIO_DATA]';
136
+ }
137
+ logger_1.default.debug(`Message data: ${JSON.stringify(debugMessage, null, 2)}`);
138
+ // Handle different event types
139
+ switch (message.type) {
140
+ case 'session.ready':
141
+ logger_1.default.debug('Session ready on WebSocket');
142
+ // Create a conversation item with the user's prompt
143
+ sendEvent({
144
+ type: 'conversation.item.create',
145
+ previous_item_id: null,
146
+ item: {
147
+ type: 'message',
148
+ role: 'user',
149
+ content: [
150
+ {
151
+ type: 'input_text',
152
+ text: prompt,
153
+ },
154
+ ],
155
+ },
156
+ });
157
+ break;
158
+ case 'session.created':
159
+ logger_1.default.debug('Session created on WebSocket');
160
+ // No need to do anything here as we'll wait for session.ready
161
+ break;
162
+ case 'conversation.item.created':
163
+ if (message.item.role === 'user') {
164
+ // User message was created, now create a response
165
+ messageId = message.item.id;
166
+ // Prepare response creation event with appropriate settings
167
+ const responseEvent = {
168
+ type: 'response.create',
169
+ response: {
170
+ modalities: this.config.modalities || ['text', 'audio'],
171
+ instructions: this.config.instructions || 'You are a helpful assistant.',
172
+ voice: this.config.voice || 'alloy',
173
+ temperature: this.config.temperature ?? 0.8,
174
+ },
175
+ };
176
+ // Add tools if configured
177
+ if (this.config.tools && this.config.tools.length > 0) {
178
+ responseEvent.response.tools = this.config.tools;
179
+ if (Object.prototype.hasOwnProperty.call(this.config, 'tool_choice')) {
180
+ responseEvent.response.tool_choice = this.config.tool_choice;
181
+ }
182
+ else {
183
+ responseEvent.response.tool_choice = 'auto';
184
+ }
185
+ }
186
+ sendEvent(responseEvent);
187
+ }
188
+ break;
189
+ case 'response.created':
190
+ responseId = message.response.id;
191
+ break;
192
+ case 'response.text.delta':
193
+ // Accumulate text deltas
194
+ responseText += message.delta;
195
+ logger_1.default.debug(`Added text delta: "${message.delta}", current length: ${responseText.length}`);
196
+ break;
197
+ case 'response.text.done':
198
+ // Final text content
199
+ if (message.text && message.text.length > 0) {
200
+ logger_1.default.debug(`Setting final text content from response.text.done: "${message.text}" (length: ${message.text.length})`);
201
+ responseText = message.text;
202
+ }
203
+ else {
204
+ logger_1.default.debug('Received empty text in response.text.done');
205
+ }
206
+ break;
207
+ // Handle content part events
208
+ case 'response.content_part.added':
209
+ // Log that we received a content part
210
+ logger_1.default.debug(`Received content part: ${JSON.stringify(message.content_part)}`);
211
+ // Track content part ID if needed for later reference
212
+ if (message.content_part && message.content_part.id) {
213
+ logger_1.default.debug(`Content part added with ID: ${message.content_part.id}`);
214
+ }
215
+ break;
216
+ case 'response.content_part.done':
217
+ logger_1.default.debug('Content part completed');
218
+ break;
219
+ // Handle audio transcript events
220
+ case 'response.audio_transcript.delta':
221
+ // Accumulate audio transcript deltas - this is the text content
222
+ responseText += message.delta;
223
+ logger_1.default.debug(`Added audio transcript delta: "${message.delta}", current length: ${responseText.length}`);
224
+ break;
225
+ case 'response.audio_transcript.done':
226
+ // Final audio transcript content
227
+ if (message.text && message.text.length > 0) {
228
+ logger_1.default.debug(`Setting final audio transcript text: "${message.text}" (length: ${message.text.length})`);
229
+ responseText = message.text;
230
+ }
231
+ else {
232
+ logger_1.default.debug('Received empty text in response.audio_transcript.done');
233
+ }
234
+ break;
235
+ // Handle audio data events - store in metadata if needed
236
+ case 'response.audio.delta':
237
+ // Handle audio data (could store in metadata for playback if needed)
238
+ logger_1.default.debug('Received audio data chunk');
239
+ if (message.audio && message.audio.length > 0) {
240
+ // Store the audio data for later use
241
+ try {
242
+ const audioBuffer = Buffer.from(message.audio, 'base64');
243
+ audioContent.push(audioBuffer);
244
+ hasAudioContent = true;
245
+ }
246
+ catch (error) {
247
+ logger_1.default.error(`Error processing audio data: ${error}`);
248
+ }
249
+ }
250
+ break;
251
+ case 'response.audio.done':
252
+ logger_1.default.debug('Audio data complete');
253
+ // If audio format is specified in the message, capture it
254
+ if (message.format) {
255
+ audioFormat = message.format;
256
+ }
257
+ break;
258
+ // Handle output items (including function calls)
259
+ case 'response.output_item.added':
260
+ if (message.item.type === 'function_call') {
261
+ functionCallOccurred = true;
262
+ // Store the function call details for later handling
263
+ pendingFunctionCalls.push({
264
+ id: message.item.call_id,
265
+ name: message.item.name,
266
+ arguments: message.item.arguments || '{}',
267
+ });
268
+ }
269
+ else if (message.item.type === 'text') {
270
+ // Handle text output item - also add to responseText
271
+ if (message.item.text) {
272
+ responseText += message.item.text;
273
+ logger_1.default.debug(`Added text output item: "${message.item.text}", current length: ${responseText.length}`);
274
+ }
275
+ else {
276
+ logger_1.default.debug('Received text output item with empty text');
277
+ }
278
+ }
279
+ else {
280
+ // Log other output item types
281
+ logger_1.default.debug(`Received output item of type: ${message.item.type}`);
282
+ }
283
+ break;
284
+ case 'response.output_item.done':
285
+ logger_1.default.debug('Output item complete');
286
+ break;
287
+ case 'response.function_call_arguments.done':
288
+ // Find the function call in our pending list and update its arguments
289
+ const callIndex = pendingFunctionCalls.findIndex((call) => call.id === message.call_id);
290
+ if (callIndex !== -1) {
291
+ pendingFunctionCalls[callIndex].arguments = message.arguments;
292
+ }
293
+ break;
294
+ case 'response.done':
295
+ responseDone = true;
296
+ usage = message.response.usage;
297
+ // If there are pending function calls, process them
298
+ if (pendingFunctionCalls.length > 0 && this.config.functionCallHandler) {
299
+ for (const call of pendingFunctionCalls) {
300
+ try {
301
+ // Execute the function handler
302
+ const result = await this.config.functionCallHandler(call.name, call.arguments);
303
+ functionCallResults.push(result);
304
+ // Send the function call result back to the model
305
+ sendEvent({
306
+ type: 'conversation.item.create',
307
+ item: {
308
+ type: 'function_call_output',
309
+ call_id: call.id,
310
+ output: result,
311
+ },
312
+ });
313
+ }
314
+ catch (err) {
315
+ logger_1.default.error(`Error executing function ${call.name}: ${err}`);
316
+ // Send an error result back to the model
317
+ sendEvent({
318
+ type: 'conversation.item.create',
319
+ item: {
320
+ type: 'function_call_output',
321
+ call_id: call.id,
322
+ output: JSON.stringify({ error: String(err) }),
323
+ },
324
+ });
325
+ }
326
+ }
327
+ // Request a new response from the model using the function results
328
+ sendEvent({
329
+ type: 'response.create',
330
+ });
331
+ // Reset pending function calls - we've handled them
332
+ pendingFunctionCalls = [];
333
+ // Don't resolve the promise yet - wait for the final response
334
+ return;
335
+ }
336
+ // If no function calls or we've processed them all, close the connection
337
+ clearTimeout(timeout);
338
+ // Check if we have an empty response and try to diagnose the issue
339
+ if (responseText.length === 0) {
340
+ // Only log at debug level to prevent user-visible warnings
341
+ logger_1.default.debug('Empty response detected before resolving. Checking response message details');
342
+ logger_1.default.debug('Response message details: ' + JSON.stringify(message, null, 2));
343
+ // Try to extract any text content from the message as a fallback
344
+ if (message.response &&
345
+ message.response.content &&
346
+ Array.isArray(message.response.content)) {
347
+ const textContent = message.response.content.find((item) => item.type === 'text' && item.text && item.text.length > 0);
348
+ if (textContent) {
349
+ logger_1.default.debug(`Found text in response content, using as fallback: "${textContent.text}"`);
350
+ responseText = textContent.text;
351
+ }
352
+ else {
353
+ logger_1.default.debug('No fallback text content found in response message');
354
+ }
355
+ }
356
+ // If still empty, add a placeholder message to indicate the issue
357
+ if (responseText.length === 0) {
358
+ responseText = '[No response received from API]';
359
+ logger_1.default.debug('Using placeholder message for empty response');
360
+ }
361
+ }
362
+ ws.close();
363
+ // Prepare audio data if available
364
+ const finalAudioData = hasAudioContent
365
+ ? Buffer.concat(audioContent).toString('base64')
366
+ : null;
367
+ resolve({
368
+ output: responseText,
369
+ tokenUsage: {
370
+ total: usage?.total_tokens || 0,
371
+ prompt: usage?.input_tokens || 0,
372
+ completion: usage?.output_tokens || 0,
373
+ cached: 0,
374
+ },
375
+ cached: false,
376
+ metadata: {
377
+ responseId,
378
+ messageId,
379
+ usage,
380
+ // Include audio data in metadata if available
381
+ ...(hasAudioContent && {
382
+ audio: {
383
+ data: finalAudioData,
384
+ format: audioFormat,
385
+ },
386
+ }),
387
+ },
388
+ functionCallOccurred,
389
+ functionCallResults: functionCallResults.length > 0 ? functionCallResults : undefined,
390
+ });
391
+ break;
392
+ case 'rate_limits.updated':
393
+ // Store rate limits in metadata if needed
394
+ logger_1.default.debug(`Rate limits updated: ${JSON.stringify(message.rate_limits)}`);
395
+ break;
396
+ case 'error':
397
+ responseError = `Error: ${message.error.message}`;
398
+ logger_1.default.error(`WebSocket error: ${responseError} (${message.error.type})`);
399
+ // Always close on errors to prevent hanging connections
400
+ clearTimeout(timeout);
401
+ ws.close();
402
+ reject(new Error(responseError));
403
+ break;
404
+ }
405
+ }
406
+ catch (err) {
407
+ logger_1.default.error(`Error parsing WebSocket message: ${err}`);
408
+ clearTimeout(timeout);
409
+ ws.close();
410
+ reject(err);
411
+ }
412
+ });
413
+ ws.on('error', (err) => {
414
+ logger_1.default.error(`WebSocket error: ${err.message}`);
415
+ clearTimeout(timeout);
416
+ reject(err);
417
+ });
418
+ ws.on('close', (code, reason) => {
419
+ logger_1.default.debug(`WebSocket closed with code ${code}: ${reason}`);
420
+ clearTimeout(timeout);
421
+ // Provide more detailed error messages for common WebSocket close codes
422
+ if (code === 1006) {
423
+ logger_1.default.error('WebSocket connection closed abnormally - this often indicates a network or firewall issue');
424
+ }
425
+ else if (code === 1008) {
426
+ logger_1.default.error('WebSocket connection rejected due to policy violation (possibly wrong API key or permissions)');
427
+ }
428
+ else if (code === 403 || reason.includes('403')) {
429
+ logger_1.default.error('WebSocket connection received 403 Forbidden - verify API key permissions and rate limits');
430
+ }
431
+ // Only reject if we haven't received a completed response or error
432
+ const connectionClosedPrematurely = responseDone === false && responseError.length === 0;
433
+ if (connectionClosedPrematurely) {
434
+ reject(new Error(`WebSocket closed unexpectedly with code ${code}: ${reason}. This may indicate a networking issue, firewall restriction, or API access limitation.`));
435
+ }
436
+ });
437
+ });
438
+ }
439
+ async callApi(prompt, context, callApiOptions) {
440
+ if (!this.getApiKey()) {
441
+ throw new Error('OpenAI API key is not set. Set the OPENAI_API_KEY environment variable or add `apiKey` to the provider config.');
442
+ }
443
+ // Apply function handler if provided in context
444
+ if (context?.prompt?.config?.functionCallHandler &&
445
+ typeof context.prompt.config.functionCallHandler === 'function') {
446
+ this.config.functionCallHandler = context.prompt.config.functionCallHandler;
447
+ }
448
+ try {
449
+ // Extract the message content for WebSocket communications
450
+ // This approach is similar to parseChatPrompt but specialized for Realtime API
451
+ let promptText = prompt;
452
+ try {
453
+ // Check if the prompt is a JSON string
454
+ const parsedPrompt = JSON.parse(prompt);
455
+ // Handle array format (OpenAI chat format)
456
+ if (Array.isArray(parsedPrompt) && parsedPrompt.length > 0) {
457
+ // Find the last user message (following OpenAI's chat convention)
458
+ for (let i = parsedPrompt.length - 1; i >= 0; i--) {
459
+ const message = parsedPrompt[i];
460
+ if (message.role === 'user') {
461
+ // Handle both simple content string and array of content objects
462
+ if (typeof message.content === 'string') {
463
+ promptText = message.content;
464
+ break;
465
+ }
466
+ else if (Array.isArray(message.content) && message.content.length > 0) {
467
+ // Find the first text content - check for both 'text' and 'input_text' for backward compatibility
468
+ const textContent = message.content.find((content) => (content.type === 'text' || content.type === 'input_text') &&
469
+ typeof content.text === 'string');
470
+ if (textContent) {
471
+ promptText = textContent.text;
472
+ break;
473
+ }
474
+ }
475
+ }
476
+ }
477
+ }
478
+ else if (parsedPrompt && typeof parsedPrompt === 'object' && parsedPrompt.prompt) {
479
+ // Handle {prompt: "..."} format that some templates might use
480
+ promptText = parsedPrompt.prompt;
481
+ }
482
+ }
483
+ catch {
484
+ // Not JSON or couldn't extract - use as is
485
+ logger_1.default.debug('Using prompt as is - not a JSON structure');
486
+ }
487
+ // Connect directly to the WebSocket API using API key
488
+ logger_1.default.debug(`Connecting directly to OpenAI Realtime API WebSocket with API key`);
489
+ const result = await this.directWebSocketRequest(promptText);
490
+ // Format the output - if function calls occurred, include that info
491
+ let finalOutput = result.output;
492
+ // Log the output we received for debugging
493
+ logger_1.default.debug(`Final output from API: "${finalOutput}" (length: ${finalOutput.length})`);
494
+ if (finalOutput.length === 0) {
495
+ // Log at debug level instead of warn to prevent user-visible warnings
496
+ logger_1.default.debug('Received empty response from Realtime API - possible issue with transcript accumulation. Check modalities configuration.');
497
+ // Set a fallback message to help users, but keep it shorter
498
+ finalOutput = '[No response received from API]';
499
+ }
500
+ if (result.functionCallOccurred &&
501
+ result.functionCallResults &&
502
+ result.functionCallResults.length > 0) {
503
+ finalOutput += '\n\n[Function calls were made during processing]';
504
+ }
505
+ // Construct the metadata with audio if available
506
+ const metadata = {
507
+ ...result.metadata,
508
+ functionCallOccurred: result.functionCallOccurred,
509
+ functionCallResults: result.functionCallResults,
510
+ };
511
+ // If the response has audio data, format it according to the promptfoo audio interface
512
+ if (result.metadata?.audio) {
513
+ // Convert Buffer to base64 string for the audio data
514
+ const audioDataBase64 = result.metadata.audio.data;
515
+ metadata.audio = {
516
+ data: audioDataBase64,
517
+ format: result.metadata.audio.format,
518
+ transcript: result.output, // Use the text output as transcript
519
+ };
520
+ }
521
+ return {
522
+ output: finalOutput,
523
+ tokenUsage: result.tokenUsage,
524
+ cached: result.cached,
525
+ metadata,
526
+ // Add audio at top level if available (EvalOutputCell expects this)
527
+ ...(result.metadata?.audio && {
528
+ audio: {
529
+ data: result.metadata.audio.data,
530
+ format: result.metadata.audio.format,
531
+ transcript: result.output, // Use the text output as transcript
532
+ },
533
+ }),
534
+ };
535
+ }
536
+ catch (err) {
537
+ const errorMessage = `WebSocket error: ${String(err)}`;
538
+ logger_1.default.error(errorMessage);
539
+ // If this is an Unexpected server response: 403, add additional troubleshooting info
540
+ if (errorMessage.includes('403')) {
541
+ logger_1.default.error(`
542
+ This 403 error usually means one of the following:
543
+ 1. WebSocket connections are blocked by your network/firewall
544
+ 2. Your OpenAI API key doesn't have access to the Realtime API
545
+ 3. There are rate limits or quotas in place for your account
546
+ Try:
547
+ - Using a different network connection
548
+ - Checking your OpenAI API key permissions
549
+ - Verifying you have access to the Realtime API beta`);
550
+ }
551
+ return {
552
+ error: errorMessage,
553
+ metadata: {},
554
+ };
555
+ }
556
+ }
557
+ async directWebSocketRequest(prompt) {
558
+ return new Promise((resolve, reject) => {
559
+ logger_1.default.debug(`Establishing direct WebSocket connection to OpenAI Realtime API`);
560
+ // Construct URL with model parameter
561
+ const wsUrl = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.modelName)}`;
562
+ logger_1.default.debug(`Connecting to WebSocket URL: ${wsUrl}`);
563
+ // Add WebSocket options with required headers
564
+ const wsOptions = {
565
+ headers: {
566
+ Authorization: `Bearer ${this.getApiKey()}`,
567
+ 'OpenAI-Beta': 'realtime=v1',
568
+ 'User-Agent': 'promptfoo Realtime API Client',
569
+ Origin: 'https://api.openai.com',
570
+ },
571
+ handshakeTimeout: 10000,
572
+ perMessageDeflate: false,
573
+ };
574
+ const ws = new ws_1.default(wsUrl, wsOptions);
575
+ // Set a timeout for the WebSocket connection
576
+ const timeout = setTimeout(() => {
577
+ logger_1.default.error('WebSocket connection timed out after 30 seconds');
578
+ ws.close();
579
+ reject(new Error('WebSocket connection timed out'));
580
+ }, this.config.websocketTimeout || 30000);
581
+ // Accumulators for response text and errors
582
+ let responseText = '';
583
+ let responseError = '';
584
+ let responseDone = false;
585
+ let usage = null;
586
+ // Audio content accumulators
587
+ const audioContent = [];
588
+ let audioFormat = 'wav';
589
+ let hasAudioContent = false;
590
+ // Track message IDs and function call state
591
+ let messageId = '';
592
+ let responseId = '';
593
+ let pendingFunctionCalls = [];
594
+ let functionCallOccurred = false;
595
+ const functionCallResults = [];
596
+ const sendEvent = (event) => {
597
+ if (!event.event_id) {
598
+ event.event_id = this.generateEventId();
599
+ }
600
+ logger_1.default.debug(`Sending event: ${JSON.stringify(event)}`);
601
+ ws.send(JSON.stringify(event));
602
+ return event.event_id;
603
+ };
604
+ ws.on('open', () => {
605
+ logger_1.default.debug('WebSocket connection established successfully');
606
+ // Create a conversation item with the user's prompt - immediately after connection
607
+ // Don't send ping event as it's not supported
608
+ sendEvent({
609
+ type: 'conversation.item.create',
610
+ previous_item_id: null,
611
+ item: {
612
+ type: 'message',
613
+ role: 'user',
614
+ content: [
615
+ {
616
+ type: 'input_text',
617
+ text: prompt,
618
+ },
619
+ ],
620
+ },
621
+ });
622
+ });
623
+ ws.on('message', async (data) => {
624
+ try {
625
+ const message = JSON.parse(data.toString());
626
+ logger_1.default.debug(`Received WebSocket message: ${message.type}`);
627
+ // For better debugging, log the full message structure (without potentially large audio data)
628
+ const debugMessage = { ...message };
629
+ if (debugMessage.audio) {
630
+ debugMessage.audio = '[AUDIO_DATA]';
631
+ }
632
+ logger_1.default.debug(`Message data: ${JSON.stringify(debugMessage, null, 2)}`);
633
+ // Handle different event types
634
+ switch (message.type) {
635
+ case 'session.created':
636
+ logger_1.default.debug('Session created on WebSocket');
637
+ break;
638
+ case 'conversation.item.created':
639
+ if (message.item.role === 'user') {
640
+ // User message was created, now create a response
641
+ messageId = message.item.id;
642
+ // Prepare response creation event with appropriate settings
643
+ const responseEvent = {
644
+ type: 'response.create',
645
+ response: {
646
+ modalities: this.config.modalities || ['text', 'audio'],
647
+ instructions: this.config.instructions || 'You are a helpful assistant.',
648
+ voice: this.config.voice || 'alloy',
649
+ temperature: this.config.temperature ?? 0.8,
650
+ },
651
+ };
652
+ // Add tools if configured
653
+ if (this.config.tools && this.config.tools.length > 0) {
654
+ responseEvent.response.tools = this.config.tools;
655
+ if (Object.prototype.hasOwnProperty.call(this.config, 'tool_choice')) {
656
+ responseEvent.response.tool_choice = this.config.tool_choice;
657
+ }
658
+ else {
659
+ responseEvent.response.tool_choice = 'auto';
660
+ }
661
+ }
662
+ sendEvent(responseEvent);
663
+ }
664
+ break;
665
+ case 'response.created':
666
+ responseId = message.response.id;
667
+ break;
668
+ case 'response.text.delta':
669
+ // Accumulate text deltas
670
+ responseText += message.delta;
671
+ logger_1.default.debug(`Added text delta: "${message.delta}", current length: ${responseText.length}`);
672
+ break;
673
+ case 'response.text.done':
674
+ // Final text content
675
+ if (message.text && message.text.length > 0) {
676
+ logger_1.default.debug(`Setting final text content from response.text.done: "${message.text}" (length: ${message.text.length})`);
677
+ responseText = message.text;
678
+ }
679
+ else {
680
+ logger_1.default.debug('Received empty text in response.text.done');
681
+ }
682
+ break;
683
+ // Handle content part events
684
+ case 'response.content_part.added':
685
+ // Log that we received a content part
686
+ logger_1.default.debug(`Received content part: ${JSON.stringify(message.content_part)}`);
687
+ // Track content part ID if needed for later reference
688
+ if (message.content_part && message.content_part.id) {
689
+ logger_1.default.debug(`Content part added with ID: ${message.content_part.id}`);
690
+ }
691
+ break;
692
+ case 'response.content_part.done':
693
+ logger_1.default.debug('Content part completed');
694
+ break;
695
+ // Handle audio transcript events
696
+ case 'response.audio_transcript.delta':
697
+ // Accumulate audio transcript deltas - this is the text content
698
+ responseText += message.delta;
699
+ logger_1.default.debug(`Added audio transcript delta: "${message.delta}", current length: ${responseText.length}`);
700
+ break;
701
+ case 'response.audio_transcript.done':
702
+ // Final audio transcript content
703
+ if (message.text && message.text.length > 0) {
704
+ logger_1.default.debug(`Setting final audio transcript text: "${message.text}" (length: ${message.text.length})`);
705
+ responseText = message.text;
706
+ }
707
+ else {
708
+ logger_1.default.debug('Received empty text in response.audio_transcript.done');
709
+ }
710
+ break;
711
+ // Handle audio data events - store in metadata if needed
712
+ case 'response.audio.delta':
713
+ // Handle audio data (could store in metadata for playback if needed)
714
+ logger_1.default.debug('Received audio data chunk');
715
+ if (message.audio && message.audio.length > 0) {
716
+ // Store the audio data for later use
717
+ try {
718
+ const audioBuffer = Buffer.from(message.audio, 'base64');
719
+ audioContent.push(audioBuffer);
720
+ hasAudioContent = true;
721
+ }
722
+ catch (error) {
723
+ logger_1.default.error(`Error processing audio data: ${error}`);
724
+ }
725
+ }
726
+ break;
727
+ case 'response.audio.done':
728
+ logger_1.default.debug('Audio data complete');
729
+ // If audio format is specified in the message, capture it
730
+ if (message.format) {
731
+ audioFormat = message.format;
732
+ }
733
+ break;
734
+ // Handle output items (including function calls)
735
+ case 'response.output_item.added':
736
+ if (message.item.type === 'function_call') {
737
+ functionCallOccurred = true;
738
+ // Store the function call details for later handling
739
+ pendingFunctionCalls.push({
740
+ id: message.item.call_id,
741
+ name: message.item.name,
742
+ arguments: message.item.arguments || '{}',
743
+ });
744
+ }
745
+ else if (message.item.type === 'text') {
746
+ // Handle text output item - also add to responseText
747
+ if (message.item.text) {
748
+ responseText += message.item.text;
749
+ logger_1.default.debug(`Added text output item: "${message.item.text}", current length: ${responseText.length}`);
750
+ }
751
+ else {
752
+ logger_1.default.debug('Received text output item with empty text');
753
+ }
754
+ }
755
+ else {
756
+ // Log other output item types
757
+ logger_1.default.debug(`Received output item of type: ${message.item.type}`);
758
+ }
759
+ break;
760
+ case 'response.output_item.done':
761
+ logger_1.default.debug('Output item complete');
762
+ break;
763
+ case 'response.function_call_arguments.done':
764
+ // Find the function call in our pending list and update its arguments
765
+ const callIndex = pendingFunctionCalls.findIndex((call) => call.id === message.call_id);
766
+ if (callIndex !== -1) {
767
+ pendingFunctionCalls[callIndex].arguments = message.arguments;
768
+ }
769
+ break;
770
+ case 'response.done':
771
+ responseDone = true;
772
+ usage = message.response.usage;
773
+ // If there are pending function calls, process them
774
+ if (pendingFunctionCalls.length > 0 && this.config.functionCallHandler) {
775
+ for (const call of pendingFunctionCalls) {
776
+ try {
777
+ // Execute the function handler
778
+ const result = await this.config.functionCallHandler(call.name, call.arguments);
779
+ functionCallResults.push(result);
780
+ // Send the function call result back to the model
781
+ sendEvent({
782
+ type: 'conversation.item.create',
783
+ item: {
784
+ type: 'function_call_output',
785
+ call_id: call.id,
786
+ output: result,
787
+ },
788
+ });
789
+ }
790
+ catch (err) {
791
+ logger_1.default.error(`Error executing function ${call.name}: ${err}`);
792
+ // Send an error result back to the model
793
+ sendEvent({
794
+ type: 'conversation.item.create',
795
+ item: {
796
+ type: 'function_call_output',
797
+ call_id: call.id,
798
+ output: JSON.stringify({ error: String(err) }),
799
+ },
800
+ });
801
+ }
802
+ }
803
+ // Request a new response from the model using the function results
804
+ sendEvent({
805
+ type: 'response.create',
806
+ });
807
+ // Reset pending function calls - we've handled them
808
+ pendingFunctionCalls = [];
809
+ // Don't resolve the promise yet - wait for the final response
810
+ return;
811
+ }
812
+ // If no function calls or we've processed them all, close the connection
813
+ clearTimeout(timeout);
814
+ // Check if we have an empty response and try to diagnose the issue
815
+ if (responseText.length === 0) {
816
+ // Only log at debug level to prevent user-visible warnings
817
+ logger_1.default.debug('Empty response detected before resolving. Checking response message details');
818
+ logger_1.default.debug('Response message details: ' + JSON.stringify(message, null, 2));
819
+ // Try to extract any text content from the message as a fallback
820
+ if (message.response &&
821
+ message.response.content &&
822
+ Array.isArray(message.response.content)) {
823
+ const textContent = message.response.content.find((item) => item.type === 'text' && item.text && item.text.length > 0);
824
+ if (textContent) {
825
+ logger_1.default.debug(`Found text in response content, using as fallback: "${textContent.text}"`);
826
+ responseText = textContent.text;
827
+ }
828
+ else {
829
+ logger_1.default.debug('No fallback text content found in response message');
830
+ }
831
+ }
832
+ // If still empty, add a placeholder message to indicate the issue
833
+ if (responseText.length === 0) {
834
+ responseText = '[No response received from API]';
835
+ logger_1.default.debug('Using placeholder message for empty response');
836
+ }
837
+ }
838
+ ws.close();
839
+ // Prepare audio data if available
840
+ const finalAudioData = hasAudioContent
841
+ ? Buffer.concat(audioContent).toString('base64')
842
+ : null;
843
+ resolve({
844
+ output: responseText,
845
+ tokenUsage: {
846
+ total: usage?.total_tokens || 0,
847
+ prompt: usage?.input_tokens || 0,
848
+ completion: usage?.output_tokens || 0,
849
+ cached: 0,
850
+ },
851
+ cached: false,
852
+ metadata: {
853
+ responseId,
854
+ messageId,
855
+ usage,
856
+ // Include audio data in metadata if available
857
+ ...(hasAudioContent && {
858
+ audio: {
859
+ data: finalAudioData,
860
+ format: audioFormat,
861
+ },
862
+ }),
863
+ },
864
+ functionCallOccurred,
865
+ functionCallResults: functionCallResults.length > 0 ? functionCallResults : undefined,
866
+ });
867
+ break;
868
+ case 'rate_limits.updated':
869
+ // Store rate limits in metadata if needed
870
+ logger_1.default.debug(`Rate limits updated: ${JSON.stringify(message.rate_limits)}`);
871
+ break;
872
+ case 'error':
873
+ responseError = `Error: ${message.error.message}`;
874
+ logger_1.default.error(`WebSocket error: ${responseError} (${message.error.type})`);
875
+ // Always close on errors to prevent hanging connections
876
+ clearTimeout(timeout);
877
+ ws.close();
878
+ reject(new Error(responseError));
879
+ break;
880
+ }
881
+ }
882
+ catch (err) {
883
+ logger_1.default.error(`Error parsing WebSocket message: ${err}`);
884
+ clearTimeout(timeout);
885
+ ws.close();
886
+ reject(err);
887
+ }
888
+ });
889
+ ws.on('error', (err) => {
890
+ logger_1.default.error(`WebSocket error: ${err.message}`);
891
+ clearTimeout(timeout);
892
+ reject(err);
893
+ });
894
+ ws.on('close', (code, reason) => {
895
+ logger_1.default.debug(`WebSocket closed with code ${code}: ${reason}`);
896
+ clearTimeout(timeout);
897
+ // Provide more detailed error messages for common WebSocket close codes
898
+ if (code === 1006) {
899
+ logger_1.default.error('WebSocket connection closed abnormally - this often indicates a network or firewall issue');
900
+ }
901
+ else if (code === 1008) {
902
+ logger_1.default.error('WebSocket connection rejected due to policy violation (possibly wrong API key or permissions)');
903
+ }
904
+ else if (code === 403 || reason.includes('403')) {
905
+ logger_1.default.error('WebSocket connection received 403 Forbidden - verify API key permissions and rate limits');
906
+ }
907
+ // Only reject if we haven't received a completed response or error
908
+ const connectionClosedPrematurely = responseDone === false && responseError.length === 0;
909
+ if (connectionClosedPrematurely) {
910
+ reject(new Error(`WebSocket closed unexpectedly with code ${code}: ${reason}. This may indicate a networking issue, firewall restriction, or API access limitation.`));
911
+ }
912
+ });
913
+ });
914
+ }
915
+ }
916
// Publish the provider class on the CommonJS exports object.
exports.OpenAiRealtimeProvider = OpenAiRealtimeProvider;
// Mirror util_2's realtime model list onto the class as a static member,
// together with an array holding each entry's `id`.
Object.assign(OpenAiRealtimeProvider, {
    OPENAI_REALTIME_MODELS: util_2.OPENAI_REALTIME_MODELS,
    OPENAI_REALTIME_MODEL_NAMES: util_2.OPENAI_REALTIME_MODELS.map(({ id }) => id),
});
919
+ //# sourceMappingURL=realtime.js.map