promptfoo 0.107.0 → 0.107.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package.json +20 -17
- package/dist/src/app/assets/{index-aPpzVtDP.js → index-DmqkyVHC.js} +234 -234
- package/dist/src/app/assets/{index.es-BZ4ADnog.js → index.es-BSriX8RW.js} +1 -1
- package/dist/src/app/assets/{sync-ulUFOpow.js → sync-CAUebwzg.js} +1 -1
- package/dist/src/app/index.html +1 -1
- package/dist/src/assertions/answerRelevance.d.ts.map +1 -1
- package/dist/src/assertions/contains.d.ts.map +1 -1
- package/dist/src/assertions/contextRecall.d.ts.map +1 -1
- package/dist/src/assertions/contextRelevance.d.ts.map +1 -1
- package/dist/src/assertions/cost.d.ts.map +1 -1
- package/dist/src/assertions/equals.d.ts.map +1 -1
- package/dist/src/assertions/factuality.d.ts.map +1 -1
- package/dist/src/assertions/geval.d.ts.map +1 -1
- package/dist/src/assertions/guardrails.d.ts.map +1 -1
- package/dist/src/assertions/javascript.d.ts.map +1 -1
- package/dist/src/assertions/latency.d.ts.map +1 -1
- package/dist/src/assertions/llmRubric.d.ts.map +1 -1
- package/dist/src/assertions/modelGradedClosedQa.d.ts.map +1 -1
- package/dist/src/assertions/moderation.d.ts.map +1 -1
- package/dist/src/assertions/openai.d.ts.map +1 -1
- package/dist/src/assertions/python.d.ts.map +1 -1
- package/dist/src/assertions/redteam.d.ts.map +1 -1
- package/dist/src/assertions/regex.d.ts.map +1 -1
- package/dist/src/assertions/similar.d.ts.map +1 -1
- package/dist/src/assertions/sql.d.ts.map +1 -1
- package/dist/src/assertions/startsWith.d.ts.map +1 -1
- package/dist/src/assertions/xml.d.ts.map +1 -1
- package/dist/src/commands/auth.d.ts.map +1 -1
- package/dist/src/commands/auth.js +4 -5
- package/dist/src/commands/auth.js.map +1 -1
- package/dist/src/commands/init.d.ts.map +1 -1
- package/dist/src/commands/init.js +14 -2
- package/dist/src/commands/init.js.map +1 -1
- package/dist/src/database/tables.d.ts +22 -0
- package/dist/src/database/tables.d.ts.map +1 -1
- package/dist/src/fetch.js +1 -1
- package/dist/src/fetch.js.map +1 -1
- package/dist/src/globalConfig/cloud.d.ts +1 -0
- package/dist/src/globalConfig/cloud.d.ts.map +1 -1
- package/dist/src/globalConfig/cloud.js +11 -10
- package/dist/src/globalConfig/cloud.js.map +1 -1
- package/dist/src/matchers.d.ts.map +1 -1
- package/dist/src/matchers.js +79 -39
- package/dist/src/matchers.js.map +1 -1
- package/dist/src/onboarding.d.ts.map +1 -1
- package/dist/src/onboarding.js +5 -8
- package/dist/src/onboarding.js.map +1 -1
- package/dist/src/prompts/grading.d.ts +1 -10
- package/dist/src/prompts/grading.d.ts.map +1 -1
- package/dist/src/prompts/grading.js +37 -42
- package/dist/src/prompts/grading.js.map +1 -1
- package/dist/src/prompts/processors/javascript.d.ts.map +1 -1
- package/dist/src/prompts/processors/python.d.ts.map +1 -1
- package/dist/src/providers/adaline.gateway.d.ts.map +1 -1
- package/dist/src/providers/adaline.gateway.js +6 -5
- package/dist/src/providers/adaline.gateway.js.map +1 -1
- package/dist/src/providers/anthropic/completion.d.ts +15 -0
- package/dist/src/providers/anthropic/completion.d.ts.map +1 -0
- package/dist/src/providers/anthropic/completion.js +93 -0
- package/dist/src/providers/anthropic/completion.js.map +1 -0
- package/dist/src/providers/anthropic/defaults.d.ts +18 -0
- package/dist/src/providers/anthropic/defaults.d.ts.map +1 -0
- package/dist/src/providers/anthropic/defaults.js +105 -0
- package/dist/src/providers/anthropic/defaults.js.map +1 -0
- package/dist/src/providers/anthropic/generic.d.ts +37 -0
- package/dist/src/providers/anthropic/generic.d.ts.map +1 -0
- package/dist/src/providers/anthropic/generic.js +46 -0
- package/dist/src/providers/anthropic/generic.js.map +1 -0
- package/dist/src/providers/anthropic/messages.d.ts +23 -0
- package/dist/src/providers/anthropic/messages.d.ts.map +1 -0
- package/dist/src/providers/anthropic/messages.js +129 -0
- package/dist/src/providers/anthropic/messages.js.map +1 -0
- package/dist/src/providers/anthropic/types.d.ts +26 -0
- package/dist/src/providers/anthropic/types.d.ts.map +1 -0
- package/dist/src/providers/anthropic/types.js +3 -0
- package/dist/src/providers/anthropic/types.js.map +1 -0
- package/dist/src/providers/anthropic/util.d.ts +18 -0
- package/dist/src/providers/anthropic/util.d.ts.map +1 -0
- package/dist/src/providers/anthropic/util.js +192 -0
- package/dist/src/providers/anthropic/util.js.map +1 -0
- package/dist/src/providers/bedrock.d.ts +12 -12
- package/dist/src/providers/bedrock.d.ts.map +1 -1
- package/dist/src/providers/bedrock.js +14 -12
- package/dist/src/providers/bedrock.js.map +1 -1
- package/dist/src/providers/defaults.d.ts +1 -12
- package/dist/src/providers/defaults.d.ts.map +1 -1
- package/dist/src/providers/defaults.js +24 -22
- package/dist/src/providers/defaults.js.map +1 -1
- package/dist/src/providers/openai/realtime.d.ts +68 -0
- package/dist/src/providers/openai/realtime.d.ts.map +1 -0
- package/dist/src/providers/openai/realtime.js +919 -0
- package/dist/src/providers/openai/realtime.js.map +1 -0
- package/dist/src/providers/openai/util.d.ts +10 -0
- package/dist/src/providers/openai/util.d.ts.map +1 -1
- package/dist/src/providers/openai/util.js +32 -2
- package/dist/src/providers/openai/util.js.map +1 -1
- package/dist/src/providers/registry.d.ts.map +1 -1
- package/dist/src/providers/registry.js +30 -11
- package/dist/src/providers/registry.js.map +1 -1
- package/dist/src/redteam/constants.d.ts +1 -1
- package/dist/src/redteam/constants.d.ts.map +1 -1
- package/dist/src/redteam/constants.js +7 -0
- package/dist/src/redteam/constants.js.map +1 -1
- package/dist/src/redteam/extraction/entities.d.ts.map +1 -1
- package/dist/src/redteam/extraction/entities.js +2 -0
- package/dist/src/redteam/extraction/entities.js.map +1 -1
- package/dist/src/redteam/graders.d.ts.map +1 -1
- package/dist/src/redteam/graders.js +2 -0
- package/dist/src/redteam/graders.js.map +1 -1
- package/dist/src/redteam/plugins/index.d.ts.map +1 -1
- package/dist/src/redteam/plugins/index.js +2 -0
- package/dist/src/redteam/plugins/index.js.map +1 -1
- package/dist/src/redteam/plugins/toolDiscovery.d.ts +13 -0
- package/dist/src/redteam/plugins/toolDiscovery.d.ts.map +1 -0
- package/dist/src/redteam/plugins/toolDiscovery.js +102 -0
- package/dist/src/redteam/plugins/toolDiscovery.js.map +1 -0
- package/dist/src/redteam/providers/shared.d.ts.map +1 -1
- package/dist/src/redteam/strategies/iterative.js +1 -1
- package/dist/src/redteam/strategies/iterative.js.map +1 -1
- package/dist/src/types/env.d.ts +3 -0
- package/dist/src/types/env.d.ts.map +1 -1
- package/dist/src/types/env.js +1 -0
- package/dist/src/types/env.js.map +1 -1
- package/dist/src/types/index.d.ts +291 -0
- package/dist/src/types/index.d.ts.map +1 -1
- package/dist/src/types/providers.d.ts +13 -0
- package/dist/src/types/providers.d.ts.map +1 -1
- package/dist/src/util/database.d.ts +2 -0
- package/dist/src/util/database.d.ts.map +1 -1
- package/dist/src/util/objectUtils.d.ts.map +1 -1
- package/dist/src/util/time.d.ts.map +1 -1
- package/dist/src/validators/providers.d.ts +20 -0
- package/dist/src/validators/providers.d.ts.map +1 -1
- package/dist/src/validators/redteam.d.ts +8 -0
- package/dist/src/validators/redteam.d.ts.map +1 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +20 -17
- package/dist/src/providers/anthropic.d.ts +0 -84
- package/dist/src/providers/anthropic.d.ts.map +0 -1
- package/dist/src/providers/anthropic.js +0 -505
- package/dist/src/providers/anthropic.js.map +0 -1
|
@@ -0,0 +1,919 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.OpenAiRealtimeProvider = exports.OPENAI_REALTIME_MODELS = void 0;
|
|
7
|
+
const ws_1 = __importDefault(require("ws"));
|
|
8
|
+
const _1 = require(".");
|
|
9
|
+
const logger_1 = __importDefault(require("../../logger"));
|
|
10
|
+
const util_1 = require("../../util");
|
|
11
|
+
const util_2 = require("./util");
|
|
12
|
+
// Define supported Realtime models
|
|
13
|
+
var util_3 = require("./util");
|
|
14
|
+
Object.defineProperty(exports, "OPENAI_REALTIME_MODELS", { enumerable: true, get: function () { return util_3.OPENAI_REALTIME_MODELS; } });
|
|
15
|
+
class OpenAiRealtimeProvider extends _1.OpenAiGenericProvider {
|
|
16
|
+
constructor(modelName, options = {}) {
|
|
17
|
+
if (!OpenAiRealtimeProvider.OPENAI_REALTIME_MODEL_NAMES.includes(modelName)) {
|
|
18
|
+
logger_1.default.debug(`Using unknown OpenAI realtime model: ${modelName}`);
|
|
19
|
+
}
|
|
20
|
+
super(modelName, options);
|
|
21
|
+
this.config = options.config || {};
|
|
22
|
+
}
|
|
23
|
+
getRealtimeSessionBody() {
|
|
24
|
+
// Default values
|
|
25
|
+
const modalities = this.config.modalities || ['text', 'audio'];
|
|
26
|
+
const voice = this.config.voice || 'alloy';
|
|
27
|
+
const instructions = this.config.instructions || 'You are a helpful assistant.';
|
|
28
|
+
const inputAudioFormat = this.config.input_audio_format || 'pcm16';
|
|
29
|
+
const outputAudioFormat = this.config.output_audio_format || 'pcm16';
|
|
30
|
+
const temperature = this.config.temperature ?? 0.8;
|
|
31
|
+
const maxResponseOutputTokens = this.config.max_response_output_tokens || 'inf';
|
|
32
|
+
const body = {
|
|
33
|
+
model: this.modelName,
|
|
34
|
+
modalities,
|
|
35
|
+
instructions,
|
|
36
|
+
voice,
|
|
37
|
+
input_audio_format: inputAudioFormat,
|
|
38
|
+
output_audio_format: outputAudioFormat,
|
|
39
|
+
temperature,
|
|
40
|
+
max_response_output_tokens: maxResponseOutputTokens,
|
|
41
|
+
};
|
|
42
|
+
// Add optional configurations
|
|
43
|
+
if (this.config.input_audio_transcription !== undefined) {
|
|
44
|
+
body.input_audio_transcription = this.config.input_audio_transcription;
|
|
45
|
+
}
|
|
46
|
+
if (this.config.turn_detection !== undefined) {
|
|
47
|
+
body.turn_detection = this.config.turn_detection;
|
|
48
|
+
}
|
|
49
|
+
if (this.config.tools && this.config.tools.length > 0) {
|
|
50
|
+
body.tools = (0, util_1.renderVarsInObject)(this.config.tools);
|
|
51
|
+
// If tools are provided but no tool_choice, default to auto
|
|
52
|
+
if (this.config.tool_choice === undefined) {
|
|
53
|
+
body.tool_choice = 'auto';
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
if (this.config.tool_choice) {
|
|
57
|
+
body.tool_choice = this.config.tool_choice;
|
|
58
|
+
}
|
|
59
|
+
return body;
|
|
60
|
+
}
|
|
61
|
+
generateEventId() {
|
|
62
|
+
return `event_${Date.now()}_${Math.random().toString(36).substring(2, 10)}`;
|
|
63
|
+
}
|
|
64
|
+
async webSocketRequest(clientSecret, prompt) {
|
|
65
|
+
return new Promise((resolve, reject) => {
|
|
66
|
+
logger_1.default.debug(`Attempting to connect to OpenAI WebSocket with client secret: ${clientSecret.slice(0, 5)}...`);
|
|
67
|
+
// The WebSocket URL needs to include the client secret
|
|
68
|
+
const wsUrl = `wss://api.openai.com/v1/realtime/socket?client_secret=${encodeURIComponent(clientSecret)}`;
|
|
69
|
+
logger_1.default.debug(`Connecting to WebSocket URL: ${wsUrl.slice(0, 60)}...`);
|
|
70
|
+
// Add WebSocket options to bypass potential network issues
|
|
71
|
+
const wsOptions = {
|
|
72
|
+
headers: {
|
|
73
|
+
'User-Agent': 'promptfoo Realtime API Client',
|
|
74
|
+
Origin: 'https://api.openai.com',
|
|
75
|
+
},
|
|
76
|
+
handshakeTimeout: 10000,
|
|
77
|
+
perMessageDeflate: false,
|
|
78
|
+
};
|
|
79
|
+
const ws = new ws_1.default(wsUrl, wsOptions);
|
|
80
|
+
// Set a timeout for the WebSocket connection
|
|
81
|
+
const timeout = setTimeout(() => {
|
|
82
|
+
logger_1.default.error('WebSocket connection timed out after 30 seconds');
|
|
83
|
+
ws.close();
|
|
84
|
+
reject(new Error('WebSocket connection timed out'));
|
|
85
|
+
}, this.config.websocketTimeout || 30000); // Default 30 second timeout
|
|
86
|
+
// Accumulators for response text and errors
|
|
87
|
+
let responseText = '';
|
|
88
|
+
let responseError = '';
|
|
89
|
+
let responseDone = false;
|
|
90
|
+
let usage = null;
|
|
91
|
+
// Audio content accumulators
|
|
92
|
+
const audioContent = [];
|
|
93
|
+
let audioFormat = 'wav';
|
|
94
|
+
let hasAudioContent = false;
|
|
95
|
+
// Track message IDs and function call state
|
|
96
|
+
let messageId = '';
|
|
97
|
+
let responseId = '';
|
|
98
|
+
let pendingFunctionCalls = [];
|
|
99
|
+
let functionCallOccurred = false;
|
|
100
|
+
const functionCallResults = [];
|
|
101
|
+
const sendEvent = (event) => {
|
|
102
|
+
if (!event.event_id) {
|
|
103
|
+
event.event_id = this.generateEventId();
|
|
104
|
+
}
|
|
105
|
+
logger_1.default.debug(`Sending event: ${JSON.stringify(event)}`);
|
|
106
|
+
ws.send(JSON.stringify(event));
|
|
107
|
+
return event.event_id;
|
|
108
|
+
};
|
|
109
|
+
ws.on('open', () => {
|
|
110
|
+
logger_1.default.debug('WebSocket connection established successfully');
|
|
111
|
+
// Create a conversation item with the user's prompt - immediately after connection
|
|
112
|
+
// Don't send ping event as it's not supported
|
|
113
|
+
sendEvent({
|
|
114
|
+
type: 'conversation.item.create',
|
|
115
|
+
previous_item_id: null,
|
|
116
|
+
item: {
|
|
117
|
+
type: 'message',
|
|
118
|
+
role: 'user',
|
|
119
|
+
content: [
|
|
120
|
+
{
|
|
121
|
+
type: 'input_text',
|
|
122
|
+
text: prompt,
|
|
123
|
+
},
|
|
124
|
+
],
|
|
125
|
+
},
|
|
126
|
+
});
|
|
127
|
+
});
|
|
128
|
+
ws.on('message', async (data) => {
|
|
129
|
+
try {
|
|
130
|
+
const message = JSON.parse(data.toString());
|
|
131
|
+
logger_1.default.debug(`Received WebSocket message: ${message.type}`);
|
|
132
|
+
// For better debugging, log the full message structure (without potentially large audio data)
|
|
133
|
+
const debugMessage = { ...message };
|
|
134
|
+
if (debugMessage.audio) {
|
|
135
|
+
debugMessage.audio = '[AUDIO_DATA]';
|
|
136
|
+
}
|
|
137
|
+
logger_1.default.debug(`Message data: ${JSON.stringify(debugMessage, null, 2)}`);
|
|
138
|
+
// Handle different event types
|
|
139
|
+
switch (message.type) {
|
|
140
|
+
case 'session.ready':
|
|
141
|
+
logger_1.default.debug('Session ready on WebSocket');
|
|
142
|
+
// Create a conversation item with the user's prompt
|
|
143
|
+
sendEvent({
|
|
144
|
+
type: 'conversation.item.create',
|
|
145
|
+
previous_item_id: null,
|
|
146
|
+
item: {
|
|
147
|
+
type: 'message',
|
|
148
|
+
role: 'user',
|
|
149
|
+
content: [
|
|
150
|
+
{
|
|
151
|
+
type: 'input_text',
|
|
152
|
+
text: prompt,
|
|
153
|
+
},
|
|
154
|
+
],
|
|
155
|
+
},
|
|
156
|
+
});
|
|
157
|
+
break;
|
|
158
|
+
case 'session.created':
|
|
159
|
+
logger_1.default.debug('Session created on WebSocket');
|
|
160
|
+
// No need to do anything here as we'll wait for session.ready
|
|
161
|
+
break;
|
|
162
|
+
case 'conversation.item.created':
|
|
163
|
+
if (message.item.role === 'user') {
|
|
164
|
+
// User message was created, now create a response
|
|
165
|
+
messageId = message.item.id;
|
|
166
|
+
// Prepare response creation event with appropriate settings
|
|
167
|
+
const responseEvent = {
|
|
168
|
+
type: 'response.create',
|
|
169
|
+
response: {
|
|
170
|
+
modalities: this.config.modalities || ['text', 'audio'],
|
|
171
|
+
instructions: this.config.instructions || 'You are a helpful assistant.',
|
|
172
|
+
voice: this.config.voice || 'alloy',
|
|
173
|
+
temperature: this.config.temperature ?? 0.8,
|
|
174
|
+
},
|
|
175
|
+
};
|
|
176
|
+
// Add tools if configured
|
|
177
|
+
if (this.config.tools && this.config.tools.length > 0) {
|
|
178
|
+
responseEvent.response.tools = this.config.tools;
|
|
179
|
+
if (Object.prototype.hasOwnProperty.call(this.config, 'tool_choice')) {
|
|
180
|
+
responseEvent.response.tool_choice = this.config.tool_choice;
|
|
181
|
+
}
|
|
182
|
+
else {
|
|
183
|
+
responseEvent.response.tool_choice = 'auto';
|
|
184
|
+
}
|
|
185
|
+
}
|
|
186
|
+
sendEvent(responseEvent);
|
|
187
|
+
}
|
|
188
|
+
break;
|
|
189
|
+
case 'response.created':
|
|
190
|
+
responseId = message.response.id;
|
|
191
|
+
break;
|
|
192
|
+
case 'response.text.delta':
|
|
193
|
+
// Accumulate text deltas
|
|
194
|
+
responseText += message.delta;
|
|
195
|
+
logger_1.default.debug(`Added text delta: "${message.delta}", current length: ${responseText.length}`);
|
|
196
|
+
break;
|
|
197
|
+
case 'response.text.done':
|
|
198
|
+
// Final text content
|
|
199
|
+
if (message.text && message.text.length > 0) {
|
|
200
|
+
logger_1.default.debug(`Setting final text content from response.text.done: "${message.text}" (length: ${message.text.length})`);
|
|
201
|
+
responseText = message.text;
|
|
202
|
+
}
|
|
203
|
+
else {
|
|
204
|
+
logger_1.default.debug('Received empty text in response.text.done');
|
|
205
|
+
}
|
|
206
|
+
break;
|
|
207
|
+
// Handle content part events
|
|
208
|
+
case 'response.content_part.added':
|
|
209
|
+
// Log that we received a content part
|
|
210
|
+
logger_1.default.debug(`Received content part: ${JSON.stringify(message.content_part)}`);
|
|
211
|
+
// Track content part ID if needed for later reference
|
|
212
|
+
if (message.content_part && message.content_part.id) {
|
|
213
|
+
logger_1.default.debug(`Content part added with ID: ${message.content_part.id}`);
|
|
214
|
+
}
|
|
215
|
+
break;
|
|
216
|
+
case 'response.content_part.done':
|
|
217
|
+
logger_1.default.debug('Content part completed');
|
|
218
|
+
break;
|
|
219
|
+
// Handle audio transcript events
|
|
220
|
+
case 'response.audio_transcript.delta':
|
|
221
|
+
// Accumulate audio transcript deltas - this is the text content
|
|
222
|
+
responseText += message.delta;
|
|
223
|
+
logger_1.default.debug(`Added audio transcript delta: "${message.delta}", current length: ${responseText.length}`);
|
|
224
|
+
break;
|
|
225
|
+
case 'response.audio_transcript.done':
|
|
226
|
+
// Final audio transcript content
|
|
227
|
+
if (message.text && message.text.length > 0) {
|
|
228
|
+
logger_1.default.debug(`Setting final audio transcript text: "${message.text}" (length: ${message.text.length})`);
|
|
229
|
+
responseText = message.text;
|
|
230
|
+
}
|
|
231
|
+
else {
|
|
232
|
+
logger_1.default.debug('Received empty text in response.audio_transcript.done');
|
|
233
|
+
}
|
|
234
|
+
break;
|
|
235
|
+
// Handle audio data events - store in metadata if needed
|
|
236
|
+
case 'response.audio.delta':
|
|
237
|
+
// Handle audio data (could store in metadata for playback if needed)
|
|
238
|
+
logger_1.default.debug('Received audio data chunk');
|
|
239
|
+
if (message.audio && message.audio.length > 0) {
|
|
240
|
+
// Store the audio data for later use
|
|
241
|
+
try {
|
|
242
|
+
const audioBuffer = Buffer.from(message.audio, 'base64');
|
|
243
|
+
audioContent.push(audioBuffer);
|
|
244
|
+
hasAudioContent = true;
|
|
245
|
+
}
|
|
246
|
+
catch (error) {
|
|
247
|
+
logger_1.default.error(`Error processing audio data: ${error}`);
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
break;
|
|
251
|
+
case 'response.audio.done':
|
|
252
|
+
logger_1.default.debug('Audio data complete');
|
|
253
|
+
// If audio format is specified in the message, capture it
|
|
254
|
+
if (message.format) {
|
|
255
|
+
audioFormat = message.format;
|
|
256
|
+
}
|
|
257
|
+
break;
|
|
258
|
+
// Handle output items (including function calls)
|
|
259
|
+
case 'response.output_item.added':
|
|
260
|
+
if (message.item.type === 'function_call') {
|
|
261
|
+
functionCallOccurred = true;
|
|
262
|
+
// Store the function call details for later handling
|
|
263
|
+
pendingFunctionCalls.push({
|
|
264
|
+
id: message.item.call_id,
|
|
265
|
+
name: message.item.name,
|
|
266
|
+
arguments: message.item.arguments || '{}',
|
|
267
|
+
});
|
|
268
|
+
}
|
|
269
|
+
else if (message.item.type === 'text') {
|
|
270
|
+
// Handle text output item - also add to responseText
|
|
271
|
+
if (message.item.text) {
|
|
272
|
+
responseText += message.item.text;
|
|
273
|
+
logger_1.default.debug(`Added text output item: "${message.item.text}", current length: ${responseText.length}`);
|
|
274
|
+
}
|
|
275
|
+
else {
|
|
276
|
+
logger_1.default.debug('Received text output item with empty text');
|
|
277
|
+
}
|
|
278
|
+
}
|
|
279
|
+
else {
|
|
280
|
+
// Log other output item types
|
|
281
|
+
logger_1.default.debug(`Received output item of type: ${message.item.type}`);
|
|
282
|
+
}
|
|
283
|
+
break;
|
|
284
|
+
case 'response.output_item.done':
|
|
285
|
+
logger_1.default.debug('Output item complete');
|
|
286
|
+
break;
|
|
287
|
+
case 'response.function_call_arguments.done':
|
|
288
|
+
// Find the function call in our pending list and update its arguments
|
|
289
|
+
const callIndex = pendingFunctionCalls.findIndex((call) => call.id === message.call_id);
|
|
290
|
+
if (callIndex !== -1) {
|
|
291
|
+
pendingFunctionCalls[callIndex].arguments = message.arguments;
|
|
292
|
+
}
|
|
293
|
+
break;
|
|
294
|
+
case 'response.done':
|
|
295
|
+
responseDone = true;
|
|
296
|
+
usage = message.response.usage;
|
|
297
|
+
// If there are pending function calls, process them
|
|
298
|
+
if (pendingFunctionCalls.length > 0 && this.config.functionCallHandler) {
|
|
299
|
+
for (const call of pendingFunctionCalls) {
|
|
300
|
+
try {
|
|
301
|
+
// Execute the function handler
|
|
302
|
+
const result = await this.config.functionCallHandler(call.name, call.arguments);
|
|
303
|
+
functionCallResults.push(result);
|
|
304
|
+
// Send the function call result back to the model
|
|
305
|
+
sendEvent({
|
|
306
|
+
type: 'conversation.item.create',
|
|
307
|
+
item: {
|
|
308
|
+
type: 'function_call_output',
|
|
309
|
+
call_id: call.id,
|
|
310
|
+
output: result,
|
|
311
|
+
},
|
|
312
|
+
});
|
|
313
|
+
}
|
|
314
|
+
catch (err) {
|
|
315
|
+
logger_1.default.error(`Error executing function ${call.name}: ${err}`);
|
|
316
|
+
// Send an error result back to the model
|
|
317
|
+
sendEvent({
|
|
318
|
+
type: 'conversation.item.create',
|
|
319
|
+
item: {
|
|
320
|
+
type: 'function_call_output',
|
|
321
|
+
call_id: call.id,
|
|
322
|
+
output: JSON.stringify({ error: String(err) }),
|
|
323
|
+
},
|
|
324
|
+
});
|
|
325
|
+
}
|
|
326
|
+
}
|
|
327
|
+
// Request a new response from the model using the function results
|
|
328
|
+
sendEvent({
|
|
329
|
+
type: 'response.create',
|
|
330
|
+
});
|
|
331
|
+
// Reset pending function calls - we've handled them
|
|
332
|
+
pendingFunctionCalls = [];
|
|
333
|
+
// Don't resolve the promise yet - wait for the final response
|
|
334
|
+
return;
|
|
335
|
+
}
|
|
336
|
+
// If no function calls or we've processed them all, close the connection
|
|
337
|
+
clearTimeout(timeout);
|
|
338
|
+
// Check if we have an empty response and try to diagnose the issue
|
|
339
|
+
if (responseText.length === 0) {
|
|
340
|
+
// Only log at debug level to prevent user-visible warnings
|
|
341
|
+
logger_1.default.debug('Empty response detected before resolving. Checking response message details');
|
|
342
|
+
logger_1.default.debug('Response message details: ' + JSON.stringify(message, null, 2));
|
|
343
|
+
// Try to extract any text content from the message as a fallback
|
|
344
|
+
if (message.response &&
|
|
345
|
+
message.response.content &&
|
|
346
|
+
Array.isArray(message.response.content)) {
|
|
347
|
+
const textContent = message.response.content.find((item) => item.type === 'text' && item.text && item.text.length > 0);
|
|
348
|
+
if (textContent) {
|
|
349
|
+
logger_1.default.debug(`Found text in response content, using as fallback: "${textContent.text}"`);
|
|
350
|
+
responseText = textContent.text;
|
|
351
|
+
}
|
|
352
|
+
else {
|
|
353
|
+
logger_1.default.debug('No fallback text content found in response message');
|
|
354
|
+
}
|
|
355
|
+
}
|
|
356
|
+
// If still empty, add a placeholder message to indicate the issue
|
|
357
|
+
if (responseText.length === 0) {
|
|
358
|
+
responseText = '[No response received from API]';
|
|
359
|
+
logger_1.default.debug('Using placeholder message for empty response');
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
ws.close();
|
|
363
|
+
// Prepare audio data if available
|
|
364
|
+
const finalAudioData = hasAudioContent
|
|
365
|
+
? Buffer.concat(audioContent).toString('base64')
|
|
366
|
+
: null;
|
|
367
|
+
resolve({
|
|
368
|
+
output: responseText,
|
|
369
|
+
tokenUsage: {
|
|
370
|
+
total: usage?.total_tokens || 0,
|
|
371
|
+
prompt: usage?.input_tokens || 0,
|
|
372
|
+
completion: usage?.output_tokens || 0,
|
|
373
|
+
cached: 0,
|
|
374
|
+
},
|
|
375
|
+
cached: false,
|
|
376
|
+
metadata: {
|
|
377
|
+
responseId,
|
|
378
|
+
messageId,
|
|
379
|
+
usage,
|
|
380
|
+
// Include audio data in metadata if available
|
|
381
|
+
...(hasAudioContent && {
|
|
382
|
+
audio: {
|
|
383
|
+
data: finalAudioData,
|
|
384
|
+
format: audioFormat,
|
|
385
|
+
},
|
|
386
|
+
}),
|
|
387
|
+
},
|
|
388
|
+
functionCallOccurred,
|
|
389
|
+
functionCallResults: functionCallResults.length > 0 ? functionCallResults : undefined,
|
|
390
|
+
});
|
|
391
|
+
break;
|
|
392
|
+
case 'rate_limits.updated':
|
|
393
|
+
// Store rate limits in metadata if needed
|
|
394
|
+
logger_1.default.debug(`Rate limits updated: ${JSON.stringify(message.rate_limits)}`);
|
|
395
|
+
break;
|
|
396
|
+
case 'error':
|
|
397
|
+
responseError = `Error: ${message.error.message}`;
|
|
398
|
+
logger_1.default.error(`WebSocket error: ${responseError} (${message.error.type})`);
|
|
399
|
+
// Always close on errors to prevent hanging connections
|
|
400
|
+
clearTimeout(timeout);
|
|
401
|
+
ws.close();
|
|
402
|
+
reject(new Error(responseError));
|
|
403
|
+
break;
|
|
404
|
+
}
|
|
405
|
+
}
|
|
406
|
+
catch (err) {
|
|
407
|
+
logger_1.default.error(`Error parsing WebSocket message: ${err}`);
|
|
408
|
+
clearTimeout(timeout);
|
|
409
|
+
ws.close();
|
|
410
|
+
reject(err);
|
|
411
|
+
}
|
|
412
|
+
});
|
|
413
|
+
ws.on('error', (err) => {
|
|
414
|
+
logger_1.default.error(`WebSocket error: ${err.message}`);
|
|
415
|
+
clearTimeout(timeout);
|
|
416
|
+
reject(err);
|
|
417
|
+
});
|
|
418
|
+
ws.on('close', (code, reason) => {
|
|
419
|
+
logger_1.default.debug(`WebSocket closed with code ${code}: ${reason}`);
|
|
420
|
+
clearTimeout(timeout);
|
|
421
|
+
// Provide more detailed error messages for common WebSocket close codes
|
|
422
|
+
if (code === 1006) {
|
|
423
|
+
logger_1.default.error('WebSocket connection closed abnormally - this often indicates a network or firewall issue');
|
|
424
|
+
}
|
|
425
|
+
else if (code === 1008) {
|
|
426
|
+
logger_1.default.error('WebSocket connection rejected due to policy violation (possibly wrong API key or permissions)');
|
|
427
|
+
}
|
|
428
|
+
else if (code === 403 || reason.includes('403')) {
|
|
429
|
+
logger_1.default.error('WebSocket connection received 403 Forbidden - verify API key permissions and rate limits');
|
|
430
|
+
}
|
|
431
|
+
// Only reject if we haven't received a completed response or error
|
|
432
|
+
const connectionClosedPrematurely = responseDone === false && responseError.length === 0;
|
|
433
|
+
if (connectionClosedPrematurely) {
|
|
434
|
+
reject(new Error(`WebSocket closed unexpectedly with code ${code}: ${reason}. This may indicate a networking issue, firewall restriction, or API access limitation.`));
|
|
435
|
+
}
|
|
436
|
+
});
|
|
437
|
+
});
|
|
438
|
+
}
|
|
439
|
+
async callApi(prompt, context, callApiOptions) {
|
|
440
|
+
if (!this.getApiKey()) {
|
|
441
|
+
throw new Error('OpenAI API key is not set. Set the OPENAI_API_KEY environment variable or add `apiKey` to the provider config.');
|
|
442
|
+
}
|
|
443
|
+
// Apply function handler if provided in context
|
|
444
|
+
if (context?.prompt?.config?.functionCallHandler &&
|
|
445
|
+
typeof context.prompt.config.functionCallHandler === 'function') {
|
|
446
|
+
this.config.functionCallHandler = context.prompt.config.functionCallHandler;
|
|
447
|
+
}
|
|
448
|
+
try {
|
|
449
|
+
// Extract the message content for WebSocket communications
|
|
450
|
+
// This approach is similar to parseChatPrompt but specialized for Realtime API
|
|
451
|
+
let promptText = prompt;
|
|
452
|
+
try {
|
|
453
|
+
// Check if the prompt is a JSON string
|
|
454
|
+
const parsedPrompt = JSON.parse(prompt);
|
|
455
|
+
// Handle array format (OpenAI chat format)
|
|
456
|
+
if (Array.isArray(parsedPrompt) && parsedPrompt.length > 0) {
|
|
457
|
+
// Find the last user message (following OpenAI's chat convention)
|
|
458
|
+
for (let i = parsedPrompt.length - 1; i >= 0; i--) {
|
|
459
|
+
const message = parsedPrompt[i];
|
|
460
|
+
if (message.role === 'user') {
|
|
461
|
+
// Handle both simple content string and array of content objects
|
|
462
|
+
if (typeof message.content === 'string') {
|
|
463
|
+
promptText = message.content;
|
|
464
|
+
break;
|
|
465
|
+
}
|
|
466
|
+
else if (Array.isArray(message.content) && message.content.length > 0) {
|
|
467
|
+
// Find the first text content - check for both 'text' and 'input_text' for backward compatibility
|
|
468
|
+
const textContent = message.content.find((content) => (content.type === 'text' || content.type === 'input_text') &&
|
|
469
|
+
typeof content.text === 'string');
|
|
470
|
+
if (textContent) {
|
|
471
|
+
promptText = textContent.text;
|
|
472
|
+
break;
|
|
473
|
+
}
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
else if (parsedPrompt && typeof parsedPrompt === 'object' && parsedPrompt.prompt) {
|
|
479
|
+
// Handle {prompt: "..."} format that some templates might use
|
|
480
|
+
promptText = parsedPrompt.prompt;
|
|
481
|
+
}
|
|
482
|
+
}
|
|
483
|
+
catch {
|
|
484
|
+
// Not JSON or couldn't extract - use as is
|
|
485
|
+
logger_1.default.debug('Using prompt as is - not a JSON structure');
|
|
486
|
+
}
|
|
487
|
+
// Connect directly to the WebSocket API using API key
|
|
488
|
+
logger_1.default.debug(`Connecting directly to OpenAI Realtime API WebSocket with API key`);
|
|
489
|
+
const result = await this.directWebSocketRequest(promptText);
|
|
490
|
+
// Format the output - if function calls occurred, include that info
|
|
491
|
+
let finalOutput = result.output;
|
|
492
|
+
// Log the output we received for debugging
|
|
493
|
+
logger_1.default.debug(`Final output from API: "${finalOutput}" (length: ${finalOutput.length})`);
|
|
494
|
+
if (finalOutput.length === 0) {
|
|
495
|
+
// Log at debug level instead of warn to prevent user-visible warnings
|
|
496
|
+
logger_1.default.debug('Received empty response from Realtime API - possible issue with transcript accumulation. Check modalities configuration.');
|
|
497
|
+
// Set a fallback message to help users, but keep it shorter
|
|
498
|
+
finalOutput = '[No response received from API]';
|
|
499
|
+
}
|
|
500
|
+
if (result.functionCallOccurred &&
|
|
501
|
+
result.functionCallResults &&
|
|
502
|
+
result.functionCallResults.length > 0) {
|
|
503
|
+
finalOutput += '\n\n[Function calls were made during processing]';
|
|
504
|
+
}
|
|
505
|
+
// Construct the metadata with audio if available
|
|
506
|
+
const metadata = {
|
|
507
|
+
...result.metadata,
|
|
508
|
+
functionCallOccurred: result.functionCallOccurred,
|
|
509
|
+
functionCallResults: result.functionCallResults,
|
|
510
|
+
};
|
|
511
|
+
// If the response has audio data, format it according to the promptfoo audio interface
|
|
512
|
+
if (result.metadata?.audio) {
|
|
513
|
+
// Convert Buffer to base64 string for the audio data
|
|
514
|
+
const audioDataBase64 = result.metadata.audio.data;
|
|
515
|
+
metadata.audio = {
|
|
516
|
+
data: audioDataBase64,
|
|
517
|
+
format: result.metadata.audio.format,
|
|
518
|
+
transcript: result.output, // Use the text output as transcript
|
|
519
|
+
};
|
|
520
|
+
}
|
|
521
|
+
return {
|
|
522
|
+
output: finalOutput,
|
|
523
|
+
tokenUsage: result.tokenUsage,
|
|
524
|
+
cached: result.cached,
|
|
525
|
+
metadata,
|
|
526
|
+
// Add audio at top level if available (EvalOutputCell expects this)
|
|
527
|
+
...(result.metadata?.audio && {
|
|
528
|
+
audio: {
|
|
529
|
+
data: result.metadata.audio.data,
|
|
530
|
+
format: result.metadata.audio.format,
|
|
531
|
+
transcript: result.output, // Use the text output as transcript
|
|
532
|
+
},
|
|
533
|
+
}),
|
|
534
|
+
};
|
|
535
|
+
}
|
|
536
|
+
catch (err) {
|
|
537
|
+
const errorMessage = `WebSocket error: ${String(err)}`;
|
|
538
|
+
logger_1.default.error(errorMessage);
|
|
539
|
+
// If this is an Unexpected server response: 403, add additional troubleshooting info
|
|
540
|
+
if (errorMessage.includes('403')) {
|
|
541
|
+
logger_1.default.error(`
|
|
542
|
+
This 403 error usually means one of the following:
|
|
543
|
+
1. WebSocket connections are blocked by your network/firewall
|
|
544
|
+
2. Your OpenAI API key doesn't have access to the Realtime API
|
|
545
|
+
3. There are rate limits or quotas in place for your account
|
|
546
|
+
Try:
|
|
547
|
+
- Using a different network connection
|
|
548
|
+
- Checking your OpenAI API key permissions
|
|
549
|
+
- Verifying you have access to the Realtime API beta`);
|
|
550
|
+
}
|
|
551
|
+
return {
|
|
552
|
+
error: errorMessage,
|
|
553
|
+
metadata: {},
|
|
554
|
+
};
|
|
555
|
+
}
|
|
556
|
+
}
|
|
557
|
+
async directWebSocketRequest(prompt) {
|
|
558
|
+
return new Promise((resolve, reject) => {
|
|
559
|
+
logger_1.default.debug(`Establishing direct WebSocket connection to OpenAI Realtime API`);
|
|
560
|
+
// Construct URL with model parameter
|
|
561
|
+
const wsUrl = `wss://api.openai.com/v1/realtime?model=${encodeURIComponent(this.modelName)}`;
|
|
562
|
+
logger_1.default.debug(`Connecting to WebSocket URL: ${wsUrl}`);
|
|
563
|
+
// Add WebSocket options with required headers
|
|
564
|
+
const wsOptions = {
|
|
565
|
+
headers: {
|
|
566
|
+
Authorization: `Bearer ${this.getApiKey()}`,
|
|
567
|
+
'OpenAI-Beta': 'realtime=v1',
|
|
568
|
+
'User-Agent': 'promptfoo Realtime API Client',
|
|
569
|
+
Origin: 'https://api.openai.com',
|
|
570
|
+
},
|
|
571
|
+
handshakeTimeout: 10000,
|
|
572
|
+
perMessageDeflate: false,
|
|
573
|
+
};
|
|
574
|
+
const ws = new ws_1.default(wsUrl, wsOptions);
|
|
575
|
+
// Set a timeout for the WebSocket connection
|
|
576
|
+
const timeout = setTimeout(() => {
|
|
577
|
+
logger_1.default.error('WebSocket connection timed out after 30 seconds');
|
|
578
|
+
ws.close();
|
|
579
|
+
reject(new Error('WebSocket connection timed out'));
|
|
580
|
+
}, this.config.websocketTimeout || 30000);
|
|
581
|
+
// Accumulators for response text and errors
|
|
582
|
+
let responseText = '';
|
|
583
|
+
let responseError = '';
|
|
584
|
+
let responseDone = false;
|
|
585
|
+
let usage = null;
|
|
586
|
+
// Audio content accumulators
|
|
587
|
+
const audioContent = [];
|
|
588
|
+
let audioFormat = 'wav';
|
|
589
|
+
let hasAudioContent = false;
|
|
590
|
+
// Track message IDs and function call state
|
|
591
|
+
let messageId = '';
|
|
592
|
+
let responseId = '';
|
|
593
|
+
let pendingFunctionCalls = [];
|
|
594
|
+
let functionCallOccurred = false;
|
|
595
|
+
const functionCallResults = [];
|
|
596
|
+
const sendEvent = (event) => {
|
|
597
|
+
if (!event.event_id) {
|
|
598
|
+
event.event_id = this.generateEventId();
|
|
599
|
+
}
|
|
600
|
+
logger_1.default.debug(`Sending event: ${JSON.stringify(event)}`);
|
|
601
|
+
ws.send(JSON.stringify(event));
|
|
602
|
+
return event.event_id;
|
|
603
|
+
};
|
|
604
|
+
ws.on('open', () => {
|
|
605
|
+
logger_1.default.debug('WebSocket connection established successfully');
|
|
606
|
+
// Create a conversation item with the user's prompt - immediately after connection
|
|
607
|
+
// Don't send ping event as it's not supported
|
|
608
|
+
sendEvent({
|
|
609
|
+
type: 'conversation.item.create',
|
|
610
|
+
previous_item_id: null,
|
|
611
|
+
item: {
|
|
612
|
+
type: 'message',
|
|
613
|
+
role: 'user',
|
|
614
|
+
content: [
|
|
615
|
+
{
|
|
616
|
+
type: 'input_text',
|
|
617
|
+
text: prompt,
|
|
618
|
+
},
|
|
619
|
+
],
|
|
620
|
+
},
|
|
621
|
+
});
|
|
622
|
+
});
|
|
623
|
+
ws.on('message', async (data) => {
|
|
624
|
+
try {
|
|
625
|
+
const message = JSON.parse(data.toString());
|
|
626
|
+
logger_1.default.debug(`Received WebSocket message: ${message.type}`);
|
|
627
|
+
// For better debugging, log the full message structure (without potentially large audio data)
|
|
628
|
+
const debugMessage = { ...message };
|
|
629
|
+
if (debugMessage.audio) {
|
|
630
|
+
debugMessage.audio = '[AUDIO_DATA]';
|
|
631
|
+
}
|
|
632
|
+
logger_1.default.debug(`Message data: ${JSON.stringify(debugMessage, null, 2)}`);
|
|
633
|
+
// Handle different event types
|
|
634
|
+
switch (message.type) {
|
|
635
|
+
case 'session.created':
|
|
636
|
+
logger_1.default.debug('Session created on WebSocket');
|
|
637
|
+
break;
|
|
638
|
+
case 'conversation.item.created':
|
|
639
|
+
if (message.item.role === 'user') {
|
|
640
|
+
// User message was created, now create a response
|
|
641
|
+
messageId = message.item.id;
|
|
642
|
+
// Prepare response creation event with appropriate settings
|
|
643
|
+
const responseEvent = {
|
|
644
|
+
type: 'response.create',
|
|
645
|
+
response: {
|
|
646
|
+
modalities: this.config.modalities || ['text', 'audio'],
|
|
647
|
+
instructions: this.config.instructions || 'You are a helpful assistant.',
|
|
648
|
+
voice: this.config.voice || 'alloy',
|
|
649
|
+
temperature: this.config.temperature ?? 0.8,
|
|
650
|
+
},
|
|
651
|
+
};
|
|
652
|
+
// Add tools if configured
|
|
653
|
+
if (this.config.tools && this.config.tools.length > 0) {
|
|
654
|
+
responseEvent.response.tools = this.config.tools;
|
|
655
|
+
if (Object.prototype.hasOwnProperty.call(this.config, 'tool_choice')) {
|
|
656
|
+
responseEvent.response.tool_choice = this.config.tool_choice;
|
|
657
|
+
}
|
|
658
|
+
else {
|
|
659
|
+
responseEvent.response.tool_choice = 'auto';
|
|
660
|
+
}
|
|
661
|
+
}
|
|
662
|
+
sendEvent(responseEvent);
|
|
663
|
+
}
|
|
664
|
+
break;
|
|
665
|
+
case 'response.created':
|
|
666
|
+
responseId = message.response.id;
|
|
667
|
+
break;
|
|
668
|
+
case 'response.text.delta':
|
|
669
|
+
// Accumulate text deltas
|
|
670
|
+
responseText += message.delta;
|
|
671
|
+
logger_1.default.debug(`Added text delta: "${message.delta}", current length: ${responseText.length}`);
|
|
672
|
+
break;
|
|
673
|
+
case 'response.text.done':
|
|
674
|
+
// Final text content
|
|
675
|
+
if (message.text && message.text.length > 0) {
|
|
676
|
+
logger_1.default.debug(`Setting final text content from response.text.done: "${message.text}" (length: ${message.text.length})`);
|
|
677
|
+
responseText = message.text;
|
|
678
|
+
}
|
|
679
|
+
else {
|
|
680
|
+
logger_1.default.debug('Received empty text in response.text.done');
|
|
681
|
+
}
|
|
682
|
+
break;
|
|
683
|
+
// Handle content part events
|
|
684
|
+
case 'response.content_part.added':
|
|
685
|
+
// Log that we received a content part
|
|
686
|
+
logger_1.default.debug(`Received content part: ${JSON.stringify(message.content_part)}`);
|
|
687
|
+
// Track content part ID if needed for later reference
|
|
688
|
+
if (message.content_part && message.content_part.id) {
|
|
689
|
+
logger_1.default.debug(`Content part added with ID: ${message.content_part.id}`);
|
|
690
|
+
}
|
|
691
|
+
break;
|
|
692
|
+
case 'response.content_part.done':
|
|
693
|
+
logger_1.default.debug('Content part completed');
|
|
694
|
+
break;
|
|
695
|
+
// Handle audio transcript events
|
|
696
|
+
case 'response.audio_transcript.delta':
|
|
697
|
+
// Accumulate audio transcript deltas - this is the text content
|
|
698
|
+
responseText += message.delta;
|
|
699
|
+
logger_1.default.debug(`Added audio transcript delta: "${message.delta}", current length: ${responseText.length}`);
|
|
700
|
+
break;
|
|
701
|
+
case 'response.audio_transcript.done':
|
|
702
|
+
// Final audio transcript content
|
|
703
|
+
if (message.text && message.text.length > 0) {
|
|
704
|
+
logger_1.default.debug(`Setting final audio transcript text: "${message.text}" (length: ${message.text.length})`);
|
|
705
|
+
responseText = message.text;
|
|
706
|
+
}
|
|
707
|
+
else {
|
|
708
|
+
logger_1.default.debug('Received empty text in response.audio_transcript.done');
|
|
709
|
+
}
|
|
710
|
+
break;
|
|
711
|
+
// Handle audio data events - store in metadata if needed
|
|
712
|
+
case 'response.audio.delta':
|
|
713
|
+
// Handle audio data (could store in metadata for playback if needed)
|
|
714
|
+
logger_1.default.debug('Received audio data chunk');
|
|
715
|
+
if (message.audio && message.audio.length > 0) {
|
|
716
|
+
// Store the audio data for later use
|
|
717
|
+
try {
|
|
718
|
+
const audioBuffer = Buffer.from(message.audio, 'base64');
|
|
719
|
+
audioContent.push(audioBuffer);
|
|
720
|
+
hasAudioContent = true;
|
|
721
|
+
}
|
|
722
|
+
catch (error) {
|
|
723
|
+
logger_1.default.error(`Error processing audio data: ${error}`);
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
break;
|
|
727
|
+
case 'response.audio.done':
|
|
728
|
+
logger_1.default.debug('Audio data complete');
|
|
729
|
+
// If audio format is specified in the message, capture it
|
|
730
|
+
if (message.format) {
|
|
731
|
+
audioFormat = message.format;
|
|
732
|
+
}
|
|
733
|
+
break;
|
|
734
|
+
// Handle output items (including function calls)
|
|
735
|
+
case 'response.output_item.added':
|
|
736
|
+
if (message.item.type === 'function_call') {
|
|
737
|
+
functionCallOccurred = true;
|
|
738
|
+
// Store the function call details for later handling
|
|
739
|
+
pendingFunctionCalls.push({
|
|
740
|
+
id: message.item.call_id,
|
|
741
|
+
name: message.item.name,
|
|
742
|
+
arguments: message.item.arguments || '{}',
|
|
743
|
+
});
|
|
744
|
+
}
|
|
745
|
+
else if (message.item.type === 'text') {
|
|
746
|
+
// Handle text output item - also add to responseText
|
|
747
|
+
if (message.item.text) {
|
|
748
|
+
responseText += message.item.text;
|
|
749
|
+
logger_1.default.debug(`Added text output item: "${message.item.text}", current length: ${responseText.length}`);
|
|
750
|
+
}
|
|
751
|
+
else {
|
|
752
|
+
logger_1.default.debug('Received text output item with empty text');
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
else {
|
|
756
|
+
// Log other output item types
|
|
757
|
+
logger_1.default.debug(`Received output item of type: ${message.item.type}`);
|
|
758
|
+
}
|
|
759
|
+
break;
|
|
760
|
+
case 'response.output_item.done':
|
|
761
|
+
logger_1.default.debug('Output item complete');
|
|
762
|
+
break;
|
|
763
|
+
case 'response.function_call_arguments.done':
|
|
764
|
+
// Find the function call in our pending list and update its arguments
|
|
765
|
+
const callIndex = pendingFunctionCalls.findIndex((call) => call.id === message.call_id);
|
|
766
|
+
if (callIndex !== -1) {
|
|
767
|
+
pendingFunctionCalls[callIndex].arguments = message.arguments;
|
|
768
|
+
}
|
|
769
|
+
break;
|
|
770
|
+
case 'response.done':
|
|
771
|
+
responseDone = true;
|
|
772
|
+
usage = message.response.usage;
|
|
773
|
+
// If there are pending function calls, process them
|
|
774
|
+
if (pendingFunctionCalls.length > 0 && this.config.functionCallHandler) {
|
|
775
|
+
for (const call of pendingFunctionCalls) {
|
|
776
|
+
try {
|
|
777
|
+
// Execute the function handler
|
|
778
|
+
const result = await this.config.functionCallHandler(call.name, call.arguments);
|
|
779
|
+
functionCallResults.push(result);
|
|
780
|
+
// Send the function call result back to the model
|
|
781
|
+
sendEvent({
|
|
782
|
+
type: 'conversation.item.create',
|
|
783
|
+
item: {
|
|
784
|
+
type: 'function_call_output',
|
|
785
|
+
call_id: call.id,
|
|
786
|
+
output: result,
|
|
787
|
+
},
|
|
788
|
+
});
|
|
789
|
+
}
|
|
790
|
+
catch (err) {
|
|
791
|
+
logger_1.default.error(`Error executing function ${call.name}: ${err}`);
|
|
792
|
+
// Send an error result back to the model
|
|
793
|
+
sendEvent({
|
|
794
|
+
type: 'conversation.item.create',
|
|
795
|
+
item: {
|
|
796
|
+
type: 'function_call_output',
|
|
797
|
+
call_id: call.id,
|
|
798
|
+
output: JSON.stringify({ error: String(err) }),
|
|
799
|
+
},
|
|
800
|
+
});
|
|
801
|
+
}
|
|
802
|
+
}
|
|
803
|
+
// Request a new response from the model using the function results
|
|
804
|
+
sendEvent({
|
|
805
|
+
type: 'response.create',
|
|
806
|
+
});
|
|
807
|
+
// Reset pending function calls - we've handled them
|
|
808
|
+
pendingFunctionCalls = [];
|
|
809
|
+
// Don't resolve the promise yet - wait for the final response
|
|
810
|
+
return;
|
|
811
|
+
}
|
|
812
|
+
// If no function calls or we've processed them all, close the connection
|
|
813
|
+
clearTimeout(timeout);
|
|
814
|
+
// Check if we have an empty response and try to diagnose the issue
|
|
815
|
+
if (responseText.length === 0) {
|
|
816
|
+
// Only log at debug level to prevent user-visible warnings
|
|
817
|
+
logger_1.default.debug('Empty response detected before resolving. Checking response message details');
|
|
818
|
+
logger_1.default.debug('Response message details: ' + JSON.stringify(message, null, 2));
|
|
819
|
+
// Try to extract any text content from the message as a fallback
|
|
820
|
+
if (message.response &&
|
|
821
|
+
message.response.content &&
|
|
822
|
+
Array.isArray(message.response.content)) {
|
|
823
|
+
const textContent = message.response.content.find((item) => item.type === 'text' && item.text && item.text.length > 0);
|
|
824
|
+
if (textContent) {
|
|
825
|
+
logger_1.default.debug(`Found text in response content, using as fallback: "${textContent.text}"`);
|
|
826
|
+
responseText = textContent.text;
|
|
827
|
+
}
|
|
828
|
+
else {
|
|
829
|
+
logger_1.default.debug('No fallback text content found in response message');
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
// If still empty, add a placeholder message to indicate the issue
|
|
833
|
+
if (responseText.length === 0) {
|
|
834
|
+
responseText = '[No response received from API]';
|
|
835
|
+
logger_1.default.debug('Using placeholder message for empty response');
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
ws.close();
|
|
839
|
+
// Prepare audio data if available
|
|
840
|
+
const finalAudioData = hasAudioContent
|
|
841
|
+
? Buffer.concat(audioContent).toString('base64')
|
|
842
|
+
: null;
|
|
843
|
+
resolve({
|
|
844
|
+
output: responseText,
|
|
845
|
+
tokenUsage: {
|
|
846
|
+
total: usage?.total_tokens || 0,
|
|
847
|
+
prompt: usage?.input_tokens || 0,
|
|
848
|
+
completion: usage?.output_tokens || 0,
|
|
849
|
+
cached: 0,
|
|
850
|
+
},
|
|
851
|
+
cached: false,
|
|
852
|
+
metadata: {
|
|
853
|
+
responseId,
|
|
854
|
+
messageId,
|
|
855
|
+
usage,
|
|
856
|
+
// Include audio data in metadata if available
|
|
857
|
+
...(hasAudioContent && {
|
|
858
|
+
audio: {
|
|
859
|
+
data: finalAudioData,
|
|
860
|
+
format: audioFormat,
|
|
861
|
+
},
|
|
862
|
+
}),
|
|
863
|
+
},
|
|
864
|
+
functionCallOccurred,
|
|
865
|
+
functionCallResults: functionCallResults.length > 0 ? functionCallResults : undefined,
|
|
866
|
+
});
|
|
867
|
+
break;
|
|
868
|
+
case 'rate_limits.updated':
|
|
869
|
+
// Store rate limits in metadata if needed
|
|
870
|
+
logger_1.default.debug(`Rate limits updated: ${JSON.stringify(message.rate_limits)}`);
|
|
871
|
+
break;
|
|
872
|
+
case 'error':
|
|
873
|
+
responseError = `Error: ${message.error.message}`;
|
|
874
|
+
logger_1.default.error(`WebSocket error: ${responseError} (${message.error.type})`);
|
|
875
|
+
// Always close on errors to prevent hanging connections
|
|
876
|
+
clearTimeout(timeout);
|
|
877
|
+
ws.close();
|
|
878
|
+
reject(new Error(responseError));
|
|
879
|
+
break;
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
catch (err) {
|
|
883
|
+
logger_1.default.error(`Error parsing WebSocket message: ${err}`);
|
|
884
|
+
clearTimeout(timeout);
|
|
885
|
+
ws.close();
|
|
886
|
+
reject(err);
|
|
887
|
+
}
|
|
888
|
+
});
|
|
889
|
+
ws.on('error', (err) => {
|
|
890
|
+
logger_1.default.error(`WebSocket error: ${err.message}`);
|
|
891
|
+
clearTimeout(timeout);
|
|
892
|
+
reject(err);
|
|
893
|
+
});
|
|
894
|
+
ws.on('close', (code, reason) => {
|
|
895
|
+
logger_1.default.debug(`WebSocket closed with code ${code}: ${reason}`);
|
|
896
|
+
clearTimeout(timeout);
|
|
897
|
+
// Provide more detailed error messages for common WebSocket close codes
|
|
898
|
+
if (code === 1006) {
|
|
899
|
+
logger_1.default.error('WebSocket connection closed abnormally - this often indicates a network or firewall issue');
|
|
900
|
+
}
|
|
901
|
+
else if (code === 1008) {
|
|
902
|
+
logger_1.default.error('WebSocket connection rejected due to policy violation (possibly wrong API key or permissions)');
|
|
903
|
+
}
|
|
904
|
+
else if (code === 403 || reason.includes('403')) {
|
|
905
|
+
logger_1.default.error('WebSocket connection received 403 Forbidden - verify API key permissions and rate limits');
|
|
906
|
+
}
|
|
907
|
+
// Only reject if we haven't received a completed response or error
|
|
908
|
+
const connectionClosedPrematurely = responseDone === false && responseError.length === 0;
|
|
909
|
+
if (connectionClosedPrematurely) {
|
|
910
|
+
reject(new Error(`WebSocket closed unexpectedly with code ${code}: ${reason}. This may indicate a networking issue, firewall restriction, or API access limitation.`));
|
|
911
|
+
}
|
|
912
|
+
});
|
|
913
|
+
});
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
// Publish the provider class on the CommonJS exports object.
exports.OpenAiRealtimeProvider = OpenAiRealtimeProvider;
// Static members: the model list taken from util_2, and the ids extracted from that list.
OpenAiRealtimeProvider.OPENAI_REALTIME_MODELS = util_2.OPENAI_REALTIME_MODELS;
OpenAiRealtimeProvider.OPENAI_REALTIME_MODEL_NAMES = util_2.OPENAI_REALTIME_MODELS.map(({ id }) => id);
|
|
919
|
+
//# sourceMappingURL=realtime.js.map
|