symposium 0.13.5 → 0.13.7
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/Agent.js +21 -0
- package/Model.js +1 -0
- package/models/OpenAIModel.js +2 -2
- package/models/Whisper.js +2 -2
- package/package.json +1 -1
package/Agent.js
CHANGED
|
@@ -12,6 +12,7 @@ export default class Agent {
|
|
|
12
12
|
max_retries = 5;
|
|
13
13
|
callbacks = {};
|
|
14
14
|
utility = null;
|
|
15
|
+
transcription_model = null;
|
|
15
16
|
|
|
16
17
|
constructor(options) {
|
|
17
18
|
this.options = {
|
|
@@ -205,6 +206,26 @@ export default class Agent {
|
|
|
205
206
|
async generateCompletion(thread, options = {}, retry_counter = 1) {
|
|
206
207
|
try {
|
|
207
208
|
const model = Symposium.getModelByName(thread.state.model);
|
|
209
|
+
|
|
210
|
+
for (let message of thread.messages) {
|
|
211
|
+
for (let c of message.content) {
|
|
212
|
+
if (c.type === 'audio' && !model.supports_audio) {
|
|
213
|
+
if (!process.env.TRANSCRIPTION_MODEL)
|
|
214
|
+
throw new Error('Audio support is not enabled for this model');
|
|
215
|
+
if (c.content.type !== 'base64')
|
|
216
|
+
throw new Error('Audio content must be base64 encoded');
|
|
217
|
+
|
|
218
|
+
if (!this.transcription_model)
|
|
219
|
+
this.transcription_model = Symposium.getModelByName(process.env.TRANSCRIPTION_MODEL);
|
|
220
|
+
|
|
221
|
+
const ext = c.content.mime === 'audio/mpeg' ? 'mp3' : 'wav';
|
|
222
|
+
const transcribed = await this.transcription_model.transcribe(this, thread, new File([Buffer.from(c.content.data, 'base64')], 'audio.' + ext, {type: c.content.type}));
|
|
223
|
+
c.type = 'text';
|
|
224
|
+
c.content = '[voice message] ' + transcribed;
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
|
|
208
229
|
const messages = await model.generate(thread, await this.getFunctions(), options);
|
|
209
230
|
return model.supports_functions ? messages : messages.map(m => this.parseFunctions(m));
|
|
210
231
|
} catch (error) {
|
package/Model.js
CHANGED
package/models/OpenAIModel.js
CHANGED
|
@@ -142,7 +142,7 @@ export default class OpenAIModel extends Model {
|
|
|
142
142
|
case 'audio':
|
|
143
143
|
if (c.content.type !== 'base64')
|
|
144
144
|
throw new Error('Audio content must be base64 encoded for this model');
|
|
145
|
-
if (!c.content.mime
|
|
145
|
+
if (!['audio/mpeg', 'audio/wav'].includes(c.content.mime))
|
|
146
146
|
throw new Error('Audio content must have a valid MIME type');
|
|
147
147
|
|
|
148
148
|
messages.push({
|
|
@@ -152,7 +152,7 @@ export default class OpenAIModel extends Model {
|
|
|
152
152
|
type: 'input_audio',
|
|
153
153
|
input_audio: {
|
|
154
154
|
data: c.content.data,
|
|
155
|
-
format: c.content.mime
|
|
155
|
+
format: c.content.mime === 'audio/mpeg' ? 'mp3' : 'wav',
|
|
156
156
|
},
|
|
157
157
|
},
|
|
158
158
|
],
|
package/models/Whisper.js
CHANGED
|
@@ -9,8 +9,8 @@ export default class Whisper extends OpenAIModel {
|
|
|
9
9
|
|
|
10
10
|
const response = await this.getOpenAi().audio.transcriptions.create({
|
|
11
11
|
file,
|
|
12
|
-
model: '
|
|
13
|
-
prompt: words.join(', '),
|
|
12
|
+
model: 'gpt-4o-transcribe',
|
|
13
|
+
prompt: 'Possibili parole usate: ' + words.join(', '),
|
|
14
14
|
});
|
|
15
15
|
return response.text;
|
|
16
16
|
}
|