symposium 0.13.6 → 0.13.7

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/Agent.js CHANGED
@@ -12,6 +12,7 @@ export default class Agent {
12
12
  max_retries = 5;
13
13
  callbacks = {};
14
14
  utility = null;
15
+ transcription_model = null;
15
16
 
16
17
  constructor(options) {
17
18
  this.options = {
@@ -205,6 +206,26 @@ export default class Agent {
205
206
  async generateCompletion(thread, options = {}, retry_counter = 1) {
206
207
  try {
207
208
  const model = Symposium.getModelByName(thread.state.model);
209
+
210
+ for (let message of thread.messages) {
211
+ for (let c of message.content) {
212
+ if (c.type === 'audio' && !model.supports_audio) {
213
+ if (!process.env.TRANSCRIPTION_MODEL)
214
+ throw new Error('Audio support is not enabled for this model');
215
+ if (c.content.type !== 'base64')
216
+ throw new Error('Audio content must be base64 encoded');
217
+
218
+ if (!this.transcription_model)
219
+ this.transcription_model = Symposium.getModelByName(process.env.TRANSCRIPTION_MODEL);
220
+
221
+ const ext = c.content.mime === 'audio/mpeg' ? 'mp3' : 'wav';
222
+ const transcribed = await this.transcription_model.transcribe(this, thread, new File([Buffer.from(c.content.data, 'base64')], 'audio.' + ext, {type: c.content.type}));
223
+ c.type = 'text';
224
+ c.content = '[voice message] ' + transcribed;
225
+ }
226
+ }
227
+ }
228
+
208
229
  const messages = await model.generate(thread, await this.getFunctions(), options);
209
230
  return model.supports_functions ? messages : messages.map(m => this.parseFunctions(m));
210
231
  } catch (error) {
package/Model.js CHANGED
@@ -6,6 +6,7 @@ export default class Model {
6
6
  supports_functions = false;
7
7
  supports_structured_output = false;
8
8
  system_role_name = 'system';
9
+ supports_audio = false;
9
10
 
10
11
  constructor() {
11
12
  if (!this.label)
package/models/Whisper.js CHANGED
@@ -9,8 +9,8 @@ export default class Whisper extends OpenAIModel {
9
9
 
10
10
  const response = await this.getOpenAi().audio.transcriptions.create({
11
11
  file,
12
- model: 'whisper-1',
13
- prompt: words.join(', '),
12
+ model: 'gpt-4o-transcribe',
13
+ prompt: 'Possibili parole usate: ' + words.join(', '),
14
14
  });
15
15
  return response.text;
16
16
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "type": "module",
3
3
  "name": "symposium",
4
- "version": "0.13.6",
4
+ "version": "0.13.7",
5
5
  "description": "Agents",
6
6
  "main": "index.js",
7
7
  "author": "Domenico Giambra",