npm - symposium - Versions diffs - 0.13.5 → 0.13.7 - Mend

symposium 0.13.5 → 0.13.7

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (5) hide show

package/Agent.js CHANGED Viewed

@@ -12,6 +12,7 @@ export default class Agent {
 	max_retries = 5;
 	callbacks = {};
 	utility = null;
+	transcription_model = null;
 	constructor(options) {
 		this.options = {
@@ -205,6 +206,26 @@ export default class Agent {
 	async generateCompletion(thread, options = {}, retry_counter = 1) {
 		try {
 			const model = Symposium.getModelByName(thread.state.model);
+			for (let message of thread.messages) {
+				for (let c of message.content) {
+					if (c.type === 'audio' && !model.supports_audio) {
+						if (!process.env.TRANSCRIPTION_MODEL)
+							throw new Error('Audio support is not enabled for this model');
+						if (c.content.type !== 'base64')
+							throw new Error('Audio content must be base64 encoded');
+						if (!this.transcription_model)
+							this.transcription_model = Symposium.getModelByName(process.env.TRANSCRIPTION_MODEL);
+						const ext = c.content.mime === 'audio/mpeg' ? 'mp3' : 'wav';
+						const transcribed = await this.transcription_model.transcribe(this, thread, new File([Buffer.from(c.content.data, 'base64')], 'audio.' + ext, {type: c.content.type}));
+						c.type = 'text';
+						c.content = '[voice message] ' + transcribed;
+					}
+				}
+			}
 			const messages = await model.generate(thread, await this.getFunctions(), options);
 			return model.supports_functions ? messages : messages.map(m => this.parseFunctions(m));
 		} catch (error) {

package/Model.js CHANGED Viewed

@@ -6,6 +6,7 @@ export default class Model {
 	supports_functions = false;
 	supports_structured_output = false;
 	system_role_name = 'system';
+	supports_audio = false;
 	constructor() {
 		if (!this.label)

package/models/OpenAIModel.js CHANGED Viewed

@@ -142,7 +142,7 @@ export default class OpenAIModel extends Model {
 				case 'audio':
 					if (c.content.type !== 'base64')
 						throw new Error('Audio content must be base64 encoded for this model');
-					if (!c.content.mime.startsWith('audio/'))
+					if (!['audio/mpeg', 'audio/wav'].includes(c.content.mime))
 						throw new Error('Audio content must have a valid MIME type');
 					messages.push({
@@ -152,7 +152,7 @@ export default class OpenAIModel extends Model {
 								type: 'input_audio',
 								input_audio: {
 									data: c.content.data,
-									format: c.content.mime.substring(6), // Remove 'audio/' prefix
+									format: c.content.mime === 'audio/mpeg' ? 'mp3' : 'wav',
 								},
 							},
 						],

package/models/Whisper.js CHANGED Viewed

@@ -9,8 +9,8 @@ export default class Whisper extends OpenAIModel {
 		const response = await this.getOpenAi().audio.transcriptions.create({
 			file,
-			model: 'whisper-1',
-			prompt: words.join(', '),
+			model: 'gpt-4o-transcribe',
+			prompt: 'Possibili parole usate: ' + words.join(', '),
 		});
 		return response.text;
 	}

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "type": "module",
   "name": "symposium",
-  "version": "0.13.5",
+  "version": "0.13.7",
   "description": "Agents",
   "main": "index.js",
   "author": "Domenico Giambra",