symposium 0.13.7 → 0.13.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Agent.js +3 -11
- package/Symposium.js +51 -0
- package/models/Whisper.js +4 -5
- package/package.json +1 -1
package/Agent.js
CHANGED
|
@@ -12,7 +12,6 @@ export default class Agent {
|
|
|
12
12
|
max_retries = 5;
|
|
13
13
|
callbacks = {};
|
|
14
14
|
utility = null;
|
|
15
|
-
transcription_model = null;
|
|
16
15
|
|
|
17
16
|
constructor(options) {
|
|
18
17
|
this.options = {
|
|
@@ -210,16 +209,9 @@ export default class Agent {
|
|
|
210
209
|
for (let message of thread.messages) {
|
|
211
210
|
for (let c of message.content) {
|
|
212
211
|
if (c.type === 'audio' && !model.supports_audio) {
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
throw new Error('Audio content must be base64 encoded');
|
|
217
|
-
|
|
218
|
-
if (!this.transcription_model)
|
|
219
|
-
this.transcription_model = Symposium.getModelByName(process.env.TRANSCRIPTION_MODEL);
|
|
220
|
-
|
|
221
|
-
const ext = c.content.mime === 'audio/mpeg' ? 'mp3' : 'wav';
|
|
222
|
-
const transcribed = await this.transcription_model.transcribe(this, thread, new File([Buffer.from(c.content.data, 'base64')], 'audio.' + ext, {type: c.content.type}));
|
|
212
|
+
const words = await this.getPromptWordsForTranscription(thread);
|
|
213
|
+
const prompt = words.length ? 'Possibili parole usate: ' + words.join(', ') : null;
|
|
214
|
+
const transcribed = await Symposium.transcribe(c.content, prompt);
|
|
223
215
|
c.type = 'text';
|
|
224
216
|
c.content = '[voice message] ' + transcribed;
|
|
225
217
|
}
|
package/Symposium.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
|
|
1
3
|
import Gpt35 from "./models/Gpt35.js";
|
|
2
4
|
import Gpt4 from "./models/Gpt4.js";
|
|
3
5
|
import Gpt4Turbo from "./models/Gpt4Turbo.js";
|
|
@@ -17,6 +19,7 @@ import DeepSeekReasoner from "./models/DeepSeekReasoner.js";
|
|
|
17
19
|
export default class Symposium {
|
|
18
20
|
static models = new Map();
|
|
19
21
|
static storage = null;
|
|
22
|
+
static transcription_model = null;
|
|
20
23
|
|
|
21
24
|
/*
|
|
22
25
|
* Storage must expose the following methods:
|
|
@@ -74,4 +77,52 @@ export default class Symposium {
|
|
|
74
77
|
|
|
75
78
|
return functions;
|
|
76
79
|
}
|
|
80
|
+
|
|
81
|
+
static async transcribe(audio, prompt = null) {
|
|
82
|
+
if (!process.env.TRANSCRIPTION_MODEL)
|
|
83
|
+
throw new Error('Transcription is not enabled');
|
|
84
|
+
|
|
85
|
+
let file;
|
|
86
|
+
switch (audio.type) {
|
|
87
|
+
case 'url':
|
|
88
|
+
if (!audio.url)
|
|
89
|
+
throw new Error('Audio URL is required');
|
|
90
|
+
|
|
91
|
+
if (audio.url.startsWith('/')) { // Local path
|
|
92
|
+
// Get with fs
|
|
93
|
+
if (!fs.existsSync(audio.url))
|
|
94
|
+
throw new Error('Audio file does not exist at the specified path: ' + audio.url);
|
|
95
|
+
|
|
96
|
+
file = fs.readFileSync(audio.url);
|
|
97
|
+
} else {
|
|
98
|
+
file = await fetch(audio.url).then(res => res.blob());
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
file = new File([file], 'audio.' + this.getExtFromMime(file.type), {type: file.type});
|
|
102
|
+
break;
|
|
103
|
+
|
|
104
|
+
case 'base64':
|
|
105
|
+
file = new File([Buffer.from(audio.data, 'base64')], 'audio.' + this.getExtFromMime(audio.type), {type: audio.type});
|
|
106
|
+
break;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
if (!this.transcription_model)
|
|
110
|
+
this.transcription_model = Symposium.getModelByName(process.env.TRANSCRIPTION_MODEL);
|
|
111
|
+
|
|
112
|
+
return this.transcription_model.transcribe(file, prompt);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
static getExtFromMime(mime) {
|
|
116
|
+
const mimeToExt = {
|
|
117
|
+
'audio/mpeg': 'mp3',
|
|
118
|
+
'audio/wav': 'wav',
|
|
119
|
+
'audio/ogg': 'ogg',
|
|
120
|
+
'audio/flac': 'flac',
|
|
121
|
+
'audio/aac': 'aac',
|
|
122
|
+
'audio/mp4': 'm4a',
|
|
123
|
+
'audio/webm': 'webm',
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
return mimeToExt[mime] || null;
|
|
127
|
+
}
|
|
77
128
|
}
|
package/models/Whisper.js
CHANGED
|
@@ -4,14 +4,13 @@ export default class Whisper extends OpenAIModel {
|
|
|
4
4
|
type = 'stt';
|
|
5
5
|
name = 'whisper';
|
|
6
6
|
|
|
7
|
-
async transcribe(
|
|
8
|
-
const words = await agent.getPromptWordsForTranscription(thread);
|
|
9
|
-
|
|
7
|
+
async transcribe(file, prompt = null) {
|
|
10
8
|
const response = await this.getOpenAi().audio.transcriptions.create({
|
|
11
|
-
file,
|
|
12
9
|
model: 'gpt-4o-transcribe',
|
|
13
|
-
|
|
10
|
+
file,
|
|
11
|
+
prompt,
|
|
14
12
|
});
|
|
13
|
+
|
|
15
14
|
return response.text;
|
|
16
15
|
}
|
|
17
16
|
}
|