@adminforth/agent 1.43.5 → 1.43.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agentResponseEvents.ts +10 -1
- package/build.log +4 -2
- package/custom/composables/agentAudio/utils.ts +205 -0
- package/custom/composables/useAgentAudio.ts +34 -178
- package/custom/conversation_area/TextRenderer.vue +2 -2
- package/custom/speech_recognition_frontend/MicrophoneButon.vue +5 -3
- package/custom/types.ts +3 -0
- package/dist/agentResponseEvents.js +4 -1
- package/dist/custom/composables/agentAudio/utils.ts +205 -0
- package/dist/custom/composables/useAgentAudio.ts +34 -178
- package/dist/custom/conversation_area/TextRenderer.vue +2 -2
- package/dist/custom/speech_recognition_frontend/MicrophoneButon.vue +5 -3
- package/dist/custom/types.ts +3 -0
- package/dist/index.js +2 -2
- package/index.ts +2 -2
- package/package.json +1 -1
package/agentResponseEvents.ts
CHANGED
|
@@ -145,12 +145,21 @@ export function createAgentEventStream(
|
|
|
145
145
|
});
|
|
146
146
|
},
|
|
147
147
|
|
|
148
|
-
audioStart(
|
|
148
|
+
audioStart(
|
|
149
|
+
mimeType: string,
|
|
150
|
+
format: string,
|
|
151
|
+
sampleRate: number,
|
|
152
|
+
channelCount: number,
|
|
153
|
+
bitsPerSample: number,
|
|
154
|
+
) {
|
|
149
155
|
stream.send({
|
|
150
156
|
type: "audio-start",
|
|
151
157
|
data: {
|
|
152
158
|
mimeType,
|
|
153
159
|
format,
|
|
160
|
+
sampleRate,
|
|
161
|
+
channelCount,
|
|
162
|
+
bitsPerSample,
|
|
154
163
|
},
|
|
155
164
|
});
|
|
156
165
|
},
|
package/build.log
CHANGED
|
@@ -18,6 +18,8 @@ custom/composables/
|
|
|
18
18
|
custom/composables/useAgentAudio.ts
|
|
19
19
|
custom/composables/useAgentStore.ts
|
|
20
20
|
custom/composables/useAgentTransitions.ts
|
|
21
|
+
custom/composables/agentAudio/
|
|
22
|
+
custom/composables/agentAudio/utils.ts
|
|
21
23
|
custom/composables/agentStore/
|
|
22
24
|
custom/composables/agentStore/constants.ts
|
|
23
25
|
custom/composables/agentStore/pageContext.ts
|
|
@@ -58,5 +60,5 @@ custom/speech_recognition_frontend/voiceActivityDetection.ts
|
|
|
58
60
|
custom/speech_recognition_frontend/types/
|
|
59
61
|
custom/speech_recognition_frontend/types/voice-activity-detection.d.ts
|
|
60
62
|
|
|
61
|
-
sent 1,
|
|
62
|
-
total size is 1,
|
|
63
|
+
sent 1,663,013 bytes received 883 bytes 3,327,792.00 bytes/sec
|
|
64
|
+
total size is 1,658,998 speedup is 1.00
|
|
package/custom/composables/agentAudio/utils.ts
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
const ctx = new AudioContext();
|
|
2
|
+
let standbySource: AudioBufferSourceNode | null = null;
|
|
3
|
+
|
|
4
|
+
const DEFAULT_PCM_SAMPLE_RATE = 24000;
|
|
5
|
+
const DEFAULT_PCM_CHANNEL_COUNT = 1;
|
|
6
|
+
const DEFAULT_PCM_BITS_PER_SAMPLE = 16;
|
|
7
|
+
|
|
8
|
+
export type ChatResponseAudioPlayback = {
|
|
9
|
+
nextStartTime: number;
|
|
10
|
+
activeSources: Set<AudioBufferSourceNode>;
|
|
11
|
+
pendingSourceCount: number;
|
|
12
|
+
pendingPcmBytes: Uint8Array;
|
|
13
|
+
isDone: boolean;
|
|
14
|
+
isStopped: boolean;
|
|
15
|
+
sampleRate: number;
|
|
16
|
+
channelCount: number;
|
|
17
|
+
bitsPerSample: number;
|
|
18
|
+
onEnded?: () => void;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
export async function unlockAudio() {
|
|
22
|
+
await ctx.resume();
|
|
23
|
+
|
|
24
|
+
const buffer = ctx.createBuffer(1, 1, 22050);
|
|
25
|
+
const source = ctx.createBufferSource();
|
|
26
|
+
|
|
27
|
+
source.buffer = buffer;
|
|
28
|
+
source.connect(ctx.destination);
|
|
29
|
+
source.start(0);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export async function startStandByAudio() {
|
|
33
|
+
const response = await fetch(
|
|
34
|
+
`/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`
|
|
35
|
+
);
|
|
36
|
+
|
|
37
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
38
|
+
const audioBuffer = await ctx.decodeAudioData(arrayBuffer);
|
|
39
|
+
|
|
40
|
+
const source = ctx.createBufferSource();
|
|
41
|
+
standbySource = source;
|
|
42
|
+
|
|
43
|
+
source.buffer = audioBuffer;
|
|
44
|
+
source.connect(ctx.destination);
|
|
45
|
+
|
|
46
|
+
source.start();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export function createChatResponseAudioPlayback(options: {
|
|
50
|
+
sampleRate?: number;
|
|
51
|
+
channelCount?: number;
|
|
52
|
+
bitsPerSample?: number;
|
|
53
|
+
onEnded?: () => void;
|
|
54
|
+
} = {}): ChatResponseAudioPlayback {
|
|
55
|
+
return {
|
|
56
|
+
nextStartTime: ctx.currentTime,
|
|
57
|
+
activeSources: new Set(),
|
|
58
|
+
pendingSourceCount: 0,
|
|
59
|
+
pendingPcmBytes: new Uint8Array(0),
|
|
60
|
+
isDone: false,
|
|
61
|
+
isStopped: false,
|
|
62
|
+
sampleRate: options.sampleRate ?? DEFAULT_PCM_SAMPLE_RATE,
|
|
63
|
+
channelCount: options.channelCount ?? DEFAULT_PCM_CHANNEL_COUNT,
|
|
64
|
+
bitsPerSample: options.bitsPerSample ?? DEFAULT_PCM_BITS_PER_SAMPLE,
|
|
65
|
+
onEnded: options.onEnded,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export function playChatResponseCurrentChunks({
|
|
70
|
+
playback,
|
|
71
|
+
chunks,
|
|
72
|
+
}: {
|
|
73
|
+
playback: ChatResponseAudioPlayback;
|
|
74
|
+
chunks: ArrayBuffer[];
|
|
75
|
+
}) {
|
|
76
|
+
void ctx.resume().catch(() => undefined);
|
|
77
|
+
|
|
78
|
+
for (const chunk of chunks) {
|
|
79
|
+
if (playback.isStopped || !chunk.byteLength) {
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const pcmBytes = concatUint8Arrays(playback.pendingPcmBytes, new Uint8Array(chunk));
|
|
84
|
+
const bytesPerFrame = playback.channelCount * (playback.bitsPerSample / 8);
|
|
85
|
+
const alignedByteLength = pcmBytes.byteLength - (pcmBytes.byteLength % bytesPerFrame);
|
|
86
|
+
|
|
87
|
+
playback.pendingPcmBytes = pcmBytes.slice(alignedByteLength);
|
|
88
|
+
|
|
89
|
+
if (!alignedByteLength) {
|
|
90
|
+
continue;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const audioBuffer = createAudioBufferFromPcmChunk(
|
|
94
|
+
playback,
|
|
95
|
+
pcmBytes.subarray(0, alignedByteLength)
|
|
96
|
+
);
|
|
97
|
+
const source = ctx.createBufferSource();
|
|
98
|
+
const startTime = Math.max(playback.nextStartTime, ctx.currentTime);
|
|
99
|
+
|
|
100
|
+
source.buffer = audioBuffer;
|
|
101
|
+
source.connect(ctx.destination);
|
|
102
|
+
|
|
103
|
+
playback.activeSources.add(source);
|
|
104
|
+
playback.pendingSourceCount += 1;
|
|
105
|
+
playback.nextStartTime = startTime + audioBuffer.duration;
|
|
106
|
+
|
|
107
|
+
source.addEventListener(
|
|
108
|
+
'ended',
|
|
109
|
+
() => {
|
|
110
|
+
playback.activeSources.delete(source);
|
|
111
|
+
playback.pendingSourceCount = Math.max(0, playback.pendingSourceCount - 1);
|
|
112
|
+
|
|
113
|
+
if (!playback.isStopped && playback.isDone && playback.pendingSourceCount === 0) {
|
|
114
|
+
playback.onEnded?.();
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
{ once: true }
|
|
118
|
+
);
|
|
119
|
+
|
|
120
|
+
source.start(startTime);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
export function finishChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
|
|
125
|
+
if (!playback || playback.isStopped) {
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
playback.isDone = true;
|
|
130
|
+
|
|
131
|
+
if (playback.pendingSourceCount === 0) {
|
|
132
|
+
playback.onEnded?.();
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
export function stopChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
|
|
137
|
+
if (!playback || playback.isStopped) {
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
playback.isStopped = true;
|
|
142
|
+
|
|
143
|
+
for (const source of playback.activeSources) {
|
|
144
|
+
source.stop();
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
playback.activeSources.clear();
|
|
148
|
+
playback.pendingSourceCount = 0;
|
|
149
|
+
playback.pendingPcmBytes = new Uint8Array(0);
|
|
150
|
+
playback.nextStartTime = ctx.currentTime;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
function createAudioBufferFromPcmChunk(
|
|
154
|
+
playback: ChatResponseAudioPlayback,
|
|
155
|
+
chunk: Uint8Array
|
|
156
|
+
) {
|
|
157
|
+
if (playback.bitsPerSample !== DEFAULT_PCM_BITS_PER_SAMPLE) {
|
|
158
|
+
throw new Error(`Unsupported PCM bit depth: ${playback.bitsPerSample}`);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
const bytesPerSample = playback.bitsPerSample / 8;
|
|
162
|
+
const frameCount = chunk.byteLength / playback.channelCount / bytesPerSample;
|
|
163
|
+
const audioBuffer = ctx.createBuffer(
|
|
164
|
+
playback.channelCount,
|
|
165
|
+
frameCount,
|
|
166
|
+
playback.sampleRate
|
|
167
|
+
);
|
|
168
|
+
const pcm = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
169
|
+
const channelData = Array.from(
|
|
170
|
+
{ length: playback.channelCount },
|
|
171
|
+
(_, channelIndex) => audioBuffer.getChannelData(channelIndex)
|
|
172
|
+
);
|
|
173
|
+
|
|
174
|
+
for (let frameIndex = 0; frameIndex < frameCount; frameIndex += 1) {
|
|
175
|
+
for (let channelIndex = 0; channelIndex < playback.channelCount; channelIndex += 1) {
|
|
176
|
+
const sampleOffset =
|
|
177
|
+
(frameIndex * playback.channelCount + channelIndex) * bytesPerSample;
|
|
178
|
+
const sample = pcm.getInt16(sampleOffset, true) / 32768;
|
|
179
|
+
|
|
180
|
+
channelData[channelIndex][frameIndex] = sample;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return audioBuffer;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
function concatUint8Arrays(left: Uint8Array, right: Uint8Array) {
|
|
188
|
+
if (!left.byteLength) {
|
|
189
|
+
return right;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const combined = new Uint8Array(left.byteLength + right.byteLength);
|
|
193
|
+
|
|
194
|
+
combined.set(left, 0);
|
|
195
|
+
combined.set(right, left.byteLength);
|
|
196
|
+
|
|
197
|
+
return combined;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
export function endStandByAudio() {
|
|
201
|
+
if (standbySource) {
|
|
202
|
+
standbySource.stop();
|
|
203
|
+
standbySource = null;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
package/custom/composables/useAgentAudio.ts
CHANGED
|
@@ -4,61 +4,36 @@ import { defineStore } from 'pinia';
|
|
|
4
4
|
import type { SpeechStreamEvent } from '../types';
|
|
5
5
|
import { ref } from 'vue';
|
|
6
6
|
import { getCurrentPageContext } from './agentStore/pageContext';
|
|
7
|
+
import {
|
|
8
|
+
createChatResponseAudioPlayback,
|
|
9
|
+
endStandByAudio,
|
|
10
|
+
finishChatResponseAudio,
|
|
11
|
+
playChatResponseCurrentChunks,
|
|
12
|
+
startStandByAudio,
|
|
13
|
+
stopChatResponseAudio,
|
|
14
|
+
unlockAudio,
|
|
15
|
+
} from './agentAudio/utils';
|
|
16
|
+
import type { ChatResponseAudioPlayback } from './agentAudio/utils';
|
|
7
17
|
|
|
8
|
-
type StreamingAudioState = {
|
|
9
|
-
mimeType: string;
|
|
10
|
-
mediaSource: MediaSource;
|
|
11
|
-
sourceBuffer: SourceBuffer | null;
|
|
12
|
-
pendingChunks: ArrayBuffer[];
|
|
13
|
-
hasStartedPlayback: boolean;
|
|
14
|
-
isDone: boolean;
|
|
15
|
-
};
|
|
16
|
-
|
|
17
|
-
let standByAudio: HTMLAudioElement | null = null;
|
|
18
18
|
let isStandByAudioPlaying = false;
|
|
19
|
+
let isAudioUnlocked = false;
|
|
19
20
|
async function playStandByAudio() {
|
|
20
|
-
if (!standByAudio) {
|
|
21
|
-
standByAudio = new Audio(`/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`);
|
|
22
|
-
standByAudio.addEventListener('ended', () => {
|
|
23
|
-
if (!standByAudio.paused) {
|
|
24
|
-
restartStandByAudio();
|
|
25
|
-
}
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
standByAudio.currentTime = 0;
|
|
29
|
-
await standByAudio.play();
|
|
30
21
|
isStandByAudioPlaying = true;
|
|
22
|
+
await startStandByAudio();
|
|
31
23
|
}
|
|
32
24
|
|
|
33
25
|
function stopStandByAudio() {
|
|
34
|
-
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
standByAudio.pause();
|
|
38
|
-
standByAudio.currentTime = 0;
|
|
26
|
+
endStandByAudio();
|
|
39
27
|
isStandByAudioPlaying = false;
|
|
40
28
|
}
|
|
41
29
|
|
|
42
|
-
function restartStandByAudio() {
|
|
43
|
-
if (standByAudio) {
|
|
44
|
-
standByAudio.currentTime = 0;
|
|
45
|
-
}
|
|
46
|
-
playStandByAudio();
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
|
|
50
30
|
export const useAgentAudio = defineStore('agentAudio', () => {
|
|
51
31
|
const agentStore = useAgentStore();
|
|
52
32
|
const agentAudioMode = ref<'transcribing' | 'streaming' | 'fetchingAudio' | 'playingAgentResponse' | 'readyToRespond' >('readyToRespond');
|
|
53
33
|
const isStreamingResponse = ref(false);
|
|
54
34
|
|
|
55
35
|
let currentAbortController: AbortController | null = null;
|
|
56
|
-
let
|
|
57
|
-
let currentAudio: HTMLAudioElement | null = null;
|
|
58
|
-
let currentAudioObjectUrl: string | null = null;
|
|
59
|
-
let currentStreamingAudio: StreamingAudioState | null = null;
|
|
60
|
-
let bufferedAudioChunks: ArrayBuffer[] = [];
|
|
61
|
-
let bufferedAudioMimeType = 'audio/mpeg';
|
|
36
|
+
let currentStreamingAudio: ChatResponseAudioPlayback | null = null;
|
|
62
37
|
let wasAudioResponseReceived = false;
|
|
63
38
|
|
|
64
39
|
function stopGenerationAndAudio() {
|
|
@@ -72,6 +47,10 @@ export const useAgentAudio = defineStore('agentAudio', () => {
|
|
|
72
47
|
}
|
|
73
48
|
|
|
74
49
|
async function sendAudioToServerAndHandleResponse(blob: Blob) {
|
|
50
|
+
if (!isAudioUnlocked) {
|
|
51
|
+
await unlockAudio();
|
|
52
|
+
isAudioUnlocked = true;
|
|
53
|
+
}
|
|
75
54
|
currentAbortController = new AbortController();
|
|
76
55
|
wasAudioResponseReceived = false;
|
|
77
56
|
const formData = new FormData();
|
|
@@ -192,7 +171,7 @@ export const useAgentAudio = defineStore('agentAudio', () => {
|
|
|
192
171
|
wasAudioResponseReceived = true;
|
|
193
172
|
isStreamingResponse.value = false;
|
|
194
173
|
agentAudioMode.value = 'fetchingAudio';
|
|
195
|
-
initializeAudioStream(event.data
|
|
174
|
+
initializeAudioStream(event.data);
|
|
196
175
|
agentAudioMode.value = 'playingAgentResponse';
|
|
197
176
|
return;
|
|
198
177
|
}
|
|
@@ -215,162 +194,39 @@ export const useAgentAudio = defineStore('agentAudio', () => {
|
|
|
215
194
|
}
|
|
216
195
|
}
|
|
217
196
|
|
|
218
|
-
|
|
219
|
-
isPlaying = value;
|
|
220
|
-
|
|
221
|
-
if (!currentAudio) {
|
|
222
|
-
return;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
if (!isPlaying) {
|
|
226
|
-
currentAudio.pause();
|
|
227
|
-
currentAudio.currentTime = 0;
|
|
228
|
-
return;
|
|
229
|
-
}
|
|
230
|
-
agentAudioMode.value = 'playingAgentResponse';
|
|
231
|
-
await void currentAudio.play().catch((error) => {
|
|
232
|
-
console.error('Failed to play audio:', error);
|
|
233
|
-
});
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
function initializeAudioStream(mimeType: string) {
|
|
197
|
+
function initializeAudioStream(audioData: Extract<SpeechStreamEvent, { type: 'audio-start' }>['data']) {
|
|
237
198
|
stopCurrentAudioPlayback();
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
const mediaSource = new MediaSource();
|
|
245
|
-
currentAudioObjectUrl = URL.createObjectURL(mediaSource);
|
|
246
|
-
currentAudio = new Audio(currentAudioObjectUrl);
|
|
247
|
-
currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
|
|
248
|
-
currentStreamingAudio = {
|
|
249
|
-
mimeType,
|
|
250
|
-
mediaSource,
|
|
251
|
-
sourceBuffer: null,
|
|
252
|
-
pendingChunks: [],
|
|
253
|
-
hasStartedPlayback: false,
|
|
254
|
-
isDone: false,
|
|
255
|
-
};
|
|
256
|
-
|
|
257
|
-
mediaSource.addEventListener('sourceopen', handleMediaSourceOpen, { once: true });
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
function handleMediaSourceOpen() {
|
|
261
|
-
if (!currentStreamingAudio) {
|
|
262
|
-
return;
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
try {
|
|
266
|
-
currentStreamingAudio.sourceBuffer = currentStreamingAudio.mediaSource.addSourceBuffer(currentStreamingAudio.mimeType);
|
|
267
|
-
currentStreamingAudio.sourceBuffer.mode = 'sequence';
|
|
268
|
-
currentStreamingAudio.sourceBuffer.addEventListener('updateend', flushStreamingAudioQueue);
|
|
269
|
-
flushStreamingAudioQueue();
|
|
270
|
-
} catch (error) {
|
|
271
|
-
console.error('Failed to initialize streaming audio playback:', error);
|
|
272
|
-
bufferedAudioChunks.push(...currentStreamingAudio.pendingChunks);
|
|
273
|
-
detachStreamingAudio();
|
|
274
|
-
destroyCurrentAudioElement();
|
|
275
|
-
}
|
|
199
|
+
currentStreamingAudio = createChatResponseAudioPlayback({
|
|
200
|
+
sampleRate: audioData.sampleRate,
|
|
201
|
+
channelCount: audioData.channelCount,
|
|
202
|
+
bitsPerSample: audioData.bitsPerSample,
|
|
203
|
+
onEnded: handleAudioEnded,
|
|
204
|
+
});
|
|
276
205
|
}
|
|
277
206
|
|
|
278
207
|
function appendAudioChunk(base64: string) {
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
return;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
currentStreamingAudio.pendingChunks.push(chunk);
|
|
287
|
-
flushStreamingAudioQueue();
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
function flushStreamingAudioQueue() {
|
|
291
|
-
if (!currentStreamingAudio?.sourceBuffer || currentStreamingAudio.sourceBuffer.updating) {
|
|
292
|
-
return;
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
const nextChunk = currentStreamingAudio.pendingChunks.shift();
|
|
296
|
-
|
|
297
|
-
if (nextChunk) {
|
|
298
|
-
currentStreamingAudio.sourceBuffer.appendBuffer(nextChunk);
|
|
299
|
-
|
|
300
|
-
if (!currentStreamingAudio.hasStartedPlayback) {
|
|
301
|
-
currentStreamingAudio.hasStartedPlayback = true;
|
|
302
|
-
setIsPlaying(true);
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
return;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
if (currentStreamingAudio.isDone && currentStreamingAudio.mediaSource.readyState === 'open') {
|
|
309
|
-
currentStreamingAudio.mediaSource.endOfStream();
|
|
310
|
-
}
|
|
208
|
+
playChatResponseCurrentChunks({
|
|
209
|
+
playback: currentStreamingAudio!,
|
|
210
|
+
chunks: [base64ToArrayBuffer(base64)],
|
|
211
|
+
});
|
|
311
212
|
}
|
|
312
213
|
|
|
313
214
|
function finishAudioStream() {
|
|
314
|
-
|
|
315
|
-
currentStreamingAudio.isDone = true;
|
|
316
|
-
flushStreamingAudioQueue();
|
|
317
|
-
return;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
if (!bufferedAudioChunks.length) {
|
|
321
|
-
return;
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
playAudioChunks(bufferedAudioChunks, bufferedAudioMimeType);
|
|
325
|
-
bufferedAudioChunks = [];
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
function detachStreamingAudio() {
|
|
329
|
-
if (currentStreamingAudio?.sourceBuffer) {
|
|
330
|
-
currentStreamingAudio.sourceBuffer.removeEventListener('updateend', flushStreamingAudioQueue);
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
currentStreamingAudio = null;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
function destroyCurrentAudioElement() {
|
|
337
|
-
if (currentAudio) {
|
|
338
|
-
currentAudio.pause();
|
|
339
|
-
currentAudio.currentTime = 0;
|
|
340
|
-
currentAudio.src = '';
|
|
341
|
-
currentAudio.load();
|
|
342
|
-
currentAudio = null;
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
if (currentAudioObjectUrl) {
|
|
346
|
-
URL.revokeObjectURL(currentAudioObjectUrl);
|
|
347
|
-
currentAudioObjectUrl = null;
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
isPlaying = false;
|
|
215
|
+
finishChatResponseAudio(currentStreamingAudio);
|
|
351
216
|
}
|
|
352
217
|
|
|
353
218
|
function stopCurrentAudioPlayback(dontResetMode = false) {
|
|
354
219
|
stopStandByAudio();
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
detachStreamingAudio();
|
|
358
|
-
destroyCurrentAudioElement();
|
|
220
|
+
stopChatResponseAudio(currentStreamingAudio);
|
|
221
|
+
currentStreamingAudio = null;
|
|
359
222
|
if (!dontResetMode) {
|
|
360
223
|
setAudioModeReadyToRespond();
|
|
361
224
|
}
|
|
362
225
|
}
|
|
363
226
|
|
|
364
227
|
function handleAudioEnded() {
|
|
228
|
+
currentStreamingAudio = null;
|
|
365
229
|
setAudioModeReadyToRespond();
|
|
366
|
-
stopCurrentAudioPlayback();
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
function playAudioChunks(chunks: ArrayBuffer[], mimeType: string) {
|
|
370
|
-
currentAudioObjectUrl = URL.createObjectURL(new Blob(chunks, { type: mimeType }));
|
|
371
|
-
currentAudio = new Audio(currentAudioObjectUrl);
|
|
372
|
-
currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
|
|
373
|
-
setIsPlaying(true);
|
|
374
230
|
}
|
|
375
231
|
|
|
376
232
|
function base64ToArrayBuffer(base64: string) {
|
|
package/custom/conversation_area/TextRenderer.vue
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
<template>
|
|
2
2
|
<div
|
|
3
|
-
class="flex rounded-xl border border-gray-200 dark:border-gray-700"
|
|
3
|
+
class="flex rounded-xl border px-4 border-gray-200 dark:border-gray-700"
|
|
4
4
|
@click="handleMarkdownLinkClick"
|
|
5
5
|
:class="[
|
|
6
|
-
hasVegaLite ? 'w-full
|
|
6
|
+
hasVegaLite ? 'w-full my-2' : 'm-2',
|
|
7
7
|
props.role === 'user' ? 'bg-lightListTableHeading dark:bg-darkListTableHeading self-end max-w-[80%] mr-4'
|
|
8
8
|
: 'border-none self-start'
|
|
9
9
|
]"
|
|
package/custom/speech_recognition_frontend/MicrophoneButon.vue
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
<template>
|
|
2
2
|
<div
|
|
3
3
|
class="absolute bottom-2 flex items-center justify-center z-10 gap-4"
|
|
4
|
-
:class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-
|
|
4
|
+
:class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-1/2']"
|
|
5
5
|
>
|
|
6
6
|
<button
|
|
7
7
|
v-if="isAudioChatMode && microphoneButtonMode === 'generating'"
|
|
@@ -9,9 +9,10 @@
|
|
|
9
9
|
@click="stopCurrentGeneration()"
|
|
10
10
|
>
|
|
11
11
|
<div class="w-3 h-3 bg-white rounded-full"/>
|
|
12
|
-
{{ $t('
|
|
12
|
+
{{ $t('Stop') }}
|
|
13
13
|
</button>
|
|
14
|
-
<button
|
|
14
|
+
<button
|
|
15
|
+
v-else
|
|
15
16
|
class="h-9 bg-lightPrimary dark:bg-darkPrimary
|
|
16
17
|
hover:opacity-90 rounded-full flex items-center justify-center
|
|
17
18
|
transition-all duration-300 ease-in-out overflow-hidden"
|
|
@@ -89,6 +90,7 @@ watch(agentAudioMode, async (newVal) => {
|
|
|
89
90
|
if(isAudioChatMode.value) {
|
|
90
91
|
microphoneButtonMode.value = 'listen';
|
|
91
92
|
await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
|
|
93
|
+
agentAudio.playBeep(1000);
|
|
92
94
|
} else {
|
|
93
95
|
microphoneButtonMode.value = 'off';
|
|
94
96
|
}
|
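package/custom/types.ts
CHANGED
|
(The diff body for this file, +3 -0 per the file list, is missing from this capture.)
package/dist/agentResponseEvents.js
CHANGED
|
@@ -96,12 +96,15 @@ export function createAgentEventStream(res, options = {}) {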
|
|
|
96
96
|
},
|
|
97
97
|
});
|
|
98
98
|
},
|
|
99
|
-
audioStart(mimeType, format) {
|
|
99
|
+
audioStart(mimeType, format, sampleRate, channelCount, bitsPerSample) {
|
|
100
100
|
stream.send({
|
|
101
101
|
type: "audio-start",
|
|
102
102
|
data: {
|
|
103
103
|
mimeType,
|
|
104
104
|
format,
|
|
105
|
+
sampleRate,
|
|
106
|
+
channelCount,
|
|
107
|
+
bitsPerSample,
|
|
105
108
|
},
|
|
106
109
|
});
|
|
107
110
|
},
|
|
package/dist/custom/composables/agentAudio/utils.ts
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
const ctx = new AudioContext();
|
|
2
|
+
let standbySource: AudioBufferSourceNode | null = null;
|
|
3
|
+
|
|
4
|
+
const DEFAULT_PCM_SAMPLE_RATE = 24000;
|
|
5
|
+
const DEFAULT_PCM_CHANNEL_COUNT = 1;
|
|
6
|
+
const DEFAULT_PCM_BITS_PER_SAMPLE = 16;
|
|
7
|
+
|
|
8
|
+
export type ChatResponseAudioPlayback = {
|
|
9
|
+
nextStartTime: number;
|
|
10
|
+
activeSources: Set<AudioBufferSourceNode>;
|
|
11
|
+
pendingSourceCount: number;
|
|
12
|
+
pendingPcmBytes: Uint8Array;
|
|
13
|
+
isDone: boolean;
|
|
14
|
+
isStopped: boolean;
|
|
15
|
+
sampleRate: number;
|
|
16
|
+
channelCount: number;
|
|
17
|
+
bitsPerSample: number;
|
|
18
|
+
onEnded?: () => void;
|
|
19
|
+
};
|
|
20
|
+
|
|
21
|
+
export async function unlockAudio() {
|
|
22
|
+
await ctx.resume();
|
|
23
|
+
|
|
24
|
+
const buffer = ctx.createBuffer(1, 1, 22050);
|
|
25
|
+
const source = ctx.createBufferSource();
|
|
26
|
+
|
|
27
|
+
source.buffer = buffer;
|
|
28
|
+
source.connect(ctx.destination);
|
|
29
|
+
source.start(0);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export async function startStandByAudio() {
|
|
33
|
+
const response = await fetch(
|
|
34
|
+
`/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`
|
|
35
|
+
);
|
|
36
|
+
|
|
37
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
38
|
+
const audioBuffer = await ctx.decodeAudioData(arrayBuffer);
|
|
39
|
+
|
|
40
|
+
const source = ctx.createBufferSource();
|
|
41
|
+
standbySource = source;
|
|
42
|
+
|
|
43
|
+
source.buffer = audioBuffer;
|
|
44
|
+
source.connect(ctx.destination);
|
|
45
|
+
|
|
46
|
+
source.start();
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export function createChatResponseAudioPlayback(options: {
|
|
50
|
+
sampleRate?: number;
|
|
51
|
+
channelCount?: number;
|
|
52
|
+
bitsPerSample?: number;
|
|
53
|
+
onEnded?: () => void;
|
|
54
|
+
} = {}): ChatResponseAudioPlayback {
|
|
55
|
+
return {
|
|
56
|
+
nextStartTime: ctx.currentTime,
|
|
57
|
+
activeSources: new Set(),
|
|
58
|
+
pendingSourceCount: 0,
|
|
59
|
+
pendingPcmBytes: new Uint8Array(0),
|
|
60
|
+
isDone: false,
|
|
61
|
+
isStopped: false,
|
|
62
|
+
sampleRate: options.sampleRate ?? DEFAULT_PCM_SAMPLE_RATE,
|
|
63
|
+
channelCount: options.channelCount ?? DEFAULT_PCM_CHANNEL_COUNT,
|
|
64
|
+
bitsPerSample: options.bitsPerSample ?? DEFAULT_PCM_BITS_PER_SAMPLE,
|
|
65
|
+
onEnded: options.onEnded,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export function playChatResponseCurrentChunks({
|
|
70
|
+
playback,
|
|
71
|
+
chunks,
|
|
72
|
+
}: {
|
|
73
|
+
playback: ChatResponseAudioPlayback;
|
|
74
|
+
chunks: ArrayBuffer[];
|
|
75
|
+
}) {
|
|
76
|
+
void ctx.resume().catch(() => undefined);
|
|
77
|
+
|
|
78
|
+
for (const chunk of chunks) {
|
|
79
|
+
if (playback.isStopped || !chunk.byteLength) {
|
|
80
|
+
continue;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
const pcmBytes = concatUint8Arrays(playback.pendingPcmBytes, new Uint8Array(chunk));
|
|
84
|
+
const bytesPerFrame = playback.channelCount * (playback.bitsPerSample / 8);
|
|
85
|
+
const alignedByteLength = pcmBytes.byteLength - (pcmBytes.byteLength % bytesPerFrame);
|
|
86
|
+
|
|
87
|
+
playback.pendingPcmBytes = pcmBytes.slice(alignedByteLength);
|
|
88
|
+
|
|
89
|
+
if (!alignedByteLength) {
|
|
90
|
+
continue;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
const audioBuffer = createAudioBufferFromPcmChunk(
|
|
94
|
+
playback,
|
|
95
|
+
pcmBytes.subarray(0, alignedByteLength)
|
|
96
|
+
);
|
|
97
|
+
const source = ctx.createBufferSource();
|
|
98
|
+
const startTime = Math.max(playback.nextStartTime, ctx.currentTime);
|
|
99
|
+
|
|
100
|
+
source.buffer = audioBuffer;
|
|
101
|
+
source.connect(ctx.destination);
|
|
102
|
+
|
|
103
|
+
playback.activeSources.add(source);
|
|
104
|
+
playback.pendingSourceCount += 1;
|
|
105
|
+
playback.nextStartTime = startTime + audioBuffer.duration;
|
|
106
|
+
|
|
107
|
+
source.addEventListener(
|
|
108
|
+
'ended',
|
|
109
|
+
() => {
|
|
110
|
+
playback.activeSources.delete(source);
|
|
111
|
+
playback.pendingSourceCount = Math.max(0, playback.pendingSourceCount - 1);
|
|
112
|
+
|
|
113
|
+
if (!playback.isStopped && playback.isDone && playback.pendingSourceCount === 0) {
|
|
114
|
+
playback.onEnded?.();
|
|
115
|
+
}
|
|
116
|
+
},
|
|
117
|
+
{ once: true }
|
|
118
|
+
);
|
|
119
|
+
|
|
120
|
+
source.start(startTime);
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
export function finishChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
|
|
125
|
+
if (!playback || playback.isStopped) {
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
playback.isDone = true;
|
|
130
|
+
|
|
131
|
+
if (playback.pendingSourceCount === 0) {
|
|
132
|
+
playback.onEnded?.();
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
export function stopChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
|
|
137
|
+
if (!playback || playback.isStopped) {
|
|
138
|
+
return;
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
playback.isStopped = true;
|
|
142
|
+
|
|
143
|
+
for (const source of playback.activeSources) {
|
|
144
|
+
source.stop();
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
playback.activeSources.clear();
|
|
148
|
+
playback.pendingSourceCount = 0;
|
|
149
|
+
playback.pendingPcmBytes = new Uint8Array(0);
|
|
150
|
+
playback.nextStartTime = ctx.currentTime;
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
function createAudioBufferFromPcmChunk(
|
|
154
|
+
playback: ChatResponseAudioPlayback,
|
|
155
|
+
chunk: Uint8Array
|
|
156
|
+
) {
|
|
157
|
+
if (playback.bitsPerSample !== DEFAULT_PCM_BITS_PER_SAMPLE) {
|
|
158
|
+
throw new Error(`Unsupported PCM bit depth: ${playback.bitsPerSample}`);
|
|
159
|
+
}
|
|
160
|
+
|
|
161
|
+
const bytesPerSample = playback.bitsPerSample / 8;
|
|
162
|
+
const frameCount = chunk.byteLength / playback.channelCount / bytesPerSample;
|
|
163
|
+
const audioBuffer = ctx.createBuffer(
|
|
164
|
+
playback.channelCount,
|
|
165
|
+
frameCount,
|
|
166
|
+
playback.sampleRate
|
|
167
|
+
);
|
|
168
|
+
const pcm = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);
|
|
169
|
+
const channelData = Array.from(
|
|
170
|
+
{ length: playback.channelCount },
|
|
171
|
+
(_, channelIndex) => audioBuffer.getChannelData(channelIndex)
|
|
172
|
+
);
|
|
173
|
+
|
|
174
|
+
for (let frameIndex = 0; frameIndex < frameCount; frameIndex += 1) {
|
|
175
|
+
for (let channelIndex = 0; channelIndex < playback.channelCount; channelIndex += 1) {
|
|
176
|
+
const sampleOffset =
|
|
177
|
+
(frameIndex * playback.channelCount + channelIndex) * bytesPerSample;
|
|
178
|
+
const sample = pcm.getInt16(sampleOffset, true) / 32768;
|
|
179
|
+
|
|
180
|
+
channelData[channelIndex][frameIndex] = sample;
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
return audioBuffer;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
function concatUint8Arrays(left: Uint8Array, right: Uint8Array) {
|
|
188
|
+
if (!left.byteLength) {
|
|
189
|
+
return right;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
const combined = new Uint8Array(left.byteLength + right.byteLength);
|
|
193
|
+
|
|
194
|
+
combined.set(left, 0);
|
|
195
|
+
combined.set(right, left.byteLength);
|
|
196
|
+
|
|
197
|
+
return combined;
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
export function endStandByAudio() {
|
|
201
|
+
if (standbySource) {
|
|
202
|
+
standbySource.stop();
|
|
203
|
+
standbySource = null;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
package/dist/custom/composables/useAgentAudio.ts
CHANGED
|
@@ -4,61 +4,36 @@ import { defineStore } from 'pinia';
|
|
|
4
4
|
import type { SpeechStreamEvent } from '../types';
|
|
5
5
|
import { ref } from 'vue';
|
|
6
6
|
import { getCurrentPageContext } from './agentStore/pageContext';
|
|
7
|
+
import {
|
|
8
|
+
createChatResponseAudioPlayback,
|
|
9
|
+
endStandByAudio,
|
|
10
|
+
finishChatResponseAudio,
|
|
11
|
+
playChatResponseCurrentChunks,
|
|
12
|
+
startStandByAudio,
|
|
13
|
+
stopChatResponseAudio,
|
|
14
|
+
unlockAudio,
|
|
15
|
+
} from './agentAudio/utils';
|
|
16
|
+
import type { ChatResponseAudioPlayback } from './agentAudio/utils';
|
|
7
17
|
|
|
8
|
-
type StreamingAudioState = {
|
|
9
|
-
mimeType: string;
|
|
10
|
-
mediaSource: MediaSource;
|
|
11
|
-
sourceBuffer: SourceBuffer | null;
|
|
12
|
-
pendingChunks: ArrayBuffer[];
|
|
13
|
-
hasStartedPlayback: boolean;
|
|
14
|
-
isDone: boolean;
|
|
15
|
-
};
|
|
16
|
-
|
|
17
|
-
let standByAudio: HTMLAudioElement | null = null;
|
|
18
18
|
let isStandByAudioPlaying = false;
|
|
19
|
+
let isAudioUnlocked = false;
|
|
19
20
|
async function playStandByAudio() {
|
|
20
|
-
if (!standByAudio) {
|
|
21
|
-
standByAudio = new Audio(`/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`);
|
|
22
|
-
standByAudio.addEventListener('ended', () => {
|
|
23
|
-
if (!standByAudio.paused) {
|
|
24
|
-
restartStandByAudio();
|
|
25
|
-
}
|
|
26
|
-
});
|
|
27
|
-
}
|
|
28
|
-
standByAudio.currentTime = 0;
|
|
29
|
-
await standByAudio.play();
|
|
30
21
|
isStandByAudioPlaying = true;
|
|
22
|
+
await startStandByAudio();
|
|
31
23
|
}
|
|
32
24
|
|
|
33
25
|
function stopStandByAudio() {
|
|
34
|
-
|
|
35
|
-
return;
|
|
36
|
-
}
|
|
37
|
-
standByAudio.pause();
|
|
38
|
-
standByAudio.currentTime = 0;
|
|
26
|
+
endStandByAudio();
|
|
39
27
|
isStandByAudioPlaying = false;
|
|
40
28
|
}
|
|
41
29
|
|
|
42
|
-
function restartStandByAudio() {
|
|
43
|
-
if (standByAudio) {
|
|
44
|
-
standByAudio.currentTime = 0;
|
|
45
|
-
}
|
|
46
|
-
playStandByAudio();
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
|
|
50
30
|
export const useAgentAudio = defineStore('agentAudio', () => {
|
|
51
31
|
const agentStore = useAgentStore();
|
|
52
32
|
const agentAudioMode = ref<'transcribing' | 'streaming' | 'fetchingAudio' | 'playingAgentResponse' | 'readyToRespond' >('readyToRespond');
|
|
53
33
|
const isStreamingResponse = ref(false);
|
|
54
34
|
|
|
55
35
|
let currentAbortController: AbortController | null = null;
|
|
56
|
-
let
|
|
57
|
-
let currentAudio: HTMLAudioElement | null = null;
|
|
58
|
-
let currentAudioObjectUrl: string | null = null;
|
|
59
|
-
let currentStreamingAudio: StreamingAudioState | null = null;
|
|
60
|
-
let bufferedAudioChunks: ArrayBuffer[] = [];
|
|
61
|
-
let bufferedAudioMimeType = 'audio/mpeg';
|
|
36
|
+
let currentStreamingAudio: ChatResponseAudioPlayback | null = null;
|
|
62
37
|
let wasAudioResponseReceived = false;
|
|
63
38
|
|
|
64
39
|
function stopGenerationAndAudio() {
|
|
@@ -72,6 +47,10 @@ export const useAgentAudio = defineStore('agentAudio', () => {
|
|
|
72
47
|
}
|
|
73
48
|
|
|
74
49
|
async function sendAudioToServerAndHandleResponse(blob: Blob) {
|
|
50
|
+
if (!isAudioUnlocked) {
|
|
51
|
+
await unlockAudio();
|
|
52
|
+
isAudioUnlocked = true;
|
|
53
|
+
}
|
|
75
54
|
currentAbortController = new AbortController();
|
|
76
55
|
wasAudioResponseReceived = false;
|
|
77
56
|
const formData = new FormData();
|
|
@@ -192,7 +171,7 @@ export const useAgentAudio = defineStore('agentAudio', () => {
|
|
|
192
171
|
wasAudioResponseReceived = true;
|
|
193
172
|
isStreamingResponse.value = false;
|
|
194
173
|
agentAudioMode.value = 'fetchingAudio';
|
|
195
|
-
initializeAudioStream(event.data
|
|
174
|
+
initializeAudioStream(event.data);
|
|
196
175
|
agentAudioMode.value = 'playingAgentResponse';
|
|
197
176
|
return;
|
|
198
177
|
}
|
|
@@ -215,162 +194,39 @@ export const useAgentAudio = defineStore('agentAudio', () => {
|
|
|
215
194
|
}
|
|
216
195
|
}
|
|
217
196
|
|
|
218
|
-
|
|
219
|
-
isPlaying = value;
|
|
220
|
-
|
|
221
|
-
if (!currentAudio) {
|
|
222
|
-
return;
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
if (!isPlaying) {
|
|
226
|
-
currentAudio.pause();
|
|
227
|
-
currentAudio.currentTime = 0;
|
|
228
|
-
return;
|
|
229
|
-
}
|
|
230
|
-
agentAudioMode.value = 'playingAgentResponse';
|
|
231
|
-
await void currentAudio.play().catch((error) => {
|
|
232
|
-
console.error('Failed to play audio:', error);
|
|
233
|
-
});
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
function initializeAudioStream(mimeType: string) {
|
|
197
|
+
function initializeAudioStream(audioData: Extract<SpeechStreamEvent, { type: 'audio-start' }>['data']) {
|
|
237
198
|
stopCurrentAudioPlayback();
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
const mediaSource = new MediaSource();
|
|
245
|
-
currentAudioObjectUrl = URL.createObjectURL(mediaSource);
|
|
246
|
-
currentAudio = new Audio(currentAudioObjectUrl);
|
|
247
|
-
currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
|
|
248
|
-
currentStreamingAudio = {
|
|
249
|
-
mimeType,
|
|
250
|
-
mediaSource,
|
|
251
|
-
sourceBuffer: null,
|
|
252
|
-
pendingChunks: [],
|
|
253
|
-
hasStartedPlayback: false,
|
|
254
|
-
isDone: false,
|
|
255
|
-
};
|
|
256
|
-
|
|
257
|
-
mediaSource.addEventListener('sourceopen', handleMediaSourceOpen, { once: true });
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
function handleMediaSourceOpen() {
|
|
261
|
-
if (!currentStreamingAudio) {
|
|
262
|
-
return;
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
try {
|
|
266
|
-
currentStreamingAudio.sourceBuffer = currentStreamingAudio.mediaSource.addSourceBuffer(currentStreamingAudio.mimeType);
|
|
267
|
-
currentStreamingAudio.sourceBuffer.mode = 'sequence';
|
|
268
|
-
currentStreamingAudio.sourceBuffer.addEventListener('updateend', flushStreamingAudioQueue);
|
|
269
|
-
flushStreamingAudioQueue();
|
|
270
|
-
} catch (error) {
|
|
271
|
-
console.error('Failed to initialize streaming audio playback:', error);
|
|
272
|
-
bufferedAudioChunks.push(...currentStreamingAudio.pendingChunks);
|
|
273
|
-
detachStreamingAudio();
|
|
274
|
-
destroyCurrentAudioElement();
|
|
275
|
-
}
|
|
199
|
+
currentStreamingAudio = createChatResponseAudioPlayback({
|
|
200
|
+
sampleRate: audioData.sampleRate,
|
|
201
|
+
channelCount: audioData.channelCount,
|
|
202
|
+
bitsPerSample: audioData.bitsPerSample,
|
|
203
|
+
onEnded: handleAudioEnded,
|
|
204
|
+
});
|
|
276
205
|
}
|
|
277
206
|
|
|
278
207
|
function appendAudioChunk(base64: string) {
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
return;
|
|
284
|
-
}
|
|
285
|
-
|
|
286
|
-
currentStreamingAudio.pendingChunks.push(chunk);
|
|
287
|
-
flushStreamingAudioQueue();
|
|
288
|
-
}
|
|
289
|
-
|
|
290
|
-
function flushStreamingAudioQueue() {
|
|
291
|
-
if (!currentStreamingAudio?.sourceBuffer || currentStreamingAudio.sourceBuffer.updating) {
|
|
292
|
-
return;
|
|
293
|
-
}
|
|
294
|
-
|
|
295
|
-
const nextChunk = currentStreamingAudio.pendingChunks.shift();
|
|
296
|
-
|
|
297
|
-
if (nextChunk) {
|
|
298
|
-
currentStreamingAudio.sourceBuffer.appendBuffer(nextChunk);
|
|
299
|
-
|
|
300
|
-
if (!currentStreamingAudio.hasStartedPlayback) {
|
|
301
|
-
currentStreamingAudio.hasStartedPlayback = true;
|
|
302
|
-
setIsPlaying(true);
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
return;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
if (currentStreamingAudio.isDone && currentStreamingAudio.mediaSource.readyState === 'open') {
|
|
309
|
-
currentStreamingAudio.mediaSource.endOfStream();
|
|
310
|
-
}
|
|
208
|
+
playChatResponseCurrentChunks({
|
|
209
|
+
playback: currentStreamingAudio!,
|
|
210
|
+
chunks: [base64ToArrayBuffer(base64)],
|
|
211
|
+
});
|
|
311
212
|
}
|
|
312
213
|
|
|
313
214
|
function finishAudioStream() {
|
|
314
|
-
|
|
315
|
-
currentStreamingAudio.isDone = true;
|
|
316
|
-
flushStreamingAudioQueue();
|
|
317
|
-
return;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
if (!bufferedAudioChunks.length) {
|
|
321
|
-
return;
|
|
322
|
-
}
|
|
323
|
-
|
|
324
|
-
playAudioChunks(bufferedAudioChunks, bufferedAudioMimeType);
|
|
325
|
-
bufferedAudioChunks = [];
|
|
326
|
-
}
|
|
327
|
-
|
|
328
|
-
function detachStreamingAudio() {
|
|
329
|
-
if (currentStreamingAudio?.sourceBuffer) {
|
|
330
|
-
currentStreamingAudio.sourceBuffer.removeEventListener('updateend', flushStreamingAudioQueue);
|
|
331
|
-
}
|
|
332
|
-
|
|
333
|
-
currentStreamingAudio = null;
|
|
334
|
-
}
|
|
335
|
-
|
|
336
|
-
function destroyCurrentAudioElement() {
|
|
337
|
-
if (currentAudio) {
|
|
338
|
-
currentAudio.pause();
|
|
339
|
-
currentAudio.currentTime = 0;
|
|
340
|
-
currentAudio.src = '';
|
|
341
|
-
currentAudio.load();
|
|
342
|
-
currentAudio = null;
|
|
343
|
-
}
|
|
344
|
-
|
|
345
|
-
if (currentAudioObjectUrl) {
|
|
346
|
-
URL.revokeObjectURL(currentAudioObjectUrl);
|
|
347
|
-
currentAudioObjectUrl = null;
|
|
348
|
-
}
|
|
349
|
-
|
|
350
|
-
isPlaying = false;
|
|
215
|
+
finishChatResponseAudio(currentStreamingAudio);
|
|
351
216
|
}
|
|
352
217
|
|
|
353
218
|
function stopCurrentAudioPlayback(dontResetMode = false) {
|
|
354
219
|
stopStandByAudio();
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
detachStreamingAudio();
|
|
358
|
-
destroyCurrentAudioElement();
|
|
220
|
+
stopChatResponseAudio(currentStreamingAudio);
|
|
221
|
+
currentStreamingAudio = null;
|
|
359
222
|
if (!dontResetMode) {
|
|
360
223
|
setAudioModeReadyToRespond();
|
|
361
224
|
}
|
|
362
225
|
}
|
|
363
226
|
|
|
364
227
|
function handleAudioEnded() {
|
|
228
|
+
currentStreamingAudio = null;
|
|
365
229
|
setAudioModeReadyToRespond();
|
|
366
|
-
stopCurrentAudioPlayback();
|
|
367
|
-
}
|
|
368
|
-
|
|
369
|
-
function playAudioChunks(chunks: ArrayBuffer[], mimeType: string) {
|
|
370
|
-
currentAudioObjectUrl = URL.createObjectURL(new Blob(chunks, { type: mimeType }));
|
|
371
|
-
currentAudio = new Audio(currentAudioObjectUrl);
|
|
372
|
-
currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
|
|
373
|
-
setIsPlaying(true);
|
|
374
230
|
}
|
|
375
231
|
|
|
376
232
|
function base64ToArrayBuffer(base64: string) {
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
<template>
|
|
2
2
|
<div
|
|
3
|
-
class="flex rounded-xl border border-gray-200 dark:border-gray-700"
|
|
3
|
+
class="flex rounded-xl border px-4 border-gray-200 dark:border-gray-700"
|
|
4
4
|
@click="handleMarkdownLinkClick"
|
|
5
5
|
:class="[
|
|
6
|
-
hasVegaLite ? 'w-full
|
|
6
|
+
hasVegaLite ? 'w-full my-2' : 'm-2',
|
|
7
7
|
props.role === 'user' ? 'bg-lightListTableHeading dark:bg-darkListTableHeading self-end max-w-[80%] mr-4'
|
|
8
8
|
: 'border-none self-start'
|
|
9
9
|
]"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
<template>
|
|
2
2
|
<div
|
|
3
3
|
class="absolute bottom-2 flex items-center justify-center z-10 gap-4"
|
|
4
|
-
:class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-
|
|
4
|
+
:class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-1/2']"
|
|
5
5
|
>
|
|
6
6
|
<button
|
|
7
7
|
v-if="isAudioChatMode && microphoneButtonMode === 'generating'"
|
|
@@ -9,9 +9,10 @@
|
|
|
9
9
|
@click="stopCurrentGeneration()"
|
|
10
10
|
>
|
|
11
11
|
<div class="w-3 h-3 bg-white rounded-full"/>
|
|
12
|
-
{{ $t('
|
|
12
|
+
{{ $t('Stop') }}
|
|
13
13
|
</button>
|
|
14
|
-
<button
|
|
14
|
+
<button
|
|
15
|
+
v-else
|
|
15
16
|
class="h-9 bg-lightPrimary dark:bg-darkPrimary
|
|
16
17
|
hover:opacity-90 rounded-full flex items-center justify-center
|
|
17
18
|
transition-all duration-300 ease-in-out overflow-hidden"
|
|
@@ -89,6 +90,7 @@ watch(agentAudioMode, async (newVal) => {
|
|
|
89
90
|
if(isAudioChatMode.value) {
|
|
90
91
|
microphoneButtonMode.value = 'listen';
|
|
91
92
|
await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
|
|
93
|
+
agentAudio.playBeep(1000);
|
|
92
94
|
} else {
|
|
93
95
|
microphoneButtonMode.value = 'off';
|
|
94
96
|
}
|
package/dist/custom/types.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -478,10 +478,10 @@ export default class AdminForthAgentPlugin extends AdminForthPlugin {
|
|
|
478
478
|
text: sanitizeSpeechText(agentResponse.text),
|
|
479
479
|
stream: true,
|
|
480
480
|
streamFormat: "audio",
|
|
481
|
-
format: "
|
|
481
|
+
format: "pcm",
|
|
482
482
|
abortSignal,
|
|
483
483
|
});
|
|
484
|
-
stream.audioStart(speech.mimeType, speech.format);
|
|
484
|
+
stream.audioStart(speech.mimeType, speech.format, 24000, 1, 16);
|
|
485
485
|
const reader = speech.audioStream.getReader();
|
|
486
486
|
const cancelAudioStream = () => {
|
|
487
487
|
void reader.cancel().catch(() => undefined);
|
package/index.ts
CHANGED
|
@@ -544,11 +544,11 @@ export default class AdminForthAgentPlugin extends AdminForthPlugin {
|
|
|
544
544
|
text: sanitizeSpeechText(agentResponse.text),
|
|
545
545
|
stream: true,
|
|
546
546
|
streamFormat: "audio",
|
|
547
|
-
format: "
|
|
547
|
+
format: "pcm",
|
|
548
548
|
abortSignal,
|
|
549
549
|
});
|
|
550
550
|
|
|
551
|
-
stream.audioStart(speech.mimeType, speech.format);
|
|
551
|
+
stream.audioStart(speech.mimeType, speech.format, 24000, 1, 16);
|
|
552
552
|
|
|
553
553
|
const reader = speech.audioStream.getReader();
|
|
554
554
|
const cancelAudioStream = () => {
|