@adminforth/agent 1.43.5 → 1.43.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -145,12 +145,21 @@ export function createAgentEventStream(
145
145
  });
146
146
  },
147
147
 
148
- audioStart(mimeType: string, format: string) {
148
+ audioStart(
149
+ mimeType: string,
150
+ format: string,
151
+ sampleRate: number,
152
+ channelCount: number,
153
+ bitsPerSample: number,
154
+ ) {
149
155
  stream.send({
150
156
  type: "audio-start",
151
157
  data: {
152
158
  mimeType,
153
159
  format,
160
+ sampleRate,
161
+ channelCount,
162
+ bitsPerSample,
154
163
  },
155
164
  });
156
165
  },
package/build.log CHANGED
@@ -18,6 +18,8 @@ custom/composables/
18
18
  custom/composables/useAgentAudio.ts
19
19
  custom/composables/useAgentStore.ts
20
20
  custom/composables/useAgentTransitions.ts
21
+ custom/composables/agentAudio/
22
+ custom/composables/agentAudio/utils.ts
21
23
  custom/composables/agentStore/
22
24
  custom/composables/agentStore/constants.ts
23
25
  custom/composables/agentStore/pageContext.ts
@@ -58,5 +60,5 @@ custom/speech_recognition_frontend/voiceActivityDetection.ts
58
60
  custom/speech_recognition_frontend/types/
59
61
  custom/speech_recognition_frontend/types/voice-activity-detection.d.ts
60
62
 
61
- sent 1,661,194 bytes received 860 bytes 3,324,108.00 bytes/sec
62
- total size is 1,657,278 speedup is 1.00
63
+ sent 1,663,013 bytes received 883 bytes 3,327,792.00 bytes/sec
64
+ total size is 1,658,998 speedup is 1.00
@@ -0,0 +1,205 @@
1
/** Web Audio context shared by the playback helpers in this module. */
const ctx = new AudioContext();

/** Standby sound source that was most recently started, or `null` when none is registered. */
let standbySource: AudioBufferSourceNode | null = null;

// PCM format used when the caller of `createChatResponseAudioPlayback` omits a value.
const DEFAULT_PCM_SAMPLE_RATE = 24000;
const DEFAULT_PCM_CHANNEL_COUNT = 1;
const DEFAULT_PCM_BITS_PER_SAMPLE = 16;
7
+
8
/** Mutable bookkeeping for one streamed PCM audio response. */
export type ChatResponseAudioPlayback = {
  /** Audio-context time at which the next decoded chunk will be scheduled. */
  nextStartTime: number;
  /** Sources that have been scheduled and have not yet fired `ended`. */
  activeSources: Set<AudioBufferSourceNode>;
  /** Count of scheduled sources whose `ended` event is still outstanding. */
  pendingSourceCount: number;
  /** Trailing bytes of the previous chunk that did not complete a whole frame. */
  pendingPcmBytes: Uint8Array;
  /** Set by `finishChatResponseAudio` once no further chunks are expected. */
  isDone: boolean;
  /** Set by `stopChatResponseAudio`; a stopped playback schedules nothing more. */
  isStopped: boolean;
  /** Sample rate, in Hz, passed to `createBuffer` for every chunk. */
  sampleRate: number;
  /** Number of interleaved channels in the PCM stream. */
  channelCount: number;
  /** Bits per PCM sample. */
  bitsPerSample: number;
  /** Called when the playback is done and no scheduled source is outstanding. */
  onEnded?: () => void;
};
20
+
21
/**
 * Resumes the shared audio context and plays a single silent frame through it.
 *
 * NOTE(review): presumably meant to run inside a user gesture so that browser
 * autoplay policies allow later playback - confirm against the caller.
 */
export async function unlockAudio() {
  await ctx.resume();

  // One frame of silence: enough to start a source without producing audible output.
  const silentSource = ctx.createBufferSource();
  silentSource.buffer = ctx.createBuffer(1, 1, 22050);
  silentSource.connect(ctx.destination);
  silentSource.start(0);
}
31
+
32
/**
 * Fetches, decodes and starts the standby ("agent is processing") sound.
 *
 * The sound is only started if it has not been stopped or superseded while the
 * asset was loading, and any standby sound that is still playing is stopped first.
 *
 * @throws Error when the asset request returns a non-2xx status; fetch and
 *   decoding failures are propagated unchanged.
 */
export async function startStandByAudio() {
  // Stop the previous standby sound so it is not left playing without a handle.
  standbySource?.stop();

  // Register a started, silent, unconnected source while the asset loads.
  // `endStandByAudio()` can stop and clear it like any other source, which lets
  // this function detect a stop request that arrives during the await below.
  const pendingMarker = ctx.createBufferSource();
  pendingMarker.start();
  standbySource = pendingMarker;

  try {
    const response = await fetch(
      `/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`
    );

    if (!response.ok) {
      throw new Error(`Failed to load standby audio: HTTP ${response.status}`);
    }

    const audioBuffer = await ctx.decodeAudioData(await response.arrayBuffer());

    // Stopped or replaced by a newer call while loading: do not start playback.
    if (standbySource !== pendingMarker) {
      return;
    }

    const source = ctx.createBufferSource();

    source.buffer = audioBuffer;
    source.connect(ctx.destination);

    standbySource = source;
    source.start();
  } catch (error) {
    // Do not leave the placeholder registered when loading failed.
    if (standbySource === pendingMarker) {
      standbySource = null;
    }

    throw error;
  } finally {
    // The placeholder has served its purpose; stopping a started source is always valid.
    pendingMarker.stop();
  }
}
48
+
49
/**
 * Builds the initial playback state for one streamed audio response.
 *
 * @param options Optional PCM format and completion callback; any format field
 *   left out falls back to the module defaults.
 * @returns A fresh state with nothing scheduled, whose first chunk would start
 *   at the audio context's current time.
 */
export function createChatResponseAudioPlayback(options: {
  sampleRate?: number;
  channelCount?: number;
  bitsPerSample?: number;
  onEnded?: () => void;
} = {}): ChatResponseAudioPlayback {
  const { sampleRate, channelCount, bitsPerSample, onEnded } = options;

  const playback: ChatResponseAudioPlayback = {
    nextStartTime: ctx.currentTime,
    activeSources: new Set(),
    pendingSourceCount: 0,
    pendingPcmBytes: new Uint8Array(0),
    isDone: false,
    isStopped: false,
    sampleRate: sampleRate ?? DEFAULT_PCM_SAMPLE_RATE,
    channelCount: channelCount ?? DEFAULT_PCM_CHANNEL_COUNT,
    bitsPerSample: bitsPerSample ?? DEFAULT_PCM_BITS_PER_SAMPLE,
    onEnded,
  };

  return playback;
}
68
+
69
/**
 * Converts raw PCM chunks into audio buffers and schedules them back-to-back
 * on the shared audio context.
 *
 * Bytes that do not complete a whole frame are kept in `playback.pendingPcmBytes`
 * and prepended to the next chunk. Empty chunks are skipped and nothing is
 * scheduled once the playback has been stopped.
 *
 * @param playback Playback state to schedule on. `null` is accepted and is a
 *   no-op, so a chunk that arrives when no playback is active does not crash.
 * @param chunks Raw PCM byte chunks, in playback order.
 * @throws Error when the playback's bit depth is not supported by the PCM decoder.
 */
export function playChatResponseCurrentChunks({
  playback,
  chunks,
}: {
  playback: ChatResponseAudioPlayback | null;
  chunks: ArrayBuffer[];
}) {
  if (!playback) {
    return;
  }

  // A const binding keeps the non-null narrowing inside the `ended` callback below.
  const activePlayback = playback;

  // Best effort: the context may be suspended; a failed resume is deliberately ignored.
  void ctx.resume().catch(() => undefined);

  const bytesPerFrame = activePlayback.channelCount * (activePlayback.bitsPerSample / 8);

  for (const chunk of chunks) {
    if (activePlayback.isStopped || !chunk.byteLength) {
      continue;
    }

    const pcmBytes = concatUint8Arrays(activePlayback.pendingPcmBytes, new Uint8Array(chunk));
    const alignedByteLength = pcmBytes.byteLength - (pcmBytes.byteLength % bytesPerFrame);

    // Carry the incomplete trailing frame (possibly empty) over to the next chunk.
    activePlayback.pendingPcmBytes = pcmBytes.slice(alignedByteLength);

    if (!alignedByteLength) {
      continue;
    }

    const audioBuffer = createAudioBufferFromPcmChunk(
      activePlayback,
      pcmBytes.subarray(0, alignedByteLength)
    );
    const source = ctx.createBufferSource();
    // Never schedule in the past: if playback fell behind, start right away.
    const startTime = Math.max(activePlayback.nextStartTime, ctx.currentTime);

    source.buffer = audioBuffer;
    source.connect(ctx.destination);

    activePlayback.activeSources.add(source);
    activePlayback.pendingSourceCount += 1;
    activePlayback.nextStartTime = startTime + audioBuffer.duration;

    source.addEventListener(
      'ended',
      () => {
        activePlayback.activeSources.delete(source);
        activePlayback.pendingSourceCount = Math.max(0, activePlayback.pendingSourceCount - 1);

        // Report completion only after the stream finished and the last source drained.
        if (
          !activePlayback.isStopped &&
          activePlayback.isDone &&
          activePlayback.pendingSourceCount === 0
        ) {
          activePlayback.onEnded?.();
        }
      },
      { once: true }
    );

    source.start(startTime);
  }
}
123
+
124
/**
 * Marks a playback as complete, meaning no more chunks will be appended.
 *
 * If nothing is left to play, `onEnded` is invoked immediately; otherwise the
 * `ended` handler of the last scheduled source invokes it. Calling this again
 * on a playback that is already done is a no-op, so `onEnded` is not fired twice.
 *
 * @param playback Playback to finish; `null` and stopped playbacks are ignored.
 */
export function finishChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
  if (!playback || playback.isStopped || playback.isDone) {
    return;
  }

  playback.isDone = true;

  if (playback.pendingSourceCount === 0) {
    playback.onEnded?.();
  }
}
135
+
136
/**
 * Stops every scheduled source of a playback and resets its bookkeeping.
 *
 * `isStopped` is set before the sources are stopped, so their `ended` handlers
 * do not invoke `onEnded`.
 *
 * @param playback Playback to stop; `null` and already-stopped playbacks are ignored.
 */
export function stopChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
  if (playback === null || playback.isStopped) {
    return;
  }

  playback.isStopped = true;

  playback.activeSources.forEach((scheduledSource) => {
    scheduledSource.stop();
  });
  playback.activeSources.clear();

  playback.pendingSourceCount = 0;
  playback.pendingPcmBytes = new Uint8Array(0);
  playback.nextStartTime = ctx.currentTime;
}
152
+
153
/**
 * Converts a frame-aligned chunk of interleaved, little-endian signed 16-bit
 * PCM into an `AudioBuffer`.
 *
 * @param playback Supplies the channel count, sample rate and bit depth.
 * @param chunk PCM bytes; the length is expected to be a whole number of frames.
 * @returns An audio buffer with one float sample per input sample, scaled by 1/32768.
 * @throws Error when `playback.bitsPerSample` is not 16.
 */
function createAudioBufferFromPcmChunk(
  playback: ChatResponseAudioPlayback,
  chunk: Uint8Array
) {
  const { bitsPerSample, channelCount, sampleRate } = playback;

  if (bitsPerSample !== DEFAULT_PCM_BITS_PER_SAMPLE) {
    throw new Error(`Unsupported PCM bit depth: ${playback.bitsPerSample}`);
  }

  const bytesPerSample = bitsPerSample / 8;
  const frameCount = chunk.byteLength / channelCount / bytesPerSample;
  const audioBuffer = ctx.createBuffer(channelCount, frameCount, sampleRate);
  const pcm = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);

  const channelData: Float32Array[] = [];
  for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
    channelData.push(audioBuffer.getChannelData(channelIndex));
  }

  // Walk the interleaved samples linearly and route each one to its channel.
  const sampleCount = frameCount * channelCount;
  for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
    const channelIndex = sampleIndex % channelCount;
    const frameIndex = (sampleIndex - channelIndex) / channelCount;

    channelData[channelIndex][frameIndex] =
      pcm.getInt16(sampleIndex * bytesPerSample, true) / 32768;
  }

  return audioBuffer;
}
186
+
187
/**
 * Concatenates two byte arrays.
 *
 * When either side is empty the other array is returned as-is (same reference,
 * no copy), so callers must not rely on always receiving a fresh array.
 *
 * @param left Bytes that come first.
 * @param right Bytes appended after `left`.
 * @returns `left` followed by `right`.
 */
function concatUint8Arrays(left: Uint8Array, right: Uint8Array): Uint8Array {
  if (!left.byteLength) {
    return right;
  }

  if (!right.byteLength) {
    return left;
  }

  const combined = new Uint8Array(left.byteLength + right.byteLength);

  combined.set(left, 0);
  combined.set(right, left.byteLength);

  return combined;
}
199
+
200
/** Stops the registered standby sound, if any, and clears the module's handle to it. */
export function endStandByAudio() {
  if (standbySource === null) {
    return;
  }

  standbySource.stop();
  standbySource = null;
}
@@ -4,61 +4,36 @@ import { defineStore } from 'pinia';
4
4
  import type { SpeechStreamEvent } from '../types';
5
5
  import { ref } from 'vue';
6
6
  import { getCurrentPageContext } from './agentStore/pageContext';
7
+ import {
8
+ createChatResponseAudioPlayback,
9
+ endStandByAudio,
10
+ finishChatResponseAudio,
11
+ playChatResponseCurrentChunks,
12
+ startStandByAudio,
13
+ stopChatResponseAudio,
14
+ unlockAudio,
15
+ } from './agentAudio/utils';
16
+ import type { ChatResponseAudioPlayback } from './agentAudio/utils';
7
17
 
8
- type StreamingAudioState = {
9
- mimeType: string;
10
- mediaSource: MediaSource;
11
- sourceBuffer: SourceBuffer | null;
12
- pendingChunks: ArrayBuffer[];
13
- hasStartedPlayback: boolean;
14
- isDone: boolean;
15
- };
16
-
17
- let standByAudio: HTMLAudioElement | null = null;
18
18
  let isStandByAudioPlaying = false;
19
+ let isAudioUnlocked = false;
19
20
  async function playStandByAudio() {
20
- if (!standByAudio) {
21
- standByAudio = new Audio(`/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`);
22
- standByAudio.addEventListener('ended', () => {
23
- if (!standByAudio.paused) {
24
- restartStandByAudio();
25
- }
26
- });
27
- }
28
- standByAudio.currentTime = 0;
29
- await standByAudio.play();
30
21
  isStandByAudioPlaying = true;
22
+ await startStandByAudio();
31
23
  }
32
24
 
33
25
  function stopStandByAudio() {
34
- if (!standByAudio) {
35
- return;
36
- }
37
- standByAudio.pause();
38
- standByAudio.currentTime = 0;
26
+ endStandByAudio();
39
27
  isStandByAudioPlaying = false;
40
28
  }
41
29
 
42
- function restartStandByAudio() {
43
- if (standByAudio) {
44
- standByAudio.currentTime = 0;
45
- }
46
- playStandByAudio();
47
- }
48
-
49
-
50
30
  export const useAgentAudio = defineStore('agentAudio', () => {
51
31
  const agentStore = useAgentStore();
52
32
  const agentAudioMode = ref<'transcribing' | 'streaming' | 'fetchingAudio' | 'playingAgentResponse' | 'readyToRespond' >('readyToRespond');
53
33
  const isStreamingResponse = ref(false);
54
34
 
55
35
  let currentAbortController: AbortController | null = null;
56
- let isPlaying = false;
57
- let currentAudio: HTMLAudioElement | null = null;
58
- let currentAudioObjectUrl: string | null = null;
59
- let currentStreamingAudio: StreamingAudioState | null = null;
60
- let bufferedAudioChunks: ArrayBuffer[] = [];
61
- let bufferedAudioMimeType = 'audio/mpeg';
36
+ let currentStreamingAudio: ChatResponseAudioPlayback | null = null;
62
37
  let wasAudioResponseReceived = false;
63
38
 
64
39
  function stopGenerationAndAudio() {
@@ -72,6 +47,10 @@ export const useAgentAudio = defineStore('agentAudio', () => {
72
47
  }
73
48
 
74
49
  async function sendAudioToServerAndHandleResponse(blob: Blob) {
50
+ if (!isAudioUnlocked) {
51
+ await unlockAudio();
52
+ isAudioUnlocked = true;
53
+ }
75
54
  currentAbortController = new AbortController();
76
55
  wasAudioResponseReceived = false;
77
56
  const formData = new FormData();
@@ -192,7 +171,7 @@ export const useAgentAudio = defineStore('agentAudio', () => {
192
171
  wasAudioResponseReceived = true;
193
172
  isStreamingResponse.value = false;
194
173
  agentAudioMode.value = 'fetchingAudio';
195
- initializeAudioStream(event.data.mimeType);
174
+ initializeAudioStream(event.data);
196
175
  agentAudioMode.value = 'playingAgentResponse';
197
176
  return;
198
177
  }
@@ -215,162 +194,39 @@ export const useAgentAudio = defineStore('agentAudio', () => {
215
194
  }
216
195
  }
217
196
 
218
- async function setIsPlaying(value: boolean) {
219
- isPlaying = value;
220
-
221
- if (!currentAudio) {
222
- return;
223
- }
224
-
225
- if (!isPlaying) {
226
- currentAudio.pause();
227
- currentAudio.currentTime = 0;
228
- return;
229
- }
230
- agentAudioMode.value = 'playingAgentResponse';
231
- await void currentAudio.play().catch((error) => {
232
- console.error('Failed to play audio:', error);
233
- });
234
- }
235
-
236
- function initializeAudioStream(mimeType: string) {
197
+ function initializeAudioStream(audioData: Extract<SpeechStreamEvent, { type: 'audio-start' }>['data']) {
237
198
  stopCurrentAudioPlayback();
238
- bufferedAudioMimeType = mimeType;
239
-
240
- if (typeof MediaSource === 'undefined' || !MediaSource.isTypeSupported(mimeType)) {
241
- return;
242
- }
243
-
244
- const mediaSource = new MediaSource();
245
- currentAudioObjectUrl = URL.createObjectURL(mediaSource);
246
- currentAudio = new Audio(currentAudioObjectUrl);
247
- currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
248
- currentStreamingAudio = {
249
- mimeType,
250
- mediaSource,
251
- sourceBuffer: null,
252
- pendingChunks: [],
253
- hasStartedPlayback: false,
254
- isDone: false,
255
- };
256
-
257
- mediaSource.addEventListener('sourceopen', handleMediaSourceOpen, { once: true });
258
- }
259
-
260
- function handleMediaSourceOpen() {
261
- if (!currentStreamingAudio) {
262
- return;
263
- }
264
-
265
- try {
266
- currentStreamingAudio.sourceBuffer = currentStreamingAudio.mediaSource.addSourceBuffer(currentStreamingAudio.mimeType);
267
- currentStreamingAudio.sourceBuffer.mode = 'sequence';
268
- currentStreamingAudio.sourceBuffer.addEventListener('updateend', flushStreamingAudioQueue);
269
- flushStreamingAudioQueue();
270
- } catch (error) {
271
- console.error('Failed to initialize streaming audio playback:', error);
272
- bufferedAudioChunks.push(...currentStreamingAudio.pendingChunks);
273
- detachStreamingAudio();
274
- destroyCurrentAudioElement();
275
- }
199
+ currentStreamingAudio = createChatResponseAudioPlayback({
200
+ sampleRate: audioData.sampleRate,
201
+ channelCount: audioData.channelCount,
202
+ bitsPerSample: audioData.bitsPerSample,
203
+ onEnded: handleAudioEnded,
204
+ });
276
205
  }
277
206
 
278
207
  function appendAudioChunk(base64: string) {
279
- const chunk = base64ToArrayBuffer(base64);
280
-
281
- if (!currentStreamingAudio) {
282
- bufferedAudioChunks.push(chunk);
283
- return;
284
- }
285
-
286
- currentStreamingAudio.pendingChunks.push(chunk);
287
- flushStreamingAudioQueue();
288
- }
289
-
290
- function flushStreamingAudioQueue() {
291
- if (!currentStreamingAudio?.sourceBuffer || currentStreamingAudio.sourceBuffer.updating) {
292
- return;
293
- }
294
-
295
- const nextChunk = currentStreamingAudio.pendingChunks.shift();
296
-
297
- if (nextChunk) {
298
- currentStreamingAudio.sourceBuffer.appendBuffer(nextChunk);
299
-
300
- if (!currentStreamingAudio.hasStartedPlayback) {
301
- currentStreamingAudio.hasStartedPlayback = true;
302
- setIsPlaying(true);
303
- }
304
-
305
- return;
306
- }
307
-
308
- if (currentStreamingAudio.isDone && currentStreamingAudio.mediaSource.readyState === 'open') {
309
- currentStreamingAudio.mediaSource.endOfStream();
310
- }
208
+ playChatResponseCurrentChunks({
209
+ playback: currentStreamingAudio!,
210
+ chunks: [base64ToArrayBuffer(base64)],
211
+ });
311
212
  }
312
213
 
313
214
  function finishAudioStream() {
314
- if (currentStreamingAudio) {
315
- currentStreamingAudio.isDone = true;
316
- flushStreamingAudioQueue();
317
- return;
318
- }
319
-
320
- if (!bufferedAudioChunks.length) {
321
- return;
322
- }
323
-
324
- playAudioChunks(bufferedAudioChunks, bufferedAudioMimeType);
325
- bufferedAudioChunks = [];
326
- }
327
-
328
- function detachStreamingAudio() {
329
- if (currentStreamingAudio?.sourceBuffer) {
330
- currentStreamingAudio.sourceBuffer.removeEventListener('updateend', flushStreamingAudioQueue);
331
- }
332
-
333
- currentStreamingAudio = null;
334
- }
335
-
336
- function destroyCurrentAudioElement() {
337
- if (currentAudio) {
338
- currentAudio.pause();
339
- currentAudio.currentTime = 0;
340
- currentAudio.src = '';
341
- currentAudio.load();
342
- currentAudio = null;
343
- }
344
-
345
- if (currentAudioObjectUrl) {
346
- URL.revokeObjectURL(currentAudioObjectUrl);
347
- currentAudioObjectUrl = null;
348
- }
349
-
350
- isPlaying = false;
215
+ finishChatResponseAudio(currentStreamingAudio);
351
216
  }
352
217
 
353
218
  function stopCurrentAudioPlayback(dontResetMode = false) {
354
219
  stopStandByAudio();
355
- bufferedAudioChunks = [];
356
- bufferedAudioMimeType = 'audio/mpeg';
357
- detachStreamingAudio();
358
- destroyCurrentAudioElement();
220
+ stopChatResponseAudio(currentStreamingAudio);
221
+ currentStreamingAudio = null;
359
222
  if (!dontResetMode) {
360
223
  setAudioModeReadyToRespond();
361
224
  }
362
225
  }
363
226
 
364
227
  function handleAudioEnded() {
228
+ currentStreamingAudio = null;
365
229
  setAudioModeReadyToRespond();
366
- stopCurrentAudioPlayback();
367
- }
368
-
369
- function playAudioChunks(chunks: ArrayBuffer[], mimeType: string) {
370
- currentAudioObjectUrl = URL.createObjectURL(new Blob(chunks, { type: mimeType }));
371
- currentAudio = new Audio(currentAudioObjectUrl);
372
- currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
373
- setIsPlaying(true);
374
230
  }
375
231
 
376
232
  function base64ToArrayBuffer(base64: string) {
@@ -1,9 +1,9 @@
1
1
  <template>
2
2
  <div
3
- class="flex rounded-xl border border-gray-200 dark:border-gray-700"
3
+ class="flex rounded-xl border px-4 border-gray-200 dark:border-gray-700"
4
4
  @click="handleMarkdownLinkClick"
5
5
  :class="[
6
- hasVegaLite ? 'w-full px-6 my-2' : 'px-4 m-2',
6
+ hasVegaLite ? 'w-full my-2' : 'm-2',
7
7
  props.role === 'user' ? 'bg-lightListTableHeading dark:bg-darkListTableHeading self-end max-w-[80%] mr-4'
8
8
  : 'border-none self-start'
9
9
  ]"
@@ -1,7 +1,7 @@
1
1
  <template>
2
2
  <div
3
3
  class="absolute bottom-2 flex items-center justify-center z-10 gap-4"
4
- :class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-3/4']"
4
+ :class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-1/2']"
5
5
  >
6
6
  <button
7
7
  v-if="isAudioChatMode && microphoneButtonMode === 'generating'"
@@ -9,9 +9,10 @@
9
9
  @click="stopCurrentGeneration()"
10
10
  >
11
11
  <div class="w-3 h-3 bg-white rounded-full"/>
12
- {{ $t('Break') }}
12
+ {{ $t('Stop') }}
13
13
  </button>
14
- <button
14
+ <button
15
+ v-else
15
16
  class="h-9 bg-lightPrimary dark:bg-darkPrimary
16
17
  hover:opacity-90 rounded-full flex items-center justify-center
17
18
  transition-all duration-300 ease-in-out overflow-hidden"
@@ -89,6 +90,7 @@ watch(agentAudioMode, async (newVal) => {
89
90
  if(isAudioChatMode.value) {
90
91
  microphoneButtonMode.value = 'listen';
91
92
  await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
93
+ agentAudio.playBeep(1000);
92
94
  } else {
93
95
  microphoneButtonMode.value = 'off';
94
96
  }
package/custom/types.ts CHANGED
@@ -74,6 +74,9 @@ export type SpeechStreamEvent =
74
74
  data: {
75
75
  mimeType: string;
76
76
  format: string;
77
+ sampleRate: number;
78
+ channelCount: number;
79
+ bitsPerSample: number;
77
80
  };
78
81
  }
79
82
  | {
@@ -96,12 +96,15 @@ export function createAgentEventStream(res, options = {}) {
96
96
  },
97
97
  });
98
98
  },
99
- audioStart(mimeType, format) {
99
+ audioStart(mimeType, format, sampleRate, channelCount, bitsPerSample) {
100
100
  stream.send({
101
101
  type: "audio-start",
102
102
  data: {
103
103
  mimeType,
104
104
  format,
105
+ sampleRate,
106
+ channelCount,
107
+ bitsPerSample,
105
108
  },
106
109
  });
107
110
  },
@@ -0,0 +1,205 @@
1
/** Web Audio context shared by the playback helpers in this module. */
const ctx = new AudioContext();

/** Standby sound source that was most recently started, or `null` when none is registered. */
let standbySource: AudioBufferSourceNode | null = null;

// PCM format used when the caller of `createChatResponseAudioPlayback` omits a value.
const DEFAULT_PCM_SAMPLE_RATE = 24000;
const DEFAULT_PCM_CHANNEL_COUNT = 1;
const DEFAULT_PCM_BITS_PER_SAMPLE = 16;
7
+
8
/** Mutable bookkeeping for one streamed PCM audio response. */
export type ChatResponseAudioPlayback = {
  /** Audio-context time at which the next decoded chunk will be scheduled. */
  nextStartTime: number;
  /** Sources that have been scheduled and have not yet fired `ended`. */
  activeSources: Set<AudioBufferSourceNode>;
  /** Count of scheduled sources whose `ended` event is still outstanding. */
  pendingSourceCount: number;
  /** Trailing bytes of the previous chunk that did not complete a whole frame. */
  pendingPcmBytes: Uint8Array;
  /** Set by `finishChatResponseAudio` once no further chunks are expected. */
  isDone: boolean;
  /** Set by `stopChatResponseAudio`; a stopped playback schedules nothing more. */
  isStopped: boolean;
  /** Sample rate, in Hz, passed to `createBuffer` for every chunk. */
  sampleRate: number;
  /** Number of interleaved channels in the PCM stream. */
  channelCount: number;
  /** Bits per PCM sample. */
  bitsPerSample: number;
  /** Called when the playback is done and no scheduled source is outstanding. */
  onEnded?: () => void;
};
20
+
21
/**
 * Resumes the shared audio context and plays a single silent frame through it.
 *
 * NOTE(review): presumably meant to run inside a user gesture so that browser
 * autoplay policies allow later playback - confirm against the caller.
 */
export async function unlockAudio() {
  await ctx.resume();

  // One frame of silence: enough to start a source without producing audible output.
  const silentSource = ctx.createBufferSource();
  silentSource.buffer = ctx.createBuffer(1, 1, 22050);
  silentSource.connect(ctx.destination);
  silentSource.start(0);
}
31
+
32
/**
 * Fetches, decodes and starts the standby ("agent is processing") sound.
 *
 * The sound is only started if it has not been stopped or superseded while the
 * asset was loading, and any standby sound that is still playing is stopped first.
 *
 * @throws Error when the asset request returns a non-2xx status; fetch and
 *   decoding failures are propagated unchanged.
 */
export async function startStandByAudio() {
  // Stop the previous standby sound so it is not left playing without a handle.
  standbySource?.stop();

  // Register a started, silent, unconnected source while the asset loads.
  // `endStandByAudio()` can stop and clear it like any other source, which lets
  // this function detect a stop request that arrives during the await below.
  const pendingMarker = ctx.createBufferSource();
  pendingMarker.start();
  standbySource = pendingMarker;

  try {
    const response = await fetch(
      `/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`
    );

    if (!response.ok) {
      throw new Error(`Failed to load standby audio: HTTP ${response.status}`);
    }

    const audioBuffer = await ctx.decodeAudioData(await response.arrayBuffer());

    // Stopped or replaced by a newer call while loading: do not start playback.
    if (standbySource !== pendingMarker) {
      return;
    }

    const source = ctx.createBufferSource();

    source.buffer = audioBuffer;
    source.connect(ctx.destination);

    standbySource = source;
    source.start();
  } catch (error) {
    // Do not leave the placeholder registered when loading failed.
    if (standbySource === pendingMarker) {
      standbySource = null;
    }

    throw error;
  } finally {
    // The placeholder has served its purpose; stopping a started source is always valid.
    pendingMarker.stop();
  }
}
48
+
49
/**
 * Builds the initial playback state for one streamed audio response.
 *
 * @param options Optional PCM format and completion callback; any format field
 *   left out falls back to the module defaults.
 * @returns A fresh state with nothing scheduled, whose first chunk would start
 *   at the audio context's current time.
 */
export function createChatResponseAudioPlayback(options: {
  sampleRate?: number;
  channelCount?: number;
  bitsPerSample?: number;
  onEnded?: () => void;
} = {}): ChatResponseAudioPlayback {
  const { sampleRate, channelCount, bitsPerSample, onEnded } = options;

  const playback: ChatResponseAudioPlayback = {
    nextStartTime: ctx.currentTime,
    activeSources: new Set(),
    pendingSourceCount: 0,
    pendingPcmBytes: new Uint8Array(0),
    isDone: false,
    isStopped: false,
    sampleRate: sampleRate ?? DEFAULT_PCM_SAMPLE_RATE,
    channelCount: channelCount ?? DEFAULT_PCM_CHANNEL_COUNT,
    bitsPerSample: bitsPerSample ?? DEFAULT_PCM_BITS_PER_SAMPLE,
    onEnded,
  };

  return playback;
}
68
+
69
/**
 * Converts raw PCM chunks into audio buffers and schedules them back-to-back
 * on the shared audio context.
 *
 * Bytes that do not complete a whole frame are kept in `playback.pendingPcmBytes`
 * and prepended to the next chunk. Empty chunks are skipped and nothing is
 * scheduled once the playback has been stopped.
 *
 * @param playback Playback state to schedule on. `null` is accepted and is a
 *   no-op, so a chunk that arrives when no playback is active does not crash.
 * @param chunks Raw PCM byte chunks, in playback order.
 * @throws Error when the playback's bit depth is not supported by the PCM decoder.
 */
export function playChatResponseCurrentChunks({
  playback,
  chunks,
}: {
  playback: ChatResponseAudioPlayback | null;
  chunks: ArrayBuffer[];
}) {
  if (!playback) {
    return;
  }

  // A const binding keeps the non-null narrowing inside the `ended` callback below.
  const activePlayback = playback;

  // Best effort: the context may be suspended; a failed resume is deliberately ignored.
  void ctx.resume().catch(() => undefined);

  const bytesPerFrame = activePlayback.channelCount * (activePlayback.bitsPerSample / 8);

  for (const chunk of chunks) {
    if (activePlayback.isStopped || !chunk.byteLength) {
      continue;
    }

    const pcmBytes = concatUint8Arrays(activePlayback.pendingPcmBytes, new Uint8Array(chunk));
    const alignedByteLength = pcmBytes.byteLength - (pcmBytes.byteLength % bytesPerFrame);

    // Carry the incomplete trailing frame (possibly empty) over to the next chunk.
    activePlayback.pendingPcmBytes = pcmBytes.slice(alignedByteLength);

    if (!alignedByteLength) {
      continue;
    }

    const audioBuffer = createAudioBufferFromPcmChunk(
      activePlayback,
      pcmBytes.subarray(0, alignedByteLength)
    );
    const source = ctx.createBufferSource();
    // Never schedule in the past: if playback fell behind, start right away.
    const startTime = Math.max(activePlayback.nextStartTime, ctx.currentTime);

    source.buffer = audioBuffer;
    source.connect(ctx.destination);

    activePlayback.activeSources.add(source);
    activePlayback.pendingSourceCount += 1;
    activePlayback.nextStartTime = startTime + audioBuffer.duration;

    source.addEventListener(
      'ended',
      () => {
        activePlayback.activeSources.delete(source);
        activePlayback.pendingSourceCount = Math.max(0, activePlayback.pendingSourceCount - 1);

        // Report completion only after the stream finished and the last source drained.
        if (
          !activePlayback.isStopped &&
          activePlayback.isDone &&
          activePlayback.pendingSourceCount === 0
        ) {
          activePlayback.onEnded?.();
        }
      },
      { once: true }
    );

    source.start(startTime);
  }
}
123
+
124
/**
 * Marks a playback as complete, meaning no more chunks will be appended.
 *
 * If nothing is left to play, `onEnded` is invoked immediately; otherwise the
 * `ended` handler of the last scheduled source invokes it. Calling this again
 * on a playback that is already done is a no-op, so `onEnded` is not fired twice.
 *
 * @param playback Playback to finish; `null` and stopped playbacks are ignored.
 */
export function finishChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
  if (!playback || playback.isStopped || playback.isDone) {
    return;
  }

  playback.isDone = true;

  if (playback.pendingSourceCount === 0) {
    playback.onEnded?.();
  }
}
135
+
136
/**
 * Stops every scheduled source of a playback and resets its bookkeeping.
 *
 * `isStopped` is set before the sources are stopped, so their `ended` handlers
 * do not invoke `onEnded`.
 *
 * @param playback Playback to stop; `null` and already-stopped playbacks are ignored.
 */
export function stopChatResponseAudio(playback: ChatResponseAudioPlayback | null) {
  if (playback === null || playback.isStopped) {
    return;
  }

  playback.isStopped = true;

  playback.activeSources.forEach((scheduledSource) => {
    scheduledSource.stop();
  });
  playback.activeSources.clear();

  playback.pendingSourceCount = 0;
  playback.pendingPcmBytes = new Uint8Array(0);
  playback.nextStartTime = ctx.currentTime;
}
152
+
153
/**
 * Converts a frame-aligned chunk of interleaved, little-endian signed 16-bit
 * PCM into an `AudioBuffer`.
 *
 * @param playback Supplies the channel count, sample rate and bit depth.
 * @param chunk PCM bytes; the length is expected to be a whole number of frames.
 * @returns An audio buffer with one float sample per input sample, scaled by 1/32768.
 * @throws Error when `playback.bitsPerSample` is not 16.
 */
function createAudioBufferFromPcmChunk(
  playback: ChatResponseAudioPlayback,
  chunk: Uint8Array
) {
  const { bitsPerSample, channelCount, sampleRate } = playback;

  if (bitsPerSample !== DEFAULT_PCM_BITS_PER_SAMPLE) {
    throw new Error(`Unsupported PCM bit depth: ${playback.bitsPerSample}`);
  }

  const bytesPerSample = bitsPerSample / 8;
  const frameCount = chunk.byteLength / channelCount / bytesPerSample;
  const audioBuffer = ctx.createBuffer(channelCount, frameCount, sampleRate);
  const pcm = new DataView(chunk.buffer, chunk.byteOffset, chunk.byteLength);

  const channelData: Float32Array[] = [];
  for (let channelIndex = 0; channelIndex < channelCount; channelIndex += 1) {
    channelData.push(audioBuffer.getChannelData(channelIndex));
  }

  // Walk the interleaved samples linearly and route each one to its channel.
  const sampleCount = frameCount * channelCount;
  for (let sampleIndex = 0; sampleIndex < sampleCount; sampleIndex += 1) {
    const channelIndex = sampleIndex % channelCount;
    const frameIndex = (sampleIndex - channelIndex) / channelCount;

    channelData[channelIndex][frameIndex] =
      pcm.getInt16(sampleIndex * bytesPerSample, true) / 32768;
  }

  return audioBuffer;
}
186
+
187
/**
 * Concatenates two byte arrays.
 *
 * When either side is empty the other array is returned as-is (same reference,
 * no copy), so callers must not rely on always receiving a fresh array.
 *
 * @param left Bytes that come first.
 * @param right Bytes appended after `left`.
 * @returns `left` followed by `right`.
 */
function concatUint8Arrays(left: Uint8Array, right: Uint8Array): Uint8Array {
  if (!left.byteLength) {
    return right;
  }

  if (!right.byteLength) {
    return left;
  }

  const combined = new Uint8Array(left.byteLength + right.byteLength);

  combined.set(left, 0);
  combined.set(right, left.byteLength);

  return combined;
}
199
+
200
/** Stops the registered standby sound, if any, and clears the module's handle to it. */
export function endStandByAudio() {
  if (standbySource === null) {
    return;
  }

  standbySource.stop();
  standbySource = null;
}
@@ -4,61 +4,36 @@ import { defineStore } from 'pinia';
4
4
  import type { SpeechStreamEvent } from '../types';
5
5
  import { ref } from 'vue';
6
6
  import { getCurrentPageContext } from './agentStore/pageContext';
7
+ import {
8
+ createChatResponseAudioPlayback,
9
+ endStandByAudio,
10
+ finishChatResponseAudio,
11
+ playChatResponseCurrentChunks,
12
+ startStandByAudio,
13
+ stopChatResponseAudio,
14
+ unlockAudio,
15
+ } from './agentAudio/utils';
16
+ import type { ChatResponseAudioPlayback } from './agentAudio/utils';
7
17
 
8
- type StreamingAudioState = {
9
- mimeType: string;
10
- mediaSource: MediaSource;
11
- sourceBuffer: SourceBuffer | null;
12
- pendingChunks: ArrayBuffer[];
13
- hasStartedPlayback: boolean;
14
- isDone: boolean;
15
- };
16
-
17
- let standByAudio: HTMLAudioElement | null = null;
18
18
  let isStandByAudioPlaying = false;
19
+ let isAudioUnlocked = false;
19
20
  async function playStandByAudio() {
20
- if (!standByAudio) {
21
- standByAudio = new Audio(`/plugins/AdminForthAgentPlugin/agentAudio/agent-processing.mp3`);
22
- standByAudio.addEventListener('ended', () => {
23
- if (!standByAudio.paused) {
24
- restartStandByAudio();
25
- }
26
- });
27
- }
28
- standByAudio.currentTime = 0;
29
- await standByAudio.play();
30
21
  isStandByAudioPlaying = true;
22
+ await startStandByAudio();
31
23
  }
32
24
 
33
25
  function stopStandByAudio() {
34
- if (!standByAudio) {
35
- return;
36
- }
37
- standByAudio.pause();
38
- standByAudio.currentTime = 0;
26
+ endStandByAudio();
39
27
  isStandByAudioPlaying = false;
40
28
  }
41
29
 
42
- function restartStandByAudio() {
43
- if (standByAudio) {
44
- standByAudio.currentTime = 0;
45
- }
46
- playStandByAudio();
47
- }
48
-
49
-
50
30
  export const useAgentAudio = defineStore('agentAudio', () => {
51
31
  const agentStore = useAgentStore();
52
32
  const agentAudioMode = ref<'transcribing' | 'streaming' | 'fetchingAudio' | 'playingAgentResponse' | 'readyToRespond' >('readyToRespond');
53
33
  const isStreamingResponse = ref(false);
54
34
 
55
35
  let currentAbortController: AbortController | null = null;
56
- let isPlaying = false;
57
- let currentAudio: HTMLAudioElement | null = null;
58
- let currentAudioObjectUrl: string | null = null;
59
- let currentStreamingAudio: StreamingAudioState | null = null;
60
- let bufferedAudioChunks: ArrayBuffer[] = [];
61
- let bufferedAudioMimeType = 'audio/mpeg';
36
+ let currentStreamingAudio: ChatResponseAudioPlayback | null = null;
62
37
  let wasAudioResponseReceived = false;
63
38
 
64
39
  function stopGenerationAndAudio() {
@@ -72,6 +47,10 @@ export const useAgentAudio = defineStore('agentAudio', () => {
72
47
  }
73
48
 
74
49
  async function sendAudioToServerAndHandleResponse(blob: Blob) {
50
+ if (!isAudioUnlocked) {
51
+ await unlockAudio();
52
+ isAudioUnlocked = true;
53
+ }
75
54
  currentAbortController = new AbortController();
76
55
  wasAudioResponseReceived = false;
77
56
  const formData = new FormData();
@@ -192,7 +171,7 @@ export const useAgentAudio = defineStore('agentAudio', () => {
192
171
  wasAudioResponseReceived = true;
193
172
  isStreamingResponse.value = false;
194
173
  agentAudioMode.value = 'fetchingAudio';
195
- initializeAudioStream(event.data.mimeType);
174
+ initializeAudioStream(event.data);
196
175
  agentAudioMode.value = 'playingAgentResponse';
197
176
  return;
198
177
  }
@@ -215,162 +194,39 @@ export const useAgentAudio = defineStore('agentAudio', () => {
215
194
  }
216
195
  }
217
196
 
218
- async function setIsPlaying(value: boolean) {
219
- isPlaying = value;
220
-
221
- if (!currentAudio) {
222
- return;
223
- }
224
-
225
- if (!isPlaying) {
226
- currentAudio.pause();
227
- currentAudio.currentTime = 0;
228
- return;
229
- }
230
- agentAudioMode.value = 'playingAgentResponse';
231
- await void currentAudio.play().catch((error) => {
232
- console.error('Failed to play audio:', error);
233
- });
234
- }
235
-
236
- function initializeAudioStream(mimeType: string) {
197
+ function initializeAudioStream(audioData: Extract<SpeechStreamEvent, { type: 'audio-start' }>['data']) {
237
198
  stopCurrentAudioPlayback();
238
- bufferedAudioMimeType = mimeType;
239
-
240
- if (typeof MediaSource === 'undefined' || !MediaSource.isTypeSupported(mimeType)) {
241
- return;
242
- }
243
-
244
- const mediaSource = new MediaSource();
245
- currentAudioObjectUrl = URL.createObjectURL(mediaSource);
246
- currentAudio = new Audio(currentAudioObjectUrl);
247
- currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
248
- currentStreamingAudio = {
249
- mimeType,
250
- mediaSource,
251
- sourceBuffer: null,
252
- pendingChunks: [],
253
- hasStartedPlayback: false,
254
- isDone: false,
255
- };
256
-
257
- mediaSource.addEventListener('sourceopen', handleMediaSourceOpen, { once: true });
258
- }
259
-
260
- function handleMediaSourceOpen() {
261
- if (!currentStreamingAudio) {
262
- return;
263
- }
264
-
265
- try {
266
- currentStreamingAudio.sourceBuffer = currentStreamingAudio.mediaSource.addSourceBuffer(currentStreamingAudio.mimeType);
267
- currentStreamingAudio.sourceBuffer.mode = 'sequence';
268
- currentStreamingAudio.sourceBuffer.addEventListener('updateend', flushStreamingAudioQueue);
269
- flushStreamingAudioQueue();
270
- } catch (error) {
271
- console.error('Failed to initialize streaming audio playback:', error);
272
- bufferedAudioChunks.push(...currentStreamingAudio.pendingChunks);
273
- detachStreamingAudio();
274
- destroyCurrentAudioElement();
275
- }
199
+ currentStreamingAudio = createChatResponseAudioPlayback({
200
+ sampleRate: audioData.sampleRate,
201
+ channelCount: audioData.channelCount,
202
+ bitsPerSample: audioData.bitsPerSample,
203
+ onEnded: handleAudioEnded,
204
+ });
276
205
  }
277
206
 
278
207
  function appendAudioChunk(base64: string) {
279
- const chunk = base64ToArrayBuffer(base64);
280
-
281
- if (!currentStreamingAudio) {
282
- bufferedAudioChunks.push(chunk);
283
- return;
284
- }
285
-
286
- currentStreamingAudio.pendingChunks.push(chunk);
287
- flushStreamingAudioQueue();
288
- }
289
-
290
- function flushStreamingAudioQueue() {
291
- if (!currentStreamingAudio?.sourceBuffer || currentStreamingAudio.sourceBuffer.updating) {
292
- return;
293
- }
294
-
295
- const nextChunk = currentStreamingAudio.pendingChunks.shift();
296
-
297
- if (nextChunk) {
298
- currentStreamingAudio.sourceBuffer.appendBuffer(nextChunk);
299
-
300
- if (!currentStreamingAudio.hasStartedPlayback) {
301
- currentStreamingAudio.hasStartedPlayback = true;
302
- setIsPlaying(true);
303
- }
304
-
305
- return;
306
- }
307
-
308
- if (currentStreamingAudio.isDone && currentStreamingAudio.mediaSource.readyState === 'open') {
309
- currentStreamingAudio.mediaSource.endOfStream();
310
- }
208
+ playChatResponseCurrentChunks({
209
+ playback: currentStreamingAudio!,
210
+ chunks: [base64ToArrayBuffer(base64)],
211
+ });
311
212
  }
312
213
 
313
214
  function finishAudioStream() {
314
- if (currentStreamingAudio) {
315
- currentStreamingAudio.isDone = true;
316
- flushStreamingAudioQueue();
317
- return;
318
- }
319
-
320
- if (!bufferedAudioChunks.length) {
321
- return;
322
- }
323
-
324
- playAudioChunks(bufferedAudioChunks, bufferedAudioMimeType);
325
- bufferedAudioChunks = [];
326
- }
327
-
328
- function detachStreamingAudio() {
329
- if (currentStreamingAudio?.sourceBuffer) {
330
- currentStreamingAudio.sourceBuffer.removeEventListener('updateend', flushStreamingAudioQueue);
331
- }
332
-
333
- currentStreamingAudio = null;
334
- }
335
-
336
- function destroyCurrentAudioElement() {
337
- if (currentAudio) {
338
- currentAudio.pause();
339
- currentAudio.currentTime = 0;
340
- currentAudio.src = '';
341
- currentAudio.load();
342
- currentAudio = null;
343
- }
344
-
345
- if (currentAudioObjectUrl) {
346
- URL.revokeObjectURL(currentAudioObjectUrl);
347
- currentAudioObjectUrl = null;
348
- }
349
-
350
- isPlaying = false;
215
+ finishChatResponseAudio(currentStreamingAudio);
351
216
  }
352
217
 
353
218
  function stopCurrentAudioPlayback(dontResetMode = false) {
354
219
  stopStandByAudio();
355
- bufferedAudioChunks = [];
356
- bufferedAudioMimeType = 'audio/mpeg';
357
- detachStreamingAudio();
358
- destroyCurrentAudioElement();
220
+ stopChatResponseAudio(currentStreamingAudio);
221
+ currentStreamingAudio = null;
359
222
  if (!dontResetMode) {
360
223
  setAudioModeReadyToRespond();
361
224
  }
362
225
  }
363
226
 
364
227
  function handleAudioEnded() {
228
+ currentStreamingAudio = null;
365
229
  setAudioModeReadyToRespond();
366
- stopCurrentAudioPlayback();
367
- }
368
-
369
- function playAudioChunks(chunks: ArrayBuffer[], mimeType: string) {
370
- currentAudioObjectUrl = URL.createObjectURL(new Blob(chunks, { type: mimeType }));
371
- currentAudio = new Audio(currentAudioObjectUrl);
372
- currentAudio.addEventListener('ended', handleAudioEnded, { once: true });
373
- setIsPlaying(true);
374
230
  }
375
231
 
376
232
  function base64ToArrayBuffer(base64: string) {
@@ -1,9 +1,9 @@
1
1
  <template>
2
2
  <div
3
- class="flex rounded-xl border border-gray-200 dark:border-gray-700"
3
+ class="flex rounded-xl border px-4 border-gray-200 dark:border-gray-700"
4
4
  @click="handleMarkdownLinkClick"
5
5
  :class="[
6
- hasVegaLite ? 'w-full px-6 my-2' : 'px-4 m-2',
6
+ hasVegaLite ? 'w-full my-2' : 'm-2',
7
7
  props.role === 'user' ? 'bg-lightListTableHeading dark:bg-darkListTableHeading self-end max-w-[80%] mr-4'
8
8
  : 'border-none self-start'
9
9
  ]"
@@ -1,7 +1,7 @@
1
1
  <template>
2
2
  <div
3
3
  class="absolute bottom-2 flex items-center justify-center z-10 gap-4"
4
- :class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-3/4']"
4
+ :class="[!agentStore.isAudioChatMode ? 'right-16': agentStore.isMobile ? 'right-1/2 translate-x-1/2' : 'right-1/2 translate-x-1/2']"
5
5
  >
6
6
  <button
7
7
  v-if="isAudioChatMode && microphoneButtonMode === 'generating'"
@@ -9,9 +9,10 @@
9
9
  @click="stopCurrentGeneration()"
10
10
  >
11
11
  <div class="w-3 h-3 bg-white rounded-full"/>
12
- {{ $t('Break') }}
12
+ {{ $t('Stop') }}
13
13
  </button>
14
- <button
14
+ <button
15
+ v-else
15
16
  class="h-9 bg-lightPrimary dark:bg-darkPrimary
16
17
  hover:opacity-90 rounded-full flex items-center justify-center
17
18
  transition-all duration-300 ease-in-out overflow-hidden"
@@ -89,6 +90,7 @@ watch(agentAudioMode, async (newVal) => {
89
90
  if(isAudioChatMode.value) {
90
91
  microphoneButtonMode.value = 'listen';
91
92
  await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
93
+ agentAudio.playBeep(1000);
92
94
  } else {
93
95
  microphoneButtonMode.value = 'off';
94
96
  }
@@ -74,6 +74,9 @@ export type SpeechStreamEvent =
74
74
  data: {
75
75
  mimeType: string;
76
76
  format: string;
77
+ sampleRate: number;
78
+ channelCount: number;
79
+ bitsPerSample: number;
77
80
  };
78
81
  }
79
82
  | {
package/dist/index.js CHANGED
@@ -478,10 +478,10 @@ export default class AdminForthAgentPlugin extends AdminForthPlugin {
478
478
  text: sanitizeSpeechText(agentResponse.text),
479
479
  stream: true,
480
480
  streamFormat: "audio",
481
- format: "mp3",
481
+ format: "pcm",
482
482
  abortSignal,
483
483
  });
484
- stream.audioStart(speech.mimeType, speech.format);
484
+ stream.audioStart(speech.mimeType, speech.format, 24000, 1, 16);
485
485
  const reader = speech.audioStream.getReader();
486
486
  const cancelAudioStream = () => {
487
487
  void reader.cancel().catch(() => undefined);
package/index.ts CHANGED
@@ -544,11 +544,11 @@ export default class AdminForthAgentPlugin extends AdminForthPlugin {
544
544
  text: sanitizeSpeechText(agentResponse.text),
545
545
  stream: true,
546
546
  streamFormat: "audio",
547
- format: "mp3",
547
+ format: "pcm",
548
548
  abortSignal,
549
549
  });
550
550
 
551
- stream.audioStart(speech.mimeType, speech.format);
551
+ stream.audioStart(speech.mimeType, speech.format, 24000, 1, 16);
552
552
 
553
553
  const reader = speech.audioStream.getReader();
554
554
  const cancelAudioStream = () => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adminforth/agent",
3
- "version": "1.43.5",
3
+ "version": "1.43.7",
4
4
  "main": "dist/index.js",
5
5
  "types": "dist/index.d.ts",
6
6
  "type": "module",