@adminforth/agent 1.37.0 → 1.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/agent/languageDetect.ts +0 -8
  2. package/agent/simpleAgent.ts +5 -5
  3. package/agent/systemPrompt.ts +35 -4
  4. package/agent/toolCallEvents.ts +31 -2
  5. package/agent/tools/apiTool.ts +1 -1
  6. package/agentResponseEvents.ts +197 -0
  7. package/apiBasedTools.ts +118 -284
  8. package/build.log +12 -2
  9. package/custom/ChatSurface.vue +31 -21
  10. package/custom/composables/agentAudio/agent-processing.mp3 +0 -0
  11. package/custom/composables/agentStore/constants.ts +8 -1
  12. package/custom/composables/agentStore/useAgentSessions.ts +85 -12
  13. package/custom/composables/useAgentAudio.ts +392 -0
  14. package/custom/composables/useAgentStore.ts +52 -5
  15. package/custom/conversation_area/ConversationArea.vue +1 -1
  16. package/custom/conversation_area/MessageRenderer.vue +12 -1
  17. package/custom/conversation_area/SystemMessageRenderer.vue +28 -0
  18. package/custom/conversation_area/TextRenderer.vue +4 -3
  19. package/custom/conversation_area/ToolRenderer.vue +1 -1
  20. package/custom/package.json +2 -1
  21. package/custom/pnpm-lock.yaml +29 -0
  22. package/custom/speech_recognition_frontend/AudioLines.vue +97 -0
  23. package/custom/speech_recognition_frontend/MicrophoneButon.vue +157 -0
  24. package/custom/speech_recognition_frontend/types/voice-activity-detection.d.ts +22 -0
  25. package/custom/speech_recognition_frontend/voiceActivityDetection.ts +151 -0
  26. package/custom/types.ts +52 -2
  27. package/dist/agent/languageDetect.js +0 -6
  28. package/dist/agent/simpleAgent.js +4 -3
  29. package/dist/agent/systemPrompt.js +24 -3
  30. package/dist/agent/toolCallEvents.js +24 -2
  31. package/dist/agent/tools/apiTool.js +1 -1
  32. package/dist/agentResponseEvents.js +141 -0
  33. package/dist/apiBasedTools.js +95 -211
  34. package/dist/custom/ChatSurface.vue +31 -21
  35. package/dist/custom/composables/agentAudio/agent-processing.mp3 +0 -0
  36. package/dist/custom/composables/agentStore/constants.ts +8 -1
  37. package/dist/custom/composables/agentStore/useAgentSessions.ts +85 -12
  38. package/dist/custom/composables/useAgentAudio.ts +392 -0
  39. package/dist/custom/composables/useAgentStore.ts +52 -5
  40. package/dist/custom/conversation_area/ConversationArea.vue +1 -1
  41. package/dist/custom/conversation_area/MessageRenderer.vue +12 -1
  42. package/dist/custom/conversation_area/SystemMessageRenderer.vue +28 -0
  43. package/dist/custom/conversation_area/TextRenderer.vue +4 -3
  44. package/dist/custom/conversation_area/ToolRenderer.vue +1 -1
  45. package/dist/custom/package.json +2 -1
  46. package/dist/custom/pnpm-lock.yaml +29 -0
  47. package/dist/custom/speech_recognition_frontend/AudioLines.vue +97 -0
  48. package/dist/custom/speech_recognition_frontend/MicrophoneButon.vue +157 -0
  49. package/dist/custom/speech_recognition_frontend/types/voice-activity-detection.d.ts +22 -0
  50. package/dist/custom/speech_recognition_frontend/voiceActivityDetection.ts +151 -0
  51. package/dist/custom/types.ts +52 -2
  52. package/dist/index.js +290 -400
  53. package/index.ts +318 -492
  54. package/package.json +3 -2
  55. package/types.ts +1 -1
@@ -8,11 +8,15 @@
8
8
  :key="part.type"
9
9
  >
10
10
  <TextRenderer
11
- v-if="part.type === 'text'"
11
+ v-if="part.type === 'text' && !checkIfMessageSystemMessage(part.text ?? '')"
12
12
  :message="part.text"
13
13
  :role="props.message.role"
14
14
  :state="part.state ?? (props.message.role === 'user' ? 'done' : undefined)"
15
15
  />
16
+ <SystemMessageRenderer
17
+ v-else
18
+ :message="part.text"
19
+ />
16
20
  </template>
17
21
 
18
22
  </template>
@@ -25,9 +29,16 @@
25
29
  import type { IMessage } from '../types';
26
30
  import { getMessageParts } from '../utils';
27
31
  import ProcessingTimeline from './ProcessingTimeline.vue';
32
+ import SystemMessageRenderer from './SystemMessageRenderer.vue';
33
+ import { RESERVED_SYSTEM_MESSAGE_CONTENT } from '../composables/agentStore/constants';
28
34
 
29
35
  const props = defineProps<{
30
36
  message: IMessage
31
37
  isLastMessageInChat: boolean
32
38
  }>();
39
+
40
+ function checkIfMessageSystemMessage(message: IMessage): boolean {
41
+ const isReserved = Object.values(RESERVED_SYSTEM_MESSAGE_CONTENT).includes(message as RESERVED_SYSTEM_MESSAGE_CONTENT);
42
+ return isReserved;
43
+ }
33
44
  </script>
@@ -0,0 +1,28 @@
1
+ <template>
2
+ <p :class="`${TEXT_CLASSES}`" v-if="message === RESERVED_SYSTEM_MESSAGE_CONTENT.START_AUDIO_CHAT">
3
+ <IconPhoneSolid class="inline-block w-4 h-4 mr-1" />
4
+ {{$t('Audio chat started')}}
5
+ </p>
6
+ <p :class="`${TEXT_CLASSES}`" v-else-if="message === RESERVED_SYSTEM_MESSAGE_CONTENT.END_AUDIO_CHAT">
7
+ <IconPhoneHangupSolid class="inline-block w-4 h-4 mr-1" />
8
+ {{$t('Audio chat ended')}}
9
+ </p>
10
+ <p :class="`${TEXT_CLASSES}`" v-else-if="message === RESERVED_SYSTEM_MESSAGE_CONTENT.AGENT_RESPONSE_ABORTED">
11
+ {{$t('Agent response aborted')}}
12
+ </p>
13
+
14
+ </template>
15
+
16
+
17
+
18
+ <script setup lang="ts">
19
+ import { RESERVED_SYSTEM_MESSAGE_CONTENT } from '../composables/agentStore/constants';
20
+ import { IconPhoneSolid, IconPhoneHangupSolid } from '@iconify-prerendered/vue-flowbite';
21
+
22
+ const TEXT_CLASSES = 'text-center italic text-lightListTableHeadingText dark:text-darkListTableHeadingText flex items-center justify-center';
23
+
24
+ const props = defineProps<{
25
+ message: string
26
+ }>();
27
+
28
+ </script>
@@ -16,9 +16,9 @@
16
16
  :components="incremarkComponents"
17
17
  :incremark-options="incremarkOptions"
18
18
  />
19
- <p v-else class="text-red-500 py-2">
19
+ <!-- <p v-else class="text-red-500 py-2">
20
20
  {{ $t('No content to render') }}
21
- </p>
21
+ </p> -->
22
22
  </div>
23
23
  </template>
24
24
 
@@ -28,11 +28,12 @@
28
28
  import { useRouter } from 'vue-router';
29
29
  import { useAgentStore } from '../composables/useAgentStore';
30
30
  import { useCoreStore } from '@/stores/core';
31
+ import type { IMessage } from '../types';
31
32
 
32
33
  const props = defineProps<{
33
34
  message: string | undefined,
34
35
  state: string | undefined,
35
- role: 'user' | 'assistant'
36
+ role: IMessage['role']
36
37
  }>();
37
38
 
38
39
  const emit = defineEmits(['toggle-thoughts']);
@@ -109,7 +109,7 @@
109
109
  const hasToolSections = computed(() => toolSections.value.length > 0);
110
110
 
111
111
  onMounted(() => {
112
- if (toolRendererRef.value) {
112
+ if (toolRendererRef.value && props.data.toolInfo) {
113
113
  toolRendererInitialWidth.value = toolRendererRef.value.offsetWidth;
114
114
  }
115
115
  });
@@ -22,6 +22,7 @@
22
22
  "dompurify": "^3.3.3",
23
23
  "katex": "^0.16.45",
24
24
  "marked": "^18.0.0",
25
- "vega-embed": "^7.1.0"
25
+ "vega-embed": "^7.1.0",
26
+ "voice-activity-detection": "^0.0.5"
26
27
  }
27
28
  }
@@ -44,6 +44,9 @@ importers:
44
44
  vega-embed:
45
45
  specifier: ^7.1.0
46
46
  version: 7.1.0(vega-lite@6.4.2(vega@6.2.0))(vega@6.2.0)
47
+ voice-activity-detection:
48
+ specifier: ^0.0.5
49
+ version: 0.0.5
47
50
 
48
51
  packages:
49
52
 
@@ -263,6 +266,9 @@ packages:
263
266
  peerDependencies:
264
267
  zod: ^3.25.76 || ^4.1.8
265
268
 
269
+ analyser-frequency-average@1.0.0:
270
+ resolution: {integrity: sha512-Y8HRgDfMWpefR286IAT7w9WsZ2r2dLOAkUNz8SQgsTAM0GsM9SAAqr1psqOr1scN76cL0pfuNZoQTnuvdoM0RA==}
271
+
266
272
  ansi-regex@6.2.2:
267
273
  resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==}
268
274
  engines: {node: '>=12'}
@@ -271,6 +277,9 @@ packages:
271
277
  resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==}
272
278
  engines: {node: '>=12'}
273
279
 
280
+ audio-frequency-to-index@2.0.0:
281
+ resolution: {integrity: sha512-7zqlDEAsEkPB0ORRhjBlsK7KBZQtdgLLQcmemFD2V2KHPH4flqzDOheWl+U69K0P/LA7J/H5YBNzNWaoS/7WAQ==}
282
+
274
283
  ccount@2.0.1:
275
284
  resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==}
276
285
 
@@ -286,6 +295,9 @@ packages:
286
295
  character-reference-invalid@2.0.1:
287
296
  resolution: {integrity: sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==}
288
297
 
298
+ clamp@1.0.1:
299
+ resolution: {integrity: sha512-kgMuFyE78OC6Dyu3Dy7vcx4uy97EIbVxJB/B0eJ3bUNAkwdNcxYzgKltnyADiYwsR7SEqkkUPsEUT//OVS6XMA==}
300
+
289
301
  cliui@9.0.1:
290
302
  resolution: {integrity: sha512-k7ndgKhwoQveBL+/1tqGJYNz097I7WOvwbmmU2AR5+magtbjPWQTS1C5vzGkBC8Ym8UWRzfKUzUUqFLypY4Q+w==}
291
303
  engines: {node: '>=20'}
@@ -882,6 +894,9 @@ packages:
882
894
  vfile@6.0.3:
883
895
  resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==}
884
896
 
897
+ voice-activity-detection@0.0.5:
898
+ resolution: {integrity: sha512-Ezq+k0cICI67XR/R9KvTRhJ7/u6U0Zk9I+8VNs3hHTAvB6cA3glCa+cHgCRWSW/SV6cRrufSQsV1YxpNtsQb1g==}
899
+
885
900
  vue@3.5.32:
886
901
  resolution: {integrity: sha512-vM4z4Q9tTafVfMAK7IVzmxg34rSzTFMyIe0UUEijUCkn9+23lj0WRfA83dg7eQZIUlgOSGrkViIaCfqSAUXsMw==}
887
902
  peerDependencies:
@@ -1187,10 +1202,18 @@ snapshots:
1187
1202
  '@opentelemetry/api': 1.9.0
1188
1203
  zod: 4.3.6
1189
1204
 
1205
+ analyser-frequency-average@1.0.0:
1206
+ dependencies:
1207
+ audio-frequency-to-index: 2.0.0
1208
+
1190
1209
  ansi-regex@6.2.2: {}
1191
1210
 
1192
1211
  ansi-styles@6.2.3: {}
1193
1212
 
1213
+ audio-frequency-to-index@2.0.0:
1214
+ dependencies:
1215
+ clamp: 1.0.1
1216
+
1194
1217
  ccount@2.0.1: {}
1195
1218
 
1196
1219
  character-entities-html4@2.1.0: {}
@@ -1201,6 +1224,8 @@ snapshots:
1201
1224
 
1202
1225
  character-reference-invalid@2.0.1: {}
1203
1226
 
1227
+ clamp@1.0.1: {}
1228
+
1204
1229
  cliui@9.0.1:
1205
1230
  dependencies:
1206
1231
  string-width: 7.2.0
@@ -2143,6 +2168,10 @@ snapshots:
2143
2168
  '@types/unist': 3.0.3
2144
2169
  vfile-message: 4.0.3
2145
2170
 
2171
+ voice-activity-detection@0.0.5:
2172
+ dependencies:
2173
+ analyser-frequency-average: 1.0.0
2174
+
2146
2175
  vue@3.5.32:
2147
2176
  dependencies:
2148
2177
  '@vue/compiler-dom': 3.5.32
@@ -0,0 +1,97 @@
1
+ <template>
2
+ <div
3
+ class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
4
+ :class="{
5
+ 'recordingAnimation1' : showAnimation,
6
+ 'h-2': !isRecording,
7
+ 'h-1': isRecording,
8
+ }"
9
+ />
10
+ <div
11
+ class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
12
+ :class="{
13
+ 'recordingAnimation2' : showAnimation,
14
+ 'h-4': !isRecording,
15
+ 'h-1': isRecording,
16
+ }"
17
+ />
18
+ <div
19
+ class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
20
+ :class="{
21
+ 'recordingAnimation3' : showAnimation,
22
+ 'h-3': !isRecording,
23
+ 'h-1': isRecording,
24
+ }"
25
+ />
26
+ <div
27
+ class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
28
+ :class="{
29
+ 'recordingAnimation4' : showAnimation,
30
+ 'h-2': !isRecording,
31
+ 'h-1': isRecording,
32
+ }"
33
+ />
34
+ <template v-if="isRecording">
35
+ <div
36
+ class=" bg-white w-[0.2rem] rounded-sm h-1 transition-all duration-300 ease-in-out"
37
+ :class="{
38
+ 'recordingAnimation5' : showAnimation,
39
+ }"
40
+ />
41
+ <p class="text-white ml-2">End</p>
42
+ </template>
43
+ </template>
44
+
45
+
46
+
47
+ <script setup lang="ts">
48
+
49
+ const props = defineProps<{
50
+ showAnimation: boolean;
51
+ isRecording: boolean;
52
+ }>();
53
+
54
+ </script>
55
+
56
+ <style scoped lang="scss">
57
+ .recordingAnimation1 {
58
+ animation: recordingAnimation 1s infinite;
59
+ height: 0.3rem;
60
+ }
61
+
62
+ .recordingAnimation2 {
63
+ animation: recordingAnimation 1s infinite;
64
+ animation-delay: 0.2s;
65
+ height: 0.5rem;
66
+ }
67
+
68
+ .recordingAnimation3 {
69
+ animation: recordingAnimation 1s infinite;
70
+ animation-delay: 0.4s;
71
+ height: 0.4rem;
72
+ }
73
+
74
+ .recordingAnimation4 {
75
+ animation: recordingAnimation 1s infinite;
76
+ animation-delay: 0.6s;
77
+ height: 0.5rem;
78
+ }
79
+
80
+ .recordingAnimation5 {
81
+ animation: recordingAnimation 1s infinite;
82
+ animation-delay: 0.8s;
83
+ height: 0.3rem;
84
+ }
85
+
86
+ @keyframes recordingAnimation {
87
+ 0% {
88
+ transform: scaleY(1);
89
+ }
90
+ 50% {
91
+ transform: scaleY(2);
92
+ }
93
+ 100% {
94
+ transform: scaleY(1);
95
+ }
96
+ }
97
+ </style>
@@ -0,0 +1,157 @@
1
+ <template>
2
+ <button
3
+ class="absolute bottom-2 h-9 bg-lightPrimary dark:bg-darkPrimary
4
+ hover:opacity-90 rounded-full flex items-center justify-center
5
+ transition-all duration-300 ease-in-out overflow-hidden"
6
+ :class="[isAudioChatMode ? 'w-32 px-2': 'w-9', !agentStore.isAudioChatMode ? 'right-16': 'right-1/2 translate-x-1/2']"
7
+ @click="toggleChatMode"
8
+ >
9
+ <div class="w-5 h-5 flex items-center justify-center">
10
+ <div v-if="microphoneButtonMode === 'listen' || microphoneButtonMode === 'off'" class="flex justify-evenly items-center gap-[0.1rem]">
11
+ <AudioLines :showAnimation="showAudioWavesAnimation" :isRecording="microphoneButtonMode === 'listen'" />
12
+ </div>
13
+ <div v-else-if="microphoneButtonMode === 'generating'" class="flex items-center justify-center gap-2 text-white text-sm">
14
+ <span class="w-3 h-3 bg-white rounded-sm" />
15
+ {{ $t('Stop') }}
16
+ </div>
17
+ <Spinner v-else class="w-4 h-4 text-lightButtonsText dark:text-darkButtonsText fill-lightButtonsBackground dark:fill-darkPrimary" />
18
+ </div>
19
+
20
+ </button>
21
+ </template>
22
+
23
+
24
+ <script setup lang="ts">
25
+ import { computed, onMounted, onBeforeUnmount, ref, watch } from 'vue';
26
+ import debounce from 'lodash/debounce';
27
+ import { requestMicAndStartVAD, stopUserMedia, getRecorder, CALIBRATION_DURATION } from './voiceActivityDetection';
28
+ import { Spinner } from '@/afcl'
29
+ import { storeToRefs } from 'pinia';
30
+ import { useAgentStore } from '../composables/useAgentStore';
31
+ import { useAgentAudio } from '../composables/useAgentAudio';
32
+ import AudioLines from './AudioLines.vue';
33
+
34
+ const agentStore = useAgentStore();
35
+ const agentAudio = useAgentAudio();
36
+ const { sendAudioToServerAndHandleResponse } = agentAudio;
37
+ const { stopGenerationAndAudio } = agentAudio;
38
+ const { stopCurrentAudioPlayback } = agentAudio;
39
+ const { agentAudioMode } = storeToRefs(agentAudio);
40
+ const microphoneButtonMode = ref<'off' | 'calibrating' | 'listen' | 'transcribing' | 'generating'>('off');
41
+ const showAudioWavesAnimation = ref(false);
42
+ const hideAnimationDebounced = debounce(() => {
43
+ showAudioWavesAnimation.value = false;
44
+ }, 100);
45
+ const sendUserRecordDebounced = debounce(() => {
46
+ sendRecordForTranscription();
47
+ }, 500);
48
+
49
+ const isAudioChatMode = computed(() => agentStore.isAudioChatMode);
50
+
51
+ onMounted(() => {
52
+ agentStore.registerOnBeforeChatCloseCallback(async () => {
53
+ if(agentStore.isAudioChatMode) {
54
+ onStopRecording();
55
+ resetAll();
56
+ agentStore.setIsAudioChatMode(false);
57
+ }
58
+ });
59
+ });
60
+
61
+ watch(agentAudioMode, (newVal) => {
62
+ if(newVal === 'streaming') {
63
+ stopCurrentAudioPlayback(true);
64
+ microphoneButtonMode.value = 'generating';
65
+ } else if (newVal === 'transcribing') {
66
+ microphoneButtonMode.value = 'transcribing';
67
+ } else if (newVal === 'fetchingAudio') {
68
+ //Generation is done, waiting for audio to be ready
69
+ } else if (newVal === 'playingAgentResponse') {
70
+ // Audio is playing
71
+ } else {
72
+ if(isAudioChatMode.value) {
73
+ microphoneButtonMode.value = 'listen';
74
+ } else {
75
+ microphoneButtonMode.value = 'off';
76
+ }
77
+ }
78
+ })
79
+
80
+ function toggleChatMode() {
81
+ agentStore.setIsAudioChatMode(!isAudioChatMode.value);
82
+ if (isAudioChatMode.value) {
83
+ onStartRecording();
84
+ } else {
85
+ resetAll();
86
+ onStopRecording();
87
+ }
88
+ }
89
+
90
+ async function onStartRecording() {
91
+ microphoneButtonMode.value = 'calibrating';
92
+ await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
93
+ setTimeout(() => {
94
+ microphoneButtonMode.value = 'listen';
95
+ agentAudio.playBeep(1000);
96
+ }, CALIBRATION_DURATION);
97
+ }
98
+
99
+ function onStopRecording() {
100
+ agentAudio.playBeep(600);
101
+ stopUserMedia();
102
+ showAudioWavesAnimation.value = false;
103
+ }
104
+
105
+ function resetAll() {
106
+ stopGenerationAndAudio();
107
+ microphoneButtonMode.value = 'off';
108
+ showAudioWavesAnimation.value = false;
109
+ hideAnimationDebounced.cancel();
110
+ sendUserRecordDebounced.cancel();
111
+ }
112
+
113
+
114
+ function saidSomething() {
115
+ showAudioWavesAnimation.value = true;
116
+ hideAnimationDebounced();
117
+ sendUserRecordDebounced();
118
+ }
119
+
120
+ function stopRecording() {
121
+ hideAnimationDebounced.cancel();
122
+ sendUserRecordDebounced.cancel();
123
+ }
124
+
125
+ function onAnySound(amplitude: number) {
126
+ if(amplitude < 0.01) {
127
+ showAudioWavesAnimation.value = false;
128
+ return;
129
+ }
130
+ showAudioWavesAnimation.value = true;
131
+ hideAnimationDebounced.cancel();
132
+ }
133
+
134
+ async function sendRecordForTranscription() {
135
+ showAudioWavesAnimation.value = false;
136
+ const recordBlob = await getRecorder();
137
+ if (recordBlob) {
138
+ onStopRecording();
139
+ await sendAudioToServerAndHandleResponse(recordBlob);
140
+ if (agentStore.isAudioChatMode) {
141
+ await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
142
+ }
143
+ } else {
144
+ console.error('No audio recorded');
145
+ }
146
+ }
147
+
148
+ onBeforeUnmount(() => {
149
+ stopUserMedia();
150
+ agentStore.setIsAudioChatMode(false);
151
+ onStopRecording();
152
+ hideAnimationDebounced.cancel();
153
+ sendUserRecordDebounced.cancel();
154
+ });
155
+
156
+
157
+ </script>
@@ -0,0 +1,22 @@
1
+ declare module 'voice-activity-detection' {
2
+ type VADOptions = {
3
+ fftSize: number,
4
+ bufferLen: number,
5
+ smoothingTimeConstant: number,
6
+ minCaptureFreq: number, // in Hz
7
+ maxCaptureFreq: number, // in Hz
8
+ noiseCaptureDuration: number, // in ms
9
+ minNoiseLevel: number, // from 0 to 1
10
+ maxNoiseLevel: number, // from 0 to 1
11
+ avgNoiseMultiplier: number, // from 0 to 1
12
+ onVoiceStart?: () => void;
13
+ onVoiceStop?: () => void;
14
+ onUpdate?: (value: number) => void;
15
+ };
16
+
17
+ export default function vad(
18
+ audioContext: AudioContext,
19
+ stream: MediaStream,
20
+ options: VADOptions,
21
+ ): void;
22
+ }
@@ -0,0 +1,151 @@
1
+ import vad from 'voice-activity-detection';
2
+
3
+ let currentStream: MediaStream | null = null;
4
+ let vadInstance: any = null;
5
+ let audioContext: AudioContext | null = null;
6
+ let mediaRecorder: MediaRecorder | null = null;
7
+ let recordedChunks: BlobPart[] = [];
8
+ let wasVoiceStarted = false;
9
+
10
+ export const CALIBRATION_DURATION = 1000; // in ms
11
+
12
+ export async function requestMicAndStartVAD(
13
+ onVoiceStopCallback: () => void,
14
+ onVoiceStartCallback: () => void,
15
+ onUpdateCallback: (amplitude: number) => void
16
+ ) {
17
+ return new Promise<void>((resolve, reject) => {
18
+ try {
19
+ audioContext = new AudioContext();
20
+
21
+ navigator.mediaDevices
22
+ .getUserMedia({ audio: true })
23
+ .then((stream) => {
24
+ currentStream = stream;
25
+ startRecording(stream);
26
+ resolve();
27
+ startUserMedia(audioContext as AudioContext, stream, onVoiceStartCallback, onVoiceStopCallback, onUpdateCallback);
28
+ })
29
+ .catch((error) => {
30
+ handleMicConnectError();
31
+ reject(error);
32
+ });
33
+ } catch (e) {
34
+ handleUserMediaError();
35
+ reject(e);
36
+ }
37
+ });
38
+ }
39
+
40
+ function handleUserMediaError() {
41
+ console.error('Mic input is not supported by the browser.');
42
+ }
43
+
44
+ function handleMicConnectError() {
45
+ console.error('Could not connect microphone. Possible rejected by the user or is blocked by the browser.');
46
+ }
47
+
48
+ export async function stopUserMedia() {
49
+ wasVoiceStarted = false;
50
+ if (vadInstance && vadInstance.destroy) {
51
+ vadInstance.destroy();
52
+ vadInstance = null;
53
+ }
54
+
55
+ if (currentStream) {
56
+ currentStream.getTracks().forEach(track => track.stop());
57
+ currentStream = null;
58
+ }
59
+
60
+ if (audioContext) {
61
+ audioContext.close();
62
+ audioContext = null;
63
+ }
64
+
65
+ if (mediaRecorder) {
66
+ mediaRecorder.stop();
67
+ mediaRecorder = null;
68
+ }
69
+
70
+ }
71
+
72
+ function startRecording(stream: MediaStream) {
73
+ recordedChunks = [];
74
+ mediaRecorder = new MediaRecorder(stream);
75
+ mediaRecorder.ondataavailable = (event: BlobEvent) => {
76
+ if (event.data.size > 0) {
77
+ recordedChunks.push(event.data);
78
+ }
79
+ };
80
+ mediaRecorder.start();
81
+ }
82
+
83
+ export async function getRecorder(): Promise<Blob | null> {
84
+ if (!mediaRecorder) {
85
+ return Promise.resolve(null);
86
+ }
87
+
88
+ const recorder = mediaRecorder;
89
+ mediaRecorder = null;
90
+
91
+ const finalizeBlob = () => {
92
+ const blob = new Blob(recordedChunks, { type: recorder.mimeType || 'audio/webm' });
93
+ recordedChunks = [];
94
+ return blob;
95
+ };
96
+
97
+ if (recorder.state === 'inactive') {
98
+ return Promise.resolve(finalizeBlob());
99
+ }
100
+
101
+ return new Promise<Blob>((resolve, reject) => {
102
+ recorder.onstop = () => {
103
+ resolve(finalizeBlob());
104
+ };
105
+ recorder.onerror = () => {
106
+ recordedChunks = [];
107
+ reject(new Error('Failed to finalize audio recording.'));
108
+ };
109
+ recorder.stop();
110
+ });
111
+ }
112
+
113
+ function startUserMedia(
114
+ audioContext: AudioContext,
115
+ stream: MediaStream,
116
+ onVoiceStartCallback: () => void,
117
+ onVoiceStopCallback: () => void,
118
+ onUpdateCallback: (amplitude: number) => void
119
+ ) {
120
+ const options = {
121
+ fftSize: 1024,
122
+ bufferLen: 1024,
123
+ smoothingTimeConstant: 0.2,
124
+ minCaptureFreq: 85, // in Hz
125
+ maxCaptureFreq: 255, // in Hz
126
+ noiseCaptureDuration: CALIBRATION_DURATION, // in ms
127
+ minNoiseLevel: 0.5, // from 0 to 1
128
+ maxNoiseLevel: 0.7, // from 0 to 1
129
+ avgNoiseMultiplier: 1.2,
130
+ onVoiceStart() {
131
+ wasVoiceStarted = true;
132
+ if (!mediaRecorder || mediaRecorder.state === 'inactive') {
133
+ startRecording(currentStream as MediaStream);
134
+ }
135
+ console.log('👹👹👹voice start👹👹👹');
136
+ onVoiceStartCallback();
137
+ },
138
+ onVoiceStop() {
139
+ if (!wasVoiceStarted) {
140
+ return;
141
+ }
142
+ console.log('👿👿👿voice stop👿👿👿');
143
+ onVoiceStopCallback();
144
+ }, //Doesn't work properly, so we will handle it with onUpdate callback
145
+ onUpdate(val: number) {
146
+ onUpdateCallback(val);
147
+ }
148
+ };
149
+
150
+ vadInstance = vad(audioContext, stream, options);
151
+ }
package/custom/types.ts CHANGED
@@ -25,7 +25,7 @@ export interface IToolGroup {
25
25
  }
26
26
 
27
27
  export interface IMessage {
28
- role: 'user' | 'assistant';
28
+ role: 'user' | 'assistant' | 'system';
29
29
  metadata?: any,
30
30
  parts: IPart[];
31
31
  }
@@ -41,4 +41,54 @@ export interface ISessionsListItem {
41
41
  sessionId: string;
42
42
  title: string;
43
43
  timestamp: string;
44
- }
44
+ }
45
+
46
+ export type SpeechStreamEvent =
47
+ | {
48
+ type: 'error';
49
+ error: string;
50
+ }
51
+ | {
52
+ type: 'transcript';
53
+ data: {
54
+ text: string;
55
+ language?: string;
56
+ };
57
+ }
58
+ | {
59
+ type: 'speech-response';
60
+ data: {
61
+ transcript: {
62
+ text: string;
63
+ language?: string;
64
+ };
65
+ response: {
66
+ text: string;
67
+ };
68
+ sessionId: string;
69
+ turnId: string;
70
+ };
71
+ }
72
+ | {
73
+ type: 'audio-start';
74
+ data: {
75
+ mimeType: string;
76
+ format: string;
77
+ };
78
+ }
79
+ | {
80
+ type: 'audio-delta';
81
+ data: {
82
+ base64: string;
83
+ };
84
+ }
85
+ | {
86
+ type: 'audio-done';
87
+ }
88
+ | {
89
+ type: 'data-tool-call';
90
+ data: any;
91
+ }
92
+ | {
93
+ type: 'finish';
94
+ };
@@ -27,12 +27,6 @@ const USER_LANGUAGE_OUTPUT_SCHEMA = {
27
27
  required: ["language", "code"],
28
28
  },
29
29
  };
30
- export function formatLanguagePrompt(language) {
31
- if (!language) {
32
- return "Respond in the user's language.";
33
- }
34
- return `Respond in ${language.language} (${language.code}).`;
35
- }
36
30
  function parseUserLanguage(content) {
37
31
  if (!content) {
38
32
  return null;