@adminforth/agent 1.37.0 → 1.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/agent/languageDetect.ts +0 -8
  2. package/agent/simpleAgent.ts +5 -5
  3. package/agent/systemPrompt.ts +35 -4
  4. package/agent/toolCallEvents.ts +31 -2
  5. package/agent/tools/apiTool.ts +1 -1
  6. package/agentResponseEvents.ts +197 -0
  7. package/apiBasedTools.ts +118 -284
  8. package/build.log +12 -2
  9. package/custom/ChatSurface.vue +31 -21
  10. package/custom/composables/agentAudio/agent-processing.mp3 +0 -0
  11. package/custom/composables/agentStore/constants.ts +8 -1
  12. package/custom/composables/agentStore/useAgentSessions.ts +85 -12
  13. package/custom/composables/useAgentAudio.ts +392 -0
  14. package/custom/composables/useAgentStore.ts +52 -5
  15. package/custom/conversation_area/ConversationArea.vue +1 -1
  16. package/custom/conversation_area/MessageRenderer.vue +12 -1
  17. package/custom/conversation_area/SystemMessageRenderer.vue +28 -0
  18. package/custom/conversation_area/TextRenderer.vue +4 -3
  19. package/custom/conversation_area/ToolRenderer.vue +1 -1
  20. package/custom/package.json +2 -1
  21. package/custom/pnpm-lock.yaml +29 -0
  22. package/custom/speech_recognition_frontend/AudioLines.vue +97 -0
  23. package/custom/speech_recognition_frontend/MicrophoneButon.vue +157 -0
  24. package/custom/speech_recognition_frontend/types/voice-activity-detection.d.ts +22 -0
  25. package/custom/speech_recognition_frontend/voiceActivityDetection.ts +151 -0
  26. package/custom/types.ts +52 -2
  27. package/dist/agent/languageDetect.js +0 -6
  28. package/dist/agent/simpleAgent.js +4 -3
  29. package/dist/agent/systemPrompt.js +24 -3
  30. package/dist/agent/toolCallEvents.js +24 -2
  31. package/dist/agent/tools/apiTool.js +1 -1
  32. package/dist/agentResponseEvents.js +141 -0
  33. package/dist/apiBasedTools.js +95 -211
  34. package/dist/custom/ChatSurface.vue +31 -21
  35. package/dist/custom/composables/agentAudio/agent-processing.mp3 +0 -0
  36. package/dist/custom/composables/agentStore/constants.ts +8 -1
  37. package/dist/custom/composables/agentStore/useAgentSessions.ts +85 -12
  38. package/dist/custom/composables/useAgentAudio.ts +392 -0
  39. package/dist/custom/composables/useAgentStore.ts +52 -5
  40. package/dist/custom/conversation_area/ConversationArea.vue +1 -1
  41. package/dist/custom/conversation_area/MessageRenderer.vue +12 -1
  42. package/dist/custom/conversation_area/SystemMessageRenderer.vue +28 -0
  43. package/dist/custom/conversation_area/TextRenderer.vue +4 -3
  44. package/dist/custom/conversation_area/ToolRenderer.vue +1 -1
  45. package/dist/custom/package.json +2 -1
  46. package/dist/custom/pnpm-lock.yaml +29 -0
  47. package/dist/custom/speech_recognition_frontend/AudioLines.vue +97 -0
  48. package/dist/custom/speech_recognition_frontend/MicrophoneButon.vue +157 -0
  49. package/dist/custom/speech_recognition_frontend/types/voice-activity-detection.d.ts +22 -0
  50. package/dist/custom/speech_recognition_frontend/voiceActivityDetection.ts +151 -0
  51. package/dist/custom/types.ts +52 -2
  52. package/dist/index.js +290 -400
  53. package/index.ts +318 -492
  54. package/package.json +3 -2
  55. package/types.ts +1 -1
@@ -0,0 +1,97 @@
1
+ <template>
+   <!--
+     Vertical bars shown inside the microphone button.
+     - Four bars are always rendered; each one shrinks to h-1 while `isRecording` is true.
+     - While `showAnimation` is true every bar gets its own `recordingAnimationN` class.
+     - While `isRecording` is true a fifth bar and the translated "End" label are added.
+     The template has several root nodes, so the parent element controls the layout.
+   -->
+   <div
+     class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
+     :class="{
+       'recordingAnimation1' : showAnimation,
+       'h-2': !isRecording,
+       'h-1': isRecording,
+     }"
+   />
+   <div
+     class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
+     :class="{
+       'recordingAnimation2' : showAnimation,
+       'h-4': !isRecording,
+       'h-1': isRecording,
+     }"
+   />
+   <div
+     class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
+     :class="{
+       'recordingAnimation3' : showAnimation,
+       'h-3': !isRecording,
+       'h-1': isRecording,
+     }"
+   />
+   <div
+     class=" bg-white w-[0.2rem] rounded-sm transition-all duration-300 ease-in-out"
+     :class="{
+       'recordingAnimation4' : showAnimation,
+       'h-2': !isRecording,
+       'h-1': isRecording,
+     }"
+   />
+   <template v-if="isRecording">
+     <div
+       class=" bg-white w-[0.2rem] rounded-sm h-1 transition-all duration-300 ease-in-out"
+       :class="{
+         'recordingAnimation5' : showAnimation,
+       }"
+     />
+     <!-- Translated like the "Stop" label of the microphone button instead of being hard-coded -->
+     <p class="text-white ml-2">{{ $t('End') }}</p>
+   </template>
+ </template>
44
+
45
+
46
+
47
+ <script setup lang="ts">
+ // The component is purely presentational: the template reads both props directly,
+ // so the return value of defineProps does not need to be bound to a variable.
+ defineProps<{
+   /** When true, every bar receives its `recordingAnimationN` class. */
+   showAnimation: boolean;
+   /** When true, bars shrink to h-1 and the extra bar plus the "End" label are rendered. */
+   isRecording: boolean;
+ }>();
+ </script>
55
+
56
+ <style scoped lang="scss">
+ /* All bars share the same endless one-second pulse. */
+ .recordingAnimation1,
+ .recordingAnimation2,
+ .recordingAnimation3,
+ .recordingAnimation4,
+ .recordingAnimation5 {
+   animation: recordingAnimation 1s infinite;
+ }
+
+ /* Per-bar base height and start offset; the offsets grow by 0.2s from bar to bar. */
+ .recordingAnimation1 {
+   height: 0.3rem;
+ }
+
+ .recordingAnimation2 {
+   animation-delay: 0.2s;
+   height: 0.5rem;
+ }
+
+ .recordingAnimation3 {
+   animation-delay: 0.4s;
+   height: 0.4rem;
+ }
+
+ .recordingAnimation4 {
+   animation-delay: 0.6s;
+   height: 0.5rem;
+ }
+
+ .recordingAnimation5 {
+   animation-delay: 0.8s;
+   height: 0.3rem;
+ }
+
+ /* Stretch to double height at the midpoint and return to the base height. */
+ @keyframes recordingAnimation {
+   0%,
+   100% {
+     transform: scaleY(1);
+   }
+   50% {
+     transform: scaleY(2);
+   }
+ }
+ </style>
@@ -0,0 +1,157 @@
1
+ <template>
+   <!--
+     Round toggle for audio chat mode. Outside audio chat mode it is a 9-unit circle
+     at `right-16`; in audio chat mode it widens to `w-32` and is centred horizontally.
+     `type="button"` keeps a click from submitting a surrounding form.
+   -->
+   <button
+     type="button"
+     class="absolute bottom-2 h-9 bg-lightPrimary dark:bg-darkPrimary
+       hover:opacity-90 rounded-full flex items-center justify-center
+       transition-all duration-300 ease-in-out overflow-hidden"
+     :class="isAudioChatMode ? 'w-32 px-2 right-1/2 translate-x-1/2' : 'w-9 right-16'"
+     @click="toggleChatMode"
+   >
+     <div class="w-5 h-5 flex items-center justify-center">
+       <!-- Idle or listening: audio bars -->
+       <div v-if="microphoneButtonMode === 'listen' || microphoneButtonMode === 'off'" class="flex justify-evenly items-center gap-[0.1rem]">
+         <AudioLines :showAnimation="showAudioWavesAnimation" :isRecording="microphoneButtonMode === 'listen'" />
+       </div>
+       <!-- The agent is generating a response: stop square plus label -->
+       <div v-else-if="microphoneButtonMode === 'generating'" class="flex items-center justify-center gap-2 text-white text-sm">
+         <span class="w-3 h-3 bg-white rounded-sm" />
+         {{ $t('Stop') }}
+       </div>
+       <!-- Remaining modes ('calibrating', 'transcribing'): spinner -->
+       <Spinner v-else class="w-4 h-4 text-lightButtonsText dark:text-darkButtonsText fill-lightButtonsBackground dark:fill-darkPrimary" />
+     </div>
+
+   </button>
+ </template>
22
+
23
+
24
+ <script setup lang="ts">
25
+ import { computed, onMounted, onBeforeUnmount, ref, watch } from 'vue';
26
+ import debounce from 'lodash/debounce';
27
+ import { requestMicAndStartVAD, stopUserMedia, getRecorder, CALIBRATION_DURATION } from './voiceActivityDetection';
28
+ import { Spinner } from '@/afcl'
29
+ import { storeToRefs } from 'pinia';
30
+ import { useAgentStore } from '../composables/useAgentStore';
31
+ import { useAgentAudio } from '../composables/useAgentAudio';
32
+ import AudioLines from './AudioLines.vue';
33
+
34
+ // Visual states of the button; the template maps each of them to an icon, label or spinner.
+ type MicrophoneButtonMode = 'off' | 'calibrating' | 'listen' | 'transcribing' | 'generating';
+
+ const agentStore = useAgentStore();
+ const agentAudio = useAgentAudio();
+ const {
+   sendAudioToServerAndHandleResponse,
+   stopGenerationAndAudio,
+   stopCurrentAudioPlayback,
+ } = agentAudio;
+ const { agentAudioMode } = storeToRefs(agentAudio);
+
+ // Mirrors the store flag so the template and the handlers share one reactive source.
+ const isAudioChatMode = computed(() => agentStore.isAudioChatMode);
+
+ const microphoneButtonMode = ref<MicrophoneButtonMode>('off');
+ const showAudioWavesAnimation = ref(false);
+
+ // Hides the wave animation 100 ms after the last trigger.
+ const hideAnimationDebounced = debounce(() => {
+   showAudioWavesAnimation.value = false;
+ }, 100);
+
+ // Sends the current recording for transcription 500 ms after the last trigger.
+ const sendUserRecordDebounced = debounce(() => {
+   sendRecordForTranscription();
+ }, 500);
50
+
51
+ // On mount, register a hook with the store that runs before the chat closes and
+ // shuts audio chat mode down if it is still active.
+ onMounted(() => {
+   agentStore.registerOnBeforeChatCloseCallback(async () => {
+     if (!agentStore.isAudioChatMode) {
+       return;
+     }
+     onStopRecording();
+     resetAll();
+     agentStore.setIsAudioChatMode(false);
+   });
+ });
60
+
61
+ // Keeps the button state in sync with the audio pipeline mode exposed by useAgentAudio.
+ watch(agentAudioMode, (mode) => {
+   switch (mode) {
+     case 'streaming':
+       stopCurrentAudioPlayback(true);
+       microphoneButtonMode.value = 'generating';
+       break;
+     case 'transcribing':
+       microphoneButtonMode.value = 'transcribing';
+       break;
+     case 'fetchingAudio': // Generation is done, waiting for audio to be ready
+     case 'playingAgentResponse': // Audio is playing
+       // Neither mode changes the button.
+       break;
+     default:
+       // Any other mode: fall back to listening, or to off outside audio chat mode.
+       microphoneButtonMode.value = isAudioChatMode.value ? 'listen' : 'off';
+   }
+ });
79
+
80
+ /**
+  * Click handler of the button: flips audio chat mode in the store, then either
+  * starts recording (mode now on) or resets state and stops recording (mode now off).
+  */
+ function toggleChatMode() {
+   const wasActive = isAudioChatMode.value;
+   agentStore.setIsAudioChatMode(!wasActive);
+
+   // Re-read the flag after the store update, exactly as the branch condition requires.
+   if (isAudioChatMode.value) {
+     onStartRecording();
+     return;
+   }
+
+   resetAll();
+   onStopRecording();
+ }
89
+
90
+ /**
+  * Shows the 'calibrating' state, opens the microphone with voice activity detection,
+  * and after CALIBRATION_DURATION switches to 'listen' and plays the start beep.
+  *
+  * Failure handling:
+  * - If the microphone cannot be opened, the error is logged, capture resources are
+  *   released and audio chat mode is switched off so the button does not stay on the spinner.
+  * - If the user left audio chat mode while the request or the calibration was still
+  *   pending, the function stops capture and does not switch to 'listen' or beep.
+  */
+ async function onStartRecording() {
+   microphoneButtonMode.value = 'calibrating';
+
+   try {
+     await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
+   } catch (error) {
+     console.error('Could not start voice recording', error);
+     stopUserMedia();
+     microphoneButtonMode.value = 'off';
+     agentStore.setIsAudioChatMode(false);
+     return;
+   }
+
+   // The mode may have been toggled off while the permission prompt was open.
+   if (!isAudioChatMode.value) {
+     stopUserMedia();
+     return;
+   }
+
+   setTimeout(() => {
+     // Skip the transition if calibration was interrupted in the meantime.
+     if (!isAudioChatMode.value || microphoneButtonMode.value !== 'calibrating') {
+       return;
+     }
+     microphoneButtonMode.value = 'listen';
+     agentAudio.playBeep(1000);
+   }, CALIBRATION_DURATION);
+ }
98
+
99
+ /**
+  * Ends microphone capture: plays a beep (`playBeep(600)`; the argument is presumably
+  * the tone frequency - TODO confirm), stops the media stream / VAD through
+  * `stopUserMedia()` and hides the wave animation.
+  */
+ function onStopRecording() {
100
+ agentAudio.playBeep(600);
101
+ stopUserMedia();
102
+ showAudioWavesAnimation.value = false;
103
+ }
104
+
105
+ /**
+  * Returns the button to its idle state: stops generation and audio via
+  * `stopGenerationAndAudio()`, sets the mode to 'off', hides the wave animation
+  * and cancels both pending debounced callbacks.
+  */
+ function resetAll() {
106
+ stopGenerationAndAudio();
107
+ microphoneButtonMode.value = 'off';
108
+ showAudioWavesAnimation.value = false;
109
+ hideAnimationDebounced.cancel();
110
+ sendUserRecordDebounced.cancel();
111
+ }
112
+
113
+
114
+ /**
+  * Passed as the FIRST callback to `requestMicAndStartVAD`, i.e. its
+  * `onVoiceStopCallback` parameter, so it runs when the detector reports that
+  * speech stopped. Shows the wave animation, then (re)schedules the debounced
+  * hide of the animation and the debounced upload of the recording.
+  */
+ function saidSomething() {
115
+ showAudioWavesAnimation.value = true;
116
+ hideAnimationDebounced();
117
+ sendUserRecordDebounced();
118
+ }
119
+
120
+ /**
+  * Passed as the SECOND callback to `requestMicAndStartVAD`, i.e. its
+  * `onVoiceStartCallback` parameter. Despite the name it does not stop anything
+  * by itself: it only cancels the pending debounced hide/upload so a recording is
+  * not sent while the user starts speaking again.
+  */
+ function stopRecording() {
121
+ hideAnimationDebounced.cancel();
122
+ sendUserRecordDebounced.cancel();
123
+ }
124
+
125
+ /**
+  * Amplitude callback of the voice activity detector. Values below 0.01 hide the
+  * wave animation; anything else shows it and cancels the pending debounced hide.
+  *
+  * @param amplitude current level reported by the detector
+  */
+ function onAnySound(amplitude: number) {
+   const isSilent = amplitude < 0.01;
+   showAudioWavesAnimation.value = !isSilent;
+   if (!isSilent) {
+     hideAnimationDebounced.cancel();
+   }
+ }
133
+
134
+ /**
+  * Finalizes the current recording, uploads it, and re-arms the microphone when
+  * audio chat mode is still active afterwards.
+  *
+  * This runs from a debounced callback, so nothing awaits it. Every failure is
+  * therefore handled here instead of surfacing as an unhandled rejection:
+  * - recording could not be finalized / nothing recorded: logged, nothing is sent;
+  * - upload or response handling failed: logged, the microphone is still re-armed;
+  * - microphone could not be re-armed: logged and audio chat mode is switched off.
+  */
+ async function sendRecordForTranscription() {
+   showAudioWavesAnimation.value = false;
+
+   let recordBlob: Blob | null;
+   try {
+     recordBlob = await getRecorder();
+   } catch (error) {
+     console.error('Failed to finalize audio recording', error);
+     return;
+   }
+   if (!recordBlob) {
+     console.error('No audio recorded');
+     return;
+   }
+
+   onStopRecording();
+   try {
+     await sendAudioToServerAndHandleResponse(recordBlob);
+   } catch (error) {
+     console.error('Failed to process recorded audio', error);
+   }
+
+   if (!agentStore.isAudioChatMode) {
+     return;
+   }
+   try {
+     await requestMicAndStartVAD(saidSomething, stopRecording, onAnySound);
+   } catch (error) {
+     console.error('Could not restart voice recording', error);
+     stopUserMedia();
+     microphoneButtonMode.value = 'off';
+     agentStore.setIsAudioChatMode(false);
+   }
+ }
147
+
148
+ // Teardown before the component is unmounted: release the microphone, leave audio
+ // chat mode and cancel the pending debounced callbacks.
+ // NOTE(review): onStopRecording() calls stopUserMedia() again and also plays the
+ // stop beep, so the direct stopUserMedia() call looks redundant and the beep is
+ // played on every unmount - confirm this is intended.
+ onBeforeUnmount(() => {
149
+ stopUserMedia();
150
+ agentStore.setIsAudioChatMode(false);
151
+ onStopRecording();
152
+ hideAnimationDebounced.cancel();
153
+ sendUserRecordDebounced.cancel();
154
+ });
155
+
156
+
157
+ </script>
@@ -0,0 +1,22 @@
1
+ /**
+  * Ambient typings for the untyped `voice-activity-detection` package.
+  */
+ declare module 'voice-activity-detection' {
+   /** Settings and event callbacks accepted by `vad()`. */
+   type VADOptions = {
+     fftSize: number;
+     bufferLen: number;
+     smoothingTimeConstant: number;
+     minCaptureFreq: number; // in Hz
+     maxCaptureFreq: number; // in Hz
+     noiseCaptureDuration: number; // in ms
+     minNoiseLevel: number; // from 0 to 1
+     maxNoiseLevel: number; // from 0 to 1
+     avgNoiseMultiplier: number; // multiplier applied to the measured noise; may exceed 1 (the caller passes 1.2)
+     onVoiceStart?: () => void;
+     onVoiceStop?: () => void;
+     onUpdate?: (value: number) => void;
+   };
+
+   /**
+    * Handle returned by `vad()`. The caller keeps it and invokes `destroy()` on
+    * teardown, which the previous `void` return type did not allow to express.
+    */
+   type VADInstance = {
+     connect: () => void;
+     disconnect: () => void;
+     destroy: () => void;
+   };
+
+   export default function vad(
+     audioContext: AudioContext,
+     stream: MediaStream,
+     options: VADOptions,
+   ): VADInstance;
+ }
@@ -0,0 +1,151 @@
1
+ import vad from 'voice-activity-detection';
2
+
3
+ // Module-level state of the single active capture session.
+ // Microphone stream obtained from getUserMedia; null when no session is active.
+ let currentStream: MediaStream | null = null;
4
+ // Handle returned by vad(); stopUserMedia() calls its destroy() when present.
+ let vadInstance: any = null;
5
+ // Audio context handed to the voice activity detector.
+ let audioContext: AudioContext | null = null;
6
+ // Recorder of the current utterance; getRecorder() takes ownership and resets this to null.
+ let mediaRecorder: MediaRecorder | null = null;
7
+ // Chunks collected from the recorder's dataavailable events.
+ let recordedChunks: BlobPart[] = [];
8
+ // Set once the detector reported a voice start; gates the voice-stop callback.
+ let wasVoiceStarted = false;
9
+
10
+ // Used as the detector's noiseCaptureDuration and by the UI as the calibration delay.
+ export const CALIBRATION_DURATION = 1000; // in ms
11
+
12
+ /** Closes an audio context, ignoring the rejection raised for an already closed one. */
+ function closeAudioContextQuietly(context: AudioContext) {
+   if (context.state !== 'closed') {
+     context.close().catch(() => undefined);
+   }
+ }
+
+ /**
+  * Asks for microphone access, starts recording and starts voice activity detection.
+  *
+  * NOTE the parameter order: the voice-STOP callback comes first, the voice-START
+  * callback second.
+  *
+  * @param onVoiceStopCallback invoked when the detector reports that speech stopped
+  * @param onVoiceStartCallback invoked when the detector reports that speech started
+  * @param onUpdateCallback invoked with the value the detector passes to `onUpdate`
+  * @returns a promise that resolves once capture and detection are running, or
+  *   immediately after cleanup when the session was torn down (for example by
+  *   `stopUserMedia()`) while the permission prompt was still open
+  * @throws rejects when audio capture is unsupported, the microphone cannot be
+  *   opened, or the recorder / detector fail to start; acquired resources are
+  *   released before rejecting
+  */
+ export async function requestMicAndStartVAD(
+   onVoiceStopCallback: () => void,
+   onVoiceStartCallback: () => void,
+   onUpdateCallback: (amplitude: number) => void
+ ): Promise<void> {
+   if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
+     handleUserMediaError();
+     throw new Error('navigator.mediaDevices.getUserMedia is not available.');
+   }
+
+   // Created before the first await so that it still happens synchronously in the
+   // caller's click handler, as before.
+   let context: AudioContext;
+   try {
+     context = new AudioContext();
+   } catch (e) {
+     handleUserMediaError();
+     throw e;
+   }
+   audioContext = context;
+
+   let stream: MediaStream;
+   try {
+     stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+   } catch (error) {
+     handleMicConnectError();
+     // Do not leak the context when access is denied.
+     if (audioContext === context) {
+       audioContext = null;
+     }
+     closeAudioContextQuietly(context);
+     throw error;
+   }
+
+   if (audioContext !== context) {
+     // The session was stopped or replaced while the prompt was pending:
+     // release the freshly opened microphone instead of recording into nothing.
+     stream.getTracks().forEach((track) => track.stop());
+     closeAudioContextQuietly(context);
+     return;
+   }
+
+   currentStream = stream;
+   try {
+     startRecording(stream);
+     startUserMedia(context, stream, onVoiceStartCallback, onVoiceStopCallback, onUpdateCallback);
+   } catch (error) {
+     // Best-effort cleanup; the original error is the one worth reporting.
+     await stopUserMedia().catch(() => undefined);
+     throw error;
+   }
+ }
39
+
40
+ /**
+  * Logs that microphone input is unsupported. Called when setting up the audio
+  * context or issuing the getUserMedia request throws synchronously.
+  */
+ function handleUserMediaError() {
41
+ console.error('Mic input is not supported by the browser.');
42
+ }
43
+
44
+ /**
+  * Logs that the microphone could not be connected. Called from the rejection
+  * handler of the getUserMedia promise chain.
+  */
+ function handleMicConnectError() {
45
+ console.error('Could not connect microphone. Possible rejected by the user or is blocked by the browser.');
46
+ }
47
+
48
+ /**
+  * Tears down the active capture session: destroys the voice activity detector,
+  * stops the recorder, stops all microphone tracks and closes the audio context.
+  * Safe to call repeatedly and when no session is active.
+  *
+  * The recorder is stopped before the tracks, and its data is discarded: once
+  * `mediaRecorder` is reset nothing can retrieve the recording through
+  * `getRecorder()` anymore.
+  */
+ export async function stopUserMedia() {
+   wasVoiceStarted = false;
+
+   if (vadInstance && vadInstance.destroy) {
+     vadInstance.destroy();
+   }
+   vadInstance = null;
+
+   if (mediaRecorder) {
+     const recorder = mediaRecorder;
+     mediaRecorder = null;
+     // Detach first so the final dataavailable event cannot leak a stale chunk
+     // into the next recording.
+     recorder.ondataavailable = null;
+     // stop() throws InvalidStateError on a recorder that is already inactive.
+     if (recorder.state !== 'inactive') {
+       recorder.stop();
+     }
+     recordedChunks = [];
+   }
+
+   if (currentStream) {
+     currentStream.getTracks().forEach((track) => track.stop());
+     currentStream = null;
+   }
+
+   if (audioContext) {
+     const context = audioContext;
+     audioContext = null;
+     // close() returns a promise and rejects for an already closed context.
+     if (context.state !== 'closed') {
+       context.close().catch(() => undefined);
+     }
+   }
+ }
71
+
72
+ /**
+  * Starts a fresh recording of `stream`: clears the chunks collected so far,
+  * creates a new MediaRecorder (replacing the module-level one) and collects every
+  * non-empty chunk it emits. `start()` is called without a timeslice argument.
+  */
+ function startRecording(stream: MediaStream) {
73
+ recordedChunks = [];
74
+ mediaRecorder = new MediaRecorder(stream);
75
+ mediaRecorder.ondataavailable = (event: BlobEvent) => {
76
+ if (event.data.size > 0) {
77
+ recordedChunks.push(event.data);
78
+ }
79
+ };
80
+ mediaRecorder.start();
81
+ }
82
+
83
+ /**
+  * Takes ownership of the current recorder, stops it if necessary and returns the
+  * recorded audio. Despite its name the function yields the recording, not the
+  * recorder object.
+  *
+  * @returns the recorded audio as a Blob (typed with the recorder's MIME type,
+  *   falling back to 'audio/webm'), or null when no recorder exists
+  * @throws rejects when the recorder raises an error while being stopped
+  */
+ export async function getRecorder(): Promise<Blob | null> {
+   const activeRecorder = mediaRecorder;
+   if (!activeRecorder) {
+     return null;
+   }
+   // From here on the module no longer references this recorder.
+   mediaRecorder = null;
+
+   // Packs the collected chunks into one blob and empties the shared buffer.
+   const buildBlob = (): Blob => {
+     const chunks = recordedChunks;
+     recordedChunks = [];
+     return new Blob(chunks, { type: activeRecorder.mimeType || 'audio/webm' });
+   };
+
+   if (activeRecorder.state === 'inactive') {
+     return buildBlob();
+   }
+
+   return new Promise<Blob>((resolve, reject) => {
+     activeRecorder.onerror = () => {
+       recordedChunks = [];
+       reject(new Error('Failed to finalize audio recording.'));
+     };
+     activeRecorder.onstop = () => {
+       resolve(buildBlob());
+     };
+     activeRecorder.stop();
+   });
+ }
112
+
113
+ /**
+  * Configures and starts the voice activity detector on `stream` and stores the
+  * returned handle in `vadInstance`.
+  *
+  * @param audioContext context the detector works with
+  * @param stream microphone stream to analyse
+  * @param onVoiceStartCallback invoked after the detector reports a voice start
+  * @param onVoiceStopCallback invoked after the detector reports a voice stop, but
+  *   only once a voice start has been seen in this session
+  * @param onUpdateCallback receives every value the detector passes to `onUpdate`
+  */
+ function startUserMedia(
+   audioContext: AudioContext,
+   stream: MediaStream,
+   onVoiceStartCallback: () => void,
+   onVoiceStopCallback: () => void,
+   onUpdateCallback: (amplitude: number) => void
+ ) {
+   const options = {
+     fftSize: 1024,
+     bufferLen: 1024,
+     smoothingTimeConstant: 0.2,
+     minCaptureFreq: 85, // in Hz
+     maxCaptureFreq: 255, // in Hz
+     noiseCaptureDuration: CALIBRATION_DURATION, // in ms
+     minNoiseLevel: 0.5, // from 0 to 1
+     maxNoiseLevel: 0.7, // from 0 to 1
+     avgNoiseMultiplier: 1.2,
+     onVoiceStart() {
+       if (!currentStream) {
+         // The session was torn down; a late detector event must not start a recorder.
+         return;
+       }
+       wasVoiceStarted = true;
+       // The previous utterance may already have been taken by getRecorder().
+       if (!mediaRecorder || mediaRecorder.state === 'inactive') {
+         startRecording(currentStream);
+       }
+       onVoiceStartCallback();
+     },
+     // Original author's note: "Doesn't work properly, so we will handle it with
+     // onUpdate callback".
+     onVoiceStop() {
+       if (!wasVoiceStarted) {
+         return;
+       }
+       onVoiceStopCallback();
+     },
+     onUpdate(val: number) {
+       onUpdateCallback(val);
+     }
+   };
+
+   vadInstance = vad(audioContext, stream, options);
+ }
@@ -25,7 +25,7 @@ export interface IToolGroup {
25
25
  }
26
26
 
27
27
  export interface IMessage {
28
- role: 'user' | 'assistant';
28
+ role: 'user' | 'assistant' | 'system';
29
29
  metadata?: any,
30
30
  parts: IPart[];
31
31
  }
@@ -41,4 +41,54 @@ export interface ISessionsListItem {
41
41
  sessionId: string;
42
42
  title: string;
43
43
  timestamp: string;
44
- }
44
+ }
45
+
46
+ /**
+  * Events of the speech stream, discriminated by `type`.
+  */
+ export type SpeechStreamEvent =
+   /** Failure report carrying an error message. */
+   | { type: 'error'; error: string }
+   /** Transcript text, optionally tagged with a language. */
+   | { type: 'transcript'; data: { text: string; language?: string } }
+   /** Transcript and response text of one turn, with its session and turn ids. */
+   | {
+       type: 'speech-response';
+       data: {
+         transcript: { text: string; language?: string };
+         response: { text: string };
+         sessionId: string;
+         turnId: string;
+       };
+     }
+   /** Start of an audio payload: MIME type and format. */
+   | { type: 'audio-start'; data: { mimeType: string; format: string } }
+   /** Audio payload part, carried in the `base64` string. */
+   | { type: 'audio-delta'; data: { base64: string } }
+   /** End of the audio payload. */
+   | { type: 'audio-done' }
+   /** Tool call event; the payload shape is not constrained here. */
+   | { type: 'data-tool-call'; data: any }
+   /** End of the stream. */
+   | { type: 'finish' };