@hivegpt/hiveai-angular 0.0.584 → 0.0.586

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. package/bundles/hivegpt-hiveai-angular.umd.js +630 -215
  2. package/bundles/hivegpt-hiveai-angular.umd.js.map +1 -1
  3. package/bundles/hivegpt-hiveai-angular.umd.min.js +1 -1
  4. package/bundles/hivegpt-hiveai-angular.umd.min.js.map +1 -1
  5. package/esm2015/hivegpt-hiveai-angular.js +6 -4
  6. package/esm2015/lib/components/voice-agent/services/audio-analyzer.service.js +3 -3
  7. package/esm2015/lib/components/voice-agent/services/daily-voice-client.service.js +312 -0
  8. package/esm2015/lib/components/voice-agent/services/voice-agent.service.js +155 -181
  9. package/esm2015/lib/components/voice-agent/services/websocket-voice-client.service.js +95 -0
  10. package/esm2015/lib/components/voice-agent/voice-agent.module.js +7 -3
  11. package/fesm2015/hivegpt-hiveai-angular.js +555 -183
  12. package/fesm2015/hivegpt-hiveai-angular.js.map +1 -1
  13. package/hivegpt-hiveai-angular.d.ts +5 -3
  14. package/hivegpt-hiveai-angular.d.ts.map +1 -1
  15. package/hivegpt-hiveai-angular.metadata.json +1 -1
  16. package/lib/components/voice-agent/services/audio-analyzer.service.d.ts +2 -2
  17. package/lib/components/voice-agent/services/daily-voice-client.service.d.ts +65 -0
  18. package/lib/components/voice-agent/services/daily-voice-client.service.d.ts.map +1 -0
  19. package/lib/components/voice-agent/services/voice-agent.service.d.ts +17 -23
  20. package/lib/components/voice-agent/services/voice-agent.service.d.ts.map +1 -1
  21. package/lib/components/voice-agent/services/websocket-voice-client.service.d.ts +49 -0
  22. package/lib/components/voice-agent/services/websocket-voice-client.service.d.ts.map +1 -0
  23. package/lib/components/voice-agent/voice-agent.module.d.ts +2 -2
  24. package/lib/components/voice-agent/voice-agent.module.d.ts.map +1 -1
  25. package/package.json +1 -1
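
In short, 0.0.586 swaps the @pipecat-ai client/transport pair for two new injectables: a native-WebSocket signaling client (WebSocketVoiceClientService) and a Daily.js WebRTC audio client (DailyVoiceClientService), orchestrated by VoiceAgentService. A hedged sketch of the new flow, inferred from the hunks below (the TypeScript alias is illustrative, not part of the package's public API):

// Signaling messages parsed by WebSocketVoiceClientService (shapes inferred from the diff):
type SignalingMessage =
  | { type: 'room_created'; room_url?: string; roomUrl?: string }
  | { type: 'user_transcript'; text: string; final?: boolean }
  | { type: 'bot_transcript'; text: string };

// New connection sequence (endpoint, payload, and field names as they appear in the diff):
// 1. POST `${apiUrl}/ai/ask-voice` with { bot_id, conversation_id, voice } -> { rn_ws_url }
// 2. new WebSocket(rn_ws_url), wait for { type: 'room_created' } (no audio over this socket)
// 3. Daily.createCallObject({ videoSource: false, audioSource }).join({ url: room_url }) for WebRTC audio
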
@@ -5,16 +5,15 @@ import { HttpClient, HttpHeaders } from '@angular/common/http';
  import * as i0 from '@angular/core';
  import { Injectable, InjectionToken, Inject, PLATFORM_ID, Optional, NgZone, EventEmitter, Component, Injector, Output, Input, ElementRef, ChangeDetectionStrategy, ChangeDetectorRef, Renderer2, ViewContainerRef, ViewChild, ViewChildren, NgModule, Pipe } from '@angular/core';
  import { DomSanitizer } from '@angular/platform-browser';
- import { BehaviorSubject, of, throwError, Subject, Subscription } from 'rxjs';
- import { switchMap, catchError, filter, take, map, tap } from 'rxjs/operators';
+ import { BehaviorSubject, of, throwError, Subject, Subscription, combineLatest } from 'rxjs';
+ import { switchMap, catchError, filter, take, map, takeUntil, tap } from 'rxjs/operators';
  import { isPlatformBrowser, CommonModule, DOCUMENT } from '@angular/common';
  import { Socket } from 'ngx-socket-io';
  import { Validators, FormBuilder, FormsModule, ReactiveFormsModule } from '@angular/forms';
  import * as SpeechSDK from 'microsoft-cognitiveservices-speech-sdk';
  import * as marked from 'marked';
  import { __awaiter } from 'tslib';
- import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
- import { WebSocketTransport } from '@pipecat-ai/websocket-transport';
+ import Daily from '@daily-co/daily-js';
  import { MatIconModule } from '@angular/material/icon';
  import { MatSidenavModule } from '@angular/material/sidenav';
  import { QuillModule } from 'ngx-quill';
@@ -685,8 +684,8 @@ BotsService.ctorParameters = () => [
  ];

  /**
- * Audio analyzer for waveform visualization and local (mic) speaking detection.
- * VoiceAgentService may combine this with WebSocket server events for call state.
+ * Audio analyzer for waveform visualization only.
+ * Do NOT use isUserSpeaking$ for call state; speaking state must come from Daily.js.
  */
  class AudioAnalyzerService {
  constructor() {
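
The reworded analyzer contract above matters for integrators: waveform levels stay analyzer-driven, while speaking state now comes from Daily. A hedged consumer-side sketch (the component is hypothetical; audioLevels$ and isUserSpeaking$ are assumed to be the public observables backed by the subjects shown later in this diff, with audioLevels$ assumed to emit number[]):

import { Component, OnDestroy, OnInit } from '@angular/core';
import { Subscription } from 'rxjs';
import { VoiceAgentService } from '@hivegpt/hiveai-angular';

@Component({ selector: 'voice-waveform', template: '' })
export class VoiceWaveformComponent implements OnInit, OnDestroy {
  levels: number[] = []; // waveform bars: visualization only (AudioAnalyzerService)
  userSpeaking = false;  // call state: sourced from Daily, never from the analyzer
  private subs = new Subscription();
  constructor(private voiceAgent: VoiceAgentService) {}
  ngOnInit(): void {
    this.subs.add(this.voiceAgent.audioLevels$.subscribe((l: number[]) => (this.levels = l)));
    this.subs.add(this.voiceAgent.isUserSpeaking$.subscribe((s: boolean) => (this.userSpeaking = s)));
  }
  ngOnDestroy(): void {
    this.subs.unsubscribe();
  }
}
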
@@ -807,23 +806,423 @@ AudioAnalyzerService.decorators = [
  ];

  /**
- * Voice agent orchestrator using the official PipecatClient SDK.
+ * WebSocket-only client for voice agent signaling.
+ * CRITICAL: Uses native WebSocket only. NO Socket.IO, NO ngx-socket-io.
  *
- * Audio flow (mirrors the React reference implementation):
- * - Local mic: acquired by PipecatClient.initDevices(); local track fed to
- * AudioAnalyzerService for waveform visualisation.
- * - Bot audio: received as a MediaStreamTrack via RTVIEvent.TrackStarted,
- * played through a hidden <audio> element.
- * - All binary protobuf framing / RTVI protocol handled by
- * @pipecat-ai/client-js + @pipecat-ai/websocket-transport.
+ * Responsibilities:
+ * - Connect to ws_url (from POST /ai/ask-voice response)
+ * - Parse JSON messages (room_created, user_transcript, bot_transcript)
+ * - Emit roomCreated$, userTranscript$, botTranscript$
+ * - NO audio logic, NO mic logic. Audio is handled by Daily.js (WebRTC).
+ */
+ class WebSocketVoiceClientService {
+ constructor() {
+ this.ws = null;
+ this.roomCreatedSubject = new Subject();
+ this.userTranscriptSubject = new Subject();
+ this.botTranscriptSubject = new Subject();
+ /** Emits room_url when backend sends room_created. */
+ this.roomCreated$ = this.roomCreatedSubject.asObservable();
+ /** Emits user transcript updates. */
+ this.userTranscript$ = this.userTranscriptSubject.asObservable();
+ /** Emits bot transcript updates. */
+ this.botTranscript$ = this.botTranscriptSubject.asObservable();
+ }
+ /** Connect to signaling WebSocket. No audio over this connection. */
+ connect(wsUrl) {
+ var _a;
+ if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
+ return;
+ }
+ if (this.ws) {
+ this.ws.close();
+ this.ws = null;
+ }
+ try {
+ this.ws = new WebSocket(wsUrl);
+ this.ws.onmessage = (event) => {
+ var _a;
+ try {
+ const msg = JSON.parse(event.data);
+ if ((msg === null || msg === void 0 ? void 0 : msg.type) === 'room_created') {
+ const roomUrl = ((_a = msg.room_url) !== null && _a !== void 0 ? _a : msg.roomUrl);
+ if (typeof roomUrl === 'string') {
+ this.roomCreatedSubject.next(roomUrl);
+ }
+ }
+ else if ((msg === null || msg === void 0 ? void 0 : msg.type) === 'user_transcript' && typeof msg.text === 'string') {
+ this.userTranscriptSubject.next({
+ text: msg.text,
+ final: msg.final === true,
+ });
+ }
+ else if ((msg === null || msg === void 0 ? void 0 : msg.type) === 'bot_transcript' && typeof msg.text === 'string') {
+ this.botTranscriptSubject.next(msg.text);
+ }
+ }
+ catch (_b) {
+ // Ignore non-JSON or unknown messages
+ }
+ };
+ this.ws.onerror = () => {
+ this.disconnect();
+ };
+ this.ws.onclose = () => {
+ this.ws = null;
+ };
+ }
+ catch (err) {
+ console.error('WebSocketVoiceClient: connect failed', err);
+ this.ws = null;
+ throw err;
+ }
+ }
+ /** Disconnect and cleanup. */
+ disconnect() {
+ if (this.ws) {
+ this.ws.close();
+ this.ws = null;
+ }
+ }
+ /** Whether the WebSocket is open. */
+ get isConnected() {
+ var _a;
+ return ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN;
+ }
+ }
+ WebSocketVoiceClientService.ɵprov = i0.ɵɵdefineInjectable({ factory: function WebSocketVoiceClientService_Factory() { return new WebSocketVoiceClientService(); }, token: WebSocketVoiceClientService, providedIn: "root" });
+ WebSocketVoiceClientService.decorators = [
+ { type: Injectable, args: [{
+ providedIn: 'root',
+ },] }
+ ];
+
+ /**
+ * Daily.js WebRTC client for voice agent audio.
+ * Responsibilities:
+ * - Create and manage Daily CallObject
+ * - Join Daily room using room_url
+ * - Handle mic capture + speaker playback
+ * - Bot speaking detection via AnalyserNode on remote track (instant)
+ * - User speaking detection via active-speaker-change
+ * - Expose speaking$ (bot speaking), userSpeaking$ (user speaking), micMuted$
+ * - Expose localStream$ for waveform visualization (AudioAnalyzerService)
+ */
+ class DailyVoiceClientService {
+ constructor(ngZone) {
+ this.ngZone = ngZone;
+ this.callObject = null;
+ this.localStream = null;
+ this.localSessionId = null;
+ /** Explicit playback of remote (bot) audio; required in some browsers. */
+ this.remoteAudioElement = null;
+ /** AnalyserNode-based remote audio monitor for instant bot speaking detection. */
+ this.remoteAudioContext = null;
+ this.remoteSpeakingRAF = null;
+ this.speakingSubject = new BehaviorSubject(false);
+ this.userSpeakingSubject = new BehaviorSubject(false);
+ this.micMutedSubject = new BehaviorSubject(false);
+ this.localStreamSubject = new BehaviorSubject(null);
+ this.firstRemoteAudioFrameSubject = new BehaviorSubject(false);
+ /** True when bot (remote participant) is the active speaker. */
+ this.speaking$ = this.speakingSubject.asObservable();
+ /** True when user (local participant) is the active speaker. */
+ this.userSpeaking$ = this.userSpeakingSubject.asObservable();
+ /** True when mic is muted. */
+ this.micMuted$ = this.micMutedSubject.asObservable();
+ /** Emits local mic stream for waveform visualization. */
+ this.localStream$ = this.localStreamSubject.asObservable();
+ /** Emits true once when first remote audio frame starts playing. */
+ this.firstRemoteAudioFrame$ = this.firstRemoteAudioFrameSubject.asObservable();
+ }
+ /**
+ * Connect to Daily room. Acquires mic first for waveform, then joins with audio.
+ * @param roomUrl Daily room URL (from room_created)
+ * @param token Optional meeting token
+ */
+ connect(roomUrl, token) {
+ return __awaiter(this, void 0, void 0, function* () {
+ if (this.callObject) {
+ yield this.disconnect();
+ }
+ try {
+ // Get mic stream for both Daily and waveform (single capture)
+ const stream = yield navigator.mediaDevices.getUserMedia({ audio: true });
+ const audioTrack = stream.getAudioTracks()[0];
+ if (!audioTrack) {
+ stream.getTracks().forEach((t) => t.stop());
+ throw new Error('No audio track');
+ }
+ this.localStream = stream;
+ this.localStreamSubject.next(stream);
+ // Create audio-only call object
+ // videoSource: false = no camera, audioSource = our mic track
+ const callObject = Daily.createCallObject({
+ videoSource: false,
+ audioSource: audioTrack,
+ });
+ this.callObject = callObject;
+ this.setupEventHandlers(callObject);
+ // Join room; Daily handles playback of remote (bot) audio automatically.
+ // Only pass token when it's a non-empty string (Daily rejects undefined/non-string).
+ const joinOptions = { url: roomUrl };
+ if (typeof token === 'string' && token.trim() !== '') {
+ joinOptions.token = token;
+ }
+ yield callObject.join(joinOptions);
+ console.log(`[VoiceDebug] Room connected (Daily join complete) — ${new Date().toISOString()}`);
+ const participants = callObject.participants();
+ if (participants === null || participants === void 0 ? void 0 : participants.local) {
+ this.localSessionId = participants.local.session_id;
+ }
+ // Start with mic muted; VoiceAgentService auto-unmutes after first remote audio frame.
+ callObject.setLocalAudio(false);
+ this.micMutedSubject.next(true);
+ }
+ catch (err) {
+ this.cleanup();
+ throw err;
+ }
+ });
+ }
+ setupEventHandlers(call) {
+ // active-speaker-change: used ONLY for user speaking detection.
+ // Bot speaking is detected by our own AnalyserNode (instant, no debounce).
+ call.on('active-speaker-change', (event) => {
+ this.ngZone.run(() => {
+ var _a;
+ const peerId = (_a = event === null || event === void 0 ? void 0 : event.activeSpeaker) === null || _a === void 0 ? void 0 : _a.peerId;
+ if (!peerId || !this.localSessionId) {
+ this.userSpeakingSubject.next(false);
+ return;
+ }
+ const isLocal = peerId === this.localSessionId;
+ this.userSpeakingSubject.next(isLocal);
+ });
+ });
+ // track-started / track-stopped: set up remote audio playback + AnalyserNode monitor.
+ call.on('track-started', (event) => {
+ this.ngZone.run(() => {
+ var _a, _b, _c, _d;
+ const p = event === null || event === void 0 ? void 0 : event.participant;
+ const type = (_a = event === null || event === void 0 ? void 0 : event.type) !== null && _a !== void 0 ? _a : (_b = event === null || event === void 0 ? void 0 : event.track) === null || _b === void 0 ? void 0 : _b.kind;
+ const track = event === null || event === void 0 ? void 0 : event.track;
+ if (p && !p.local && type === 'audio') {
+ console.log(`[VoiceDebug] Got audio track from backend (track-started) — readyState=${track === null || track === void 0 ? void 0 : track.readyState}, muted=${track === null || track === void 0 ? void 0 : track.muted} — ${new Date().toISOString()}`);
+ const audioTrack = track !== null && track !== void 0 ? track : (_d = (_c = p.tracks) === null || _c === void 0 ? void 0 : _c.audio) === null || _d === void 0 ? void 0 : _d.track;
+ if (audioTrack && typeof audioTrack === 'object') {
+ this.playRemoteTrack(audioTrack);
+ this.monitorRemoteAudio(audioTrack);
+ }
+ }
+ });
+ });
+ call.on('track-stopped', (event) => {
+ this.ngZone.run(() => {
+ var _a, _b;
+ const p = event === null || event === void 0 ? void 0 : event.participant;
+ const type = (_a = event === null || event === void 0 ? void 0 : event.type) !== null && _a !== void 0 ? _a : (_b = event === null || event === void 0 ? void 0 : event.track) === null || _b === void 0 ? void 0 : _b.kind;
+ if (p && !p.local && type === 'audio') {
+ this.stopRemoteAudioMonitor();
+ this.stopRemoteAudio();
+ }
+ });
+ });
+ call.on('left-meeting', () => {
+ this.ngZone.run(() => this.cleanup());
+ });
+ call.on('error', (event) => {
+ this.ngZone.run(() => {
+ var _a;
+ console.error('DailyVoiceClient: Daily error', (_a = event === null || event === void 0 ? void 0 : event.errorMsg) !== null && _a !== void 0 ? _a : event);
+ this.cleanup();
+ });
+ });
+ }
+ /**
+ * Play remote (bot) audio track via a dedicated audio element.
+ * Required in many browsers where Daily's internal playback does not output to speakers.
+ */
+ playRemoteTrack(track) {
+ this.stopRemoteAudio();
+ try {
+ console.log(`[VoiceDebug] playRemoteTrack called — track.readyState=${track.readyState}, track.muted=${track.muted} — ${new Date().toISOString()}`);
+ track.onunmute = () => {
+ console.log(`[VoiceDebug] Remote audio track UNMUTED (audio data arriving) — ${new Date().toISOString()}`);
+ };
+ const stream = new MediaStream([track]);
+ const audio = new Audio();
+ audio.autoplay = true;
+ audio.srcObject = stream;
+ this.remoteAudioElement = audio;
+ audio.onplaying = () => {
+ console.log(`[VoiceDebug] Audio element PLAYING (browser started playback) — ${new Date().toISOString()}`);
+ };
+ let firstTimeUpdate = true;
+ audio.ontimeupdate = () => {
+ if (firstTimeUpdate) {
+ firstTimeUpdate = false;
+ console.log(`[VoiceDebug] Audio element first TIMEUPDATE (actual audio output) — ${new Date().toISOString()}`);
+ this.firstRemoteAudioFrameSubject.next(true);
+ }
+ };
+ const p = audio.play();
+ if (p && typeof p.then === 'function') {
+ p.then(() => {
+ console.log(`[VoiceDebug] audio.play() resolved — ${new Date().toISOString()}`);
+ this.firstRemoteAudioFrameSubject.next(true);
+ }).catch((err) => {
+ console.warn('DailyVoiceClient: remote audio play failed (may need user gesture)', err);
+ });
+ }
+ }
+ catch (err) {
+ console.warn('DailyVoiceClient: failed to create remote audio element', err);
+ }
+ }
+ /**
+ * Monitor remote audio track energy via AnalyserNode.
+ * Polls at ~60fps and flips speakingSubject based on actual audio energy.
+ */
+ monitorRemoteAudio(track) {
+ this.stopRemoteAudioMonitor();
+ try {
+ const ctx = new AudioContext();
+ const source = ctx.createMediaStreamSource(new MediaStream([track]));
+ const analyser = ctx.createAnalyser();
+ analyser.fftSize = 256;
+ source.connect(analyser);
+ this.remoteAudioContext = ctx;
+ const dataArray = new Uint8Array(analyser.frequencyBinCount);
+ const THRESHOLD = 5;
+ const SILENCE_MS = 1500;
+ let lastSoundTime = 0;
+ let isSpeaking = false;
+ const poll = () => {
+ if (!this.remoteAudioContext)
+ return;
+ analyser.getByteFrequencyData(dataArray);
+ let sum = 0;
+ for (let i = 0; i < dataArray.length; i++) {
+ sum += dataArray[i];
+ }
+ const avg = sum / dataArray.length;
+ const now = Date.now();
+ if (avg > THRESHOLD) {
+ lastSoundTime = now;
+ if (!isSpeaking) {
+ isSpeaking = true;
+ console.log(`[VoiceDebug] Bot audio energy detected (speaking=true) — avg=${avg.toFixed(1)} — ${new Date().toISOString()}`);
+ this.ngZone.run(() => {
+ this.userSpeakingSubject.next(false);
+ this.speakingSubject.next(true);
+ });
+ }
+ }
+ else if (isSpeaking && now - lastSoundTime > SILENCE_MS) {
+ isSpeaking = false;
+ console.log(`[VoiceDebug] Bot audio silence detected (speaking=false) — ${new Date().toISOString()}`);
+ this.ngZone.run(() => this.speakingSubject.next(false));
+ }
+ this.remoteSpeakingRAF = requestAnimationFrame(poll);
+ };
+ this.remoteSpeakingRAF = requestAnimationFrame(poll);
+ }
+ catch (err) {
+ console.warn('DailyVoiceClient: failed to create remote audio monitor', err);
+ }
+ }
+ stopRemoteAudioMonitor() {
+ if (this.remoteSpeakingRAF) {
+ cancelAnimationFrame(this.remoteSpeakingRAF);
+ this.remoteSpeakingRAF = null;
+ }
+ if (this.remoteAudioContext) {
+ this.remoteAudioContext.close().catch(() => { });
+ this.remoteAudioContext = null;
+ }
+ }
+ stopRemoteAudio() {
+ if (this.remoteAudioElement) {
+ try {
+ this.remoteAudioElement.pause();
+ this.remoteAudioElement.srcObject = null;
+ }
+ catch (_) { }
+ this.remoteAudioElement = null;
+ }
+ }
+ /** Set mic muted state. */
+ setMuted(muted) {
+ if (!this.callObject)
+ return;
+ this.callObject.setLocalAudio(!muted);
+ this.micMutedSubject.next(muted);
+ }
+ /** Disconnect and cleanup. */
+ disconnect() {
+ return __awaiter(this, void 0, void 0, function* () {
+ if (!this.callObject) {
+ this.cleanup();
+ return;
+ }
+ try {
+ yield this.callObject.leave();
+ }
+ catch (e) {
+ // ignore
+ }
+ this.cleanup();
+ });
+ }
+ cleanup() {
+ this.stopRemoteAudioMonitor();
+ this.stopRemoteAudio();
+ if (this.callObject) {
+ this.callObject.destroy().catch(() => { });
+ this.callObject = null;
+ }
+ if (this.localStream) {
+ this.localStream.getTracks().forEach((t) => t.stop());
+ this.localStream = null;
+ }
+ this.localSessionId = null;
+ this.speakingSubject.next(false);
+ this.userSpeakingSubject.next(false);
+ this.localStreamSubject.next(null);
+ this.firstRemoteAudioFrameSubject.next(false);
+ // Keep last micMuted state; will reset on next connect
+ }
+ }
+ DailyVoiceClientService.ɵprov = i0.ɵɵdefineInjectable({ factory: function DailyVoiceClientService_Factory() { return new DailyVoiceClientService(i0.ɵɵinject(i0.NgZone)); }, token: DailyVoiceClientService, providedIn: "root" });
+ DailyVoiceClientService.decorators = [
+ { type: Injectable, args: [{
+ providedIn: 'root',
+ },] }
+ ];
+ DailyVoiceClientService.ctorParameters = () => [
+ { type: NgZone }
+ ];
+
+ /**
+ * Voice agent orchestrator. Coordinates WebSocket (signaling) and Daily.js (WebRTC audio).
+ *
+ * CRITICAL: This service must NEVER use Socket.IO or ngx-socket-io. Voice flow uses only:
+ * - Native WebSocket (WebSocketVoiceClientService) for signaling (room_created, transcripts)
+ * - Daily.js (DailyVoiceClientService) for WebRTC audio. Audio does NOT flow over WebSocket.
+ *
+ * - Maintains callState, statusText, duration, isMicMuted, isUserSpeaking, audioLevels
+ * - Uses WebSocket for room_created and transcripts only (no audio)
+ * - Uses Daily.js for all audio, mic, and real-time speaking detection
  */
  class VoiceAgentService {
- constructor(audioAnalyzer, platformTokenRefresh, ngZone,
+ constructor(audioAnalyzer, wsClient, dailyClient, platformTokenRefresh,
  /** `Object` not `object` — ngc metadata collection rejects the `object` type in DI params. */
  platformId) {
  this.audioAnalyzer = audioAnalyzer;
+ this.wsClient = wsClient;
+ this.dailyClient = dailyClient;
  this.platformTokenRefresh = platformTokenRefresh;
- this.ngZone = ngZone;
  this.platformId = platformId;
  this.callStateSubject = new BehaviorSubject('idle');
  this.statusTextSubject = new BehaviorSubject('');
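
The AnalyserNode energy monitor in monitorRemoteAudio is the key new technique here: bot speaking is inferred from actual audio energy on the remote track instead of Daily's debounced active-speaker events. The same loop, restated as a standalone hedged TypeScript sketch (THRESHOLD and SILENCE_MS are copied from the diff; the function name and signature are illustrative):

function monitorTrackEnergy(
  track: MediaStreamTrack,
  onSpeaking: (speaking: boolean) => void,
): () => void {
  const ctx = new AudioContext();
  const analyser = ctx.createAnalyser();
  analyser.fftSize = 256; // 128 frequency bins
  ctx.createMediaStreamSource(new MediaStream([track])).connect(analyser);
  const data = new Uint8Array(analyser.frequencyBinCount);
  const THRESHOLD = 5;     // average byte magnitude that counts as sound (from the diff)
  const SILENCE_MS = 1500; // hangover before reporting silence (from the diff)
  let lastSound = 0;
  let speaking = false;
  let raf = requestAnimationFrame(function poll() {
    analyser.getByteFrequencyData(data);
    const avg = data.reduce((sum, v) => sum + v, 0) / data.length;
    const now = Date.now();
    if (avg > THRESHOLD) {
      lastSound = now;
      if (!speaking) {
        speaking = true;
        onSpeaking(true);
      }
    } else if (speaking && now - lastSound > SILENCE_MS) {
      speaking = false;
      onSpeaking(false);
    }
    raf = requestAnimationFrame(poll);
  });
  // Returned disposer mirrors stopRemoteAudioMonitor in the diff.
  return () => {
    cancelAnimationFrame(raf);
    void ctx.close(); // AudioContext.close() returns a Promise
  };
}
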
@@ -835,10 +1234,9 @@ class VoiceAgentService {
  this.botTranscriptSubject = new Subject();
  this.callStartTime = 0;
  this.durationInterval = null;
- this.pcClient = null;
- this.botAudioElement = null;
  this.subscriptions = new Subscription();
  this.destroy$ = new Subject();
+ this.hasAutoUnmutedAfterFirstAudio = false;
  this.callState$ = this.callStateSubject.asObservable();
  this.statusText$ = this.statusTextSubject.asObservable();
  this.duration$ = this.durationSubject.asObservable();
@@ -847,228 +1245,199 @@ class VoiceAgentService {
  this.audioLevels$ = this.audioLevelsSubject.asObservable();
  this.userTranscript$ = this.userTranscriptSubject.asObservable();
  this.botTranscript$ = this.botTranscriptSubject.asObservable();
+ // Waveform visualization only - do NOT use for speaking state
  this.subscriptions.add(this.audioAnalyzer.audioLevels$.subscribe((levels) => this.audioLevelsSubject.next(levels)));
  }
  ngOnDestroy() {
  this.destroy$.next();
  this.subscriptions.unsubscribe();
- void this.disconnect();
+ this.disconnect();
  }
- /** Reset to idle (e.g. when modal re-opens so user can click Start Call). */
+ /** Reset to idle state (e.g. when modal opens so user can click Start Call). */
  resetToIdle() {
  if (this.callStateSubject.value === 'idle')
  return;
- void this.disconnect();
+ this.stopDurationTimer();
+ this.audioAnalyzer.stop();
+ this.wsClient.disconnect();
+ // Fire-and-forget: Daily disconnect is async; connect() will await if needed
+ void this.dailyClient.disconnect();
  this.callStateSubject.next('idle');
  this.statusTextSubject.next('');
  this.durationSubject.next('0:00');
+ this.hasAutoUnmutedAfterFirstAudio = false;
  }
  connect(apiUrl, token, botId, conversationId, apiKey, eventToken, eventId, eventUrl, domainAuthority, usersApiUrl) {
  return __awaiter(this, void 0, void 0, function* () {
  if (this.callStateSubject.value !== 'idle') {
- console.warn('[HiveGpt Voice] Call already in progress');
+ console.warn('Call already in progress');
  return;
  }
  try {
  this.callStateSubject.next('connecting');
  this.statusTextSubject.next('Connecting...');
  let accessToken = token;
+ // Align with chat drawer token handling: always delegate to
+ // PlatformTokenRefreshService when we have a usersApiUrl, so it can
+ // fall back to stored tokens even if the caller passed an empty token.
  if (usersApiUrl && isPlatformBrowser(this.platformId)) {
  try {
  const ensured = yield this.platformTokenRefresh
  .ensureValidAccessToken(token, usersApiUrl)
  .pipe(take(1))
  .toPromise();
- if (ensured === null || ensured === void 0 ? void 0 : ensured.accessToken)
+ if (ensured === null || ensured === void 0 ? void 0 : ensured.accessToken) {
  accessToken = ensured.accessToken;
+ }
  }
  catch (e) {
- console.warn('[HiveGpt Voice] Token refresh failed', e);
+ console.warn('[HiveGpt Voice] Token refresh before connect failed', e);
  }
  }
  const baseUrl = apiUrl.replace(/\/$/, '');
- const pcClient = new PipecatClient({
- transport: new WebSocketTransport(),
- enableMic: true,
- enableCam: false,
- callbacks: {
- onConnected: () => this.ngZone.run(() => this.onPipecatConnected()),
- onDisconnected: () => this.ngZone.run(() => this.onPipecatDisconnected()),
- onBotReady: () => this.ngZone.run(() => this.onBotReady()),
- onUserTranscript: (data) => this.ngZone.run(() => this.userTranscriptSubject.next({ text: data.text, final: !!data.final })),
- onBotTranscript: (data) => this.ngZone.run(() => this.botTranscriptSubject.next(data.text)),
- onError: (err) => {
- this.ngZone.run(() => {
- console.error('[HiveGpt Voice] PipecatClient error', err);
- this.callStateSubject.next('ended');
- this.statusTextSubject.next('Connection failed');
- });
- },
- },
- });
- this.pcClient = pcClient;
- // Bot audio arrives as a MediaStreamTrack — wire to a hidden <audio> element
- pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
- if (!(participant === null || participant === void 0 ? void 0 : participant.local) && track.kind === 'audio') {
- this.ngZone.run(() => this.setupBotAudioTrack(track));
- }
- });
- // Speaking state comes straight from RTVI events
- pcClient.on(RTVIEvent.BotStartedSpeaking, () => this.ngZone.run(() => this.onBotStartedSpeaking()));
- pcClient.on(RTVIEvent.BotStoppedSpeaking, () => this.ngZone.run(() => this.onBotStoppedSpeaking()));
- pcClient.on(RTVIEvent.UserStartedSpeaking, () => this.ngZone.run(() => {
- this.isUserSpeakingSubject.next(true);
- this.callStateSubject.next('listening');
- }));
- pcClient.on(RTVIEvent.UserStoppedSpeaking, () => this.ngZone.run(() => {
- this.isUserSpeakingSubject.next(false);
- if (this.callStateSubject.value === 'listening') {
- this.callStateSubject.next('connected');
- }
- }));
- // Acquire mic (triggers browser permission prompt)
- yield pcClient.initDevices();
- // Build headers using the browser Headers API (required by pipecat's APIRequest type)
- const requestHeaders = new Headers();
- requestHeaders.append('Authorization', `Bearer ${accessToken}`);
- requestHeaders.append('x-api-key', apiKey);
- requestHeaders.append('hive-bot-id', botId);
- requestHeaders.append('domain-authority', domainAuthority);
- requestHeaders.append('eventUrl', eventUrl);
- requestHeaders.append('eventId', eventId);
- requestHeaders.append('eventToken', eventToken);
- requestHeaders.append('ngrok-skip-browser-warning', 'true');
- // POST to /ai/ask-voice-socket → receives { ws_url } → WebSocketTransport connects
- yield pcClient.startBotAndConnect({
- endpoint: `${baseUrl}/ai/ask-voice-socket`,
- headers: requestHeaders,
- requestData: {
+ const postUrl = `${baseUrl}/ai/ask-voice`;
+ const headers = {
+ 'Content-Type': 'application/json',
+ Authorization: `Bearer ${accessToken}`,
+ 'x-api-key': apiKey,
+ 'hive-bot-id': botId,
+ 'domain-authority': domainAuthority,
+ eventUrl,
+ eventId,
+ eventToken,
+ 'ngrok-skip-browser-warning': 'true',
+ };
+ // POST to get ws_url for signaling
+ const res = yield fetch(postUrl, {
+ method: 'POST',
+ headers,
+ body: JSON.stringify({
  bot_id: botId,
  conversation_id: conversationId,
  voice: 'alloy',
- },
+ }),
  });
+ if (!res.ok) {
+ throw new Error(`HTTP ${res.status}`);
+ }
+ const json = yield res.json();
+ const wsUrl = json === null || json === void 0 ? void 0 : json.rn_ws_url;
+ if (!wsUrl || typeof wsUrl !== 'string') {
+ throw new Error('No ws_url in response');
+ }
+ // Subscribe to room_created BEFORE connecting to avoid race
+ this.wsClient.roomCreated$
+ .pipe(take(1), takeUntil(this.destroy$))
+ .subscribe((roomUrl) => __awaiter(this, void 0, void 0, function* () {
+ try {
+ yield this.onRoomCreated(roomUrl);
+ }
+ catch (err) {
+ console.error('Daily join failed:', err);
+ this.callStateSubject.next('ended');
+ this.statusTextSubject.next('Connection failed');
+ yield this.disconnect();
+ throw err;
+ }
+ }));
+ // Forward transcripts from WebSocket
+ this.subscriptions.add(this.wsClient.userTranscript$
+ .pipe(takeUntil(this.destroy$))
+ .subscribe((t) => this.userTranscriptSubject.next(t)));
+ this.subscriptions.add(this.wsClient.botTranscript$
+ .pipe(takeUntil(this.destroy$))
+ .subscribe((t) => this.botTranscriptSubject.next(t)));
+ // Connect signaling WebSocket (no audio over WS)
+ this.wsClient.connect(wsUrl);
  }
  catch (error) {
- console.error('[HiveGpt Voice] connect failed', error);
+ console.error('Error connecting voice agent:', error);
  this.callStateSubject.next('ended');
- yield this.cleanupPipecatClient();
+ yield this.disconnect();
  this.statusTextSubject.next('Connection failed');
  throw error;
  }
  });
  }
- onPipecatConnected() {
- this.callStateSubject.next('connected');
- this.statusTextSubject.next('Connected');
- this.isMicMutedSubject.next(false);
- this.startLocalMicAnalyzer();
- }
- onPipecatDisconnected() {
- this.stopDurationTimer();
- this.callStartTime = 0;
- this.audioAnalyzer.stop();
- this.stopBotAudio();
- this.callStateSubject.next('ended');
- this.statusTextSubject.next('Call Ended');
- }
- onBotReady() {
- var _a, _b, _c;
- // Retry track wiring in case tracks weren't ready at onConnected
- this.startLocalMicAnalyzer();
- const botTrack = (_c = (_b = (_a = this.pcClient) === null || _a === void 0 ? void 0 : _a.tracks()) === null || _b === void 0 ? void 0 : _b.bot) === null || _c === void 0 ? void 0 : _c.audio;
- if (botTrack)
- this.setupBotAudioTrack(botTrack);
- }
- startLocalMicAnalyzer() {
- var _a, _b, _c;
- const localTrack = (_c = (_b = (_a = this.pcClient) === null || _a === void 0 ? void 0 : _a.tracks()) === null || _b === void 0 ? void 0 : _b.local) === null || _c === void 0 ? void 0 : _c.audio;
- if (localTrack) {
- this.audioAnalyzer.start(new MediaStream([localTrack]));
- }
- }
- onBotStartedSpeaking() {
- if (this.callStartTime === 0) {
- this.callStartTime = Date.now();
- this.startDurationTimer();
- }
- this.callStateSubject.next('talking');
- }
- onBotStoppedSpeaking() {
- if (this.callStateSubject.value === 'talking') {
- this.callStateSubject.next('connected');
- }
- }
- setupBotAudioTrack(track) {
- var _a;
- if (!this.botAudioElement) {
- this.botAudioElement = new Audio();
- this.botAudioElement.autoplay = true;
- }
- const existing = (_a = this.botAudioElement.srcObject) === null || _a === void 0 ? void 0 : _a.getAudioTracks()[0];
- if ((existing === null || existing === void 0 ? void 0 : existing.id) === track.id)
- return;
- this.botAudioElement.srcObject = new MediaStream([track]);
- this.botAudioElement.play().catch((err) => console.warn('[HiveGpt Voice] Bot audio play blocked', err));
- }
- stopBotAudio() {
- var _a;
- if (this.botAudioElement) {
- try {
- this.botAudioElement.pause();
- (_a = this.botAudioElement.srcObject) === null || _a === void 0 ? void 0 : _a.getAudioTracks().forEach((t) => t.stop());
- this.botAudioElement.srcObject = null;
- }
- catch (_b) {
- // ignore
- }
- this.botAudioElement = null;
- }
+ onRoomCreated(roomUrl) {
+ return __awaiter(this, void 0, void 0, function* () {
+ // Connect Daily.js for WebRTC audio
+ yield this.dailyClient.connect(roomUrl);
+ this.hasAutoUnmutedAfterFirstAudio = false;
+ // Waveform: use local mic stream from Daily client
+ this.dailyClient.localStream$
+ .pipe(filter((s) => s != null), take(1))
+ .subscribe((stream) => {
+ this.audioAnalyzer.start(stream);
+ });
+ this.subscriptions.add(this.dailyClient.userSpeaking$.subscribe((s) => this.isUserSpeakingSubject.next(s)));
+ this.subscriptions.add(combineLatest([
+ this.dailyClient.speaking$,
+ this.dailyClient.userSpeaking$,
+ ]).subscribe(([bot, user]) => {
+ const current = this.callStateSubject.value;
+ if (current === 'connecting' && !bot) {
+ return;
+ }
+ if (current === 'connecting' && bot) {
+ this.callStartTime = Date.now();
+ this.startDurationTimer();
+ this.callStateSubject.next('talking');
+ return;
+ }
+ if (user) {
+ this.callStateSubject.next('listening');
+ }
+ else if (bot) {
+ this.callStateSubject.next('talking');
+ }
+ else if (current === 'talking' || current === 'listening') {
+ this.callStateSubject.next('connected');
+ }
+ }));
+ this.subscriptions.add(this.dailyClient.micMuted$.subscribe((muted) => this.isMicMutedSubject.next(muted)));
+ // One-time auto-unmute after first remote audio frame starts playing.
+ // This keeps initial capture muted until bot audio is heard, then restores normal mic flow.
+ this.subscriptions.add(this.dailyClient.firstRemoteAudioFrame$
+ .pipe(filter((hasFirstFrame) => hasFirstFrame), take(1))
+ .subscribe(() => {
+ if (this.hasAutoUnmutedAfterFirstAudio)
+ return;
+ this.hasAutoUnmutedAfterFirstAudio = true;
+ if (this.isMicMutedSubject.value) {
+ this.dailyClient.setMuted(false);
+ }
+ }));
+ this.statusTextSubject.next('Connecting...');
+ });
  }
  disconnect() {
  return __awaiter(this, void 0, void 0, function* () {
  this.stopDurationTimer();
- this.callStartTime = 0;
  this.audioAnalyzer.stop();
- this.stopBotAudio();
- yield this.cleanupPipecatClient();
+ // Daily first, then WebSocket
+ yield this.dailyClient.disconnect();
+ this.wsClient.disconnect();
  this.callStateSubject.next('ended');
  this.statusTextSubject.next('Call Ended');
- });
- }
- cleanupPipecatClient() {
- return __awaiter(this, void 0, void 0, function* () {
- if (this.pcClient) {
- try {
- yield this.pcClient.disconnect();
- }
- catch (_a) {
- // ignore
- }
- this.pcClient = null;
- }
+ this.hasAutoUnmutedAfterFirstAudio = false;
  });
  }
  toggleMic() {
- if (!this.pcClient)
- return;
- const nextMuted = !this.isMicMutedSubject.value;
- this.pcClient.enableMic(!nextMuted);
- this.isMicMutedSubject.next(nextMuted);
- if (nextMuted)
- this.isUserSpeakingSubject.next(false);
+ const current = this.isMicMutedSubject.value;
+ this.dailyClient.setMuted(!current);
  }
  startDurationTimer() {
- const tick = () => {
+ const updateDuration = () => {
  if (this.callStartTime > 0) {
  const elapsed = Math.floor((Date.now() - this.callStartTime) / 1000);
- const m = Math.floor(elapsed / 60);
- const s = elapsed % 60;
- this.durationSubject.next(`${m}:${String(s).padStart(2, '0')}`);
+ const minutes = Math.floor(elapsed / 60);
+ const seconds = elapsed % 60;
+ this.durationSubject.next(`${minutes}:${String(seconds).padStart(2, '0')}`);
  }
  };
- tick();
- this.durationInterval = setInterval(tick, 1000);
+ updateDuration();
+ this.durationInterval = setInterval(updateDuration, 1000);
  }
  stopDurationTimer() {
  if (this.durationInterval) {
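
The combineLatest block in onRoomCreated above is effectively a small call-state machine over the two speaking flags. Condensed into a hedged pure function for readability (state names are from the diff; the function itself is illustrative, not a package export):

type CallState = 'idle' | 'connecting' | 'connected' | 'listening' | 'talking' | 'ended';

function nextCallState(current: CallState, bot: boolean, user: boolean): CallState {
  if (current === 'connecting') {
    // First bot audio promotes the call out of 'connecting' (and starts the duration timer).
    return bot ? 'talking' : current;
  }
  if (user) return 'listening';
  if (bot) return 'talking';
  // Neither side speaking: settle back to 'connected' from an active state.
  if (current === 'talking' || current === 'listening') return 'connected';
  return current;
}
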
@@ -1077,7 +1446,7 @@ class VoiceAgentService {
  }
  }
  }
- VoiceAgentService.ɵprov = i0.ɵɵdefineInjectable({ factory: function VoiceAgentService_Factory() { return new VoiceAgentService(i0.ɵɵinject(AudioAnalyzerService), i0.ɵɵinject(PlatformTokenRefreshService), i0.ɵɵinject(i0.NgZone), i0.ɵɵinject(i0.PLATFORM_ID)); }, token: VoiceAgentService, providedIn: "root" });
+ VoiceAgentService.ɵprov = i0.ɵɵdefineInjectable({ factory: function VoiceAgentService_Factory() { return new VoiceAgentService(i0.ɵɵinject(AudioAnalyzerService), i0.ɵɵinject(WebSocketVoiceClientService), i0.ɵɵinject(DailyVoiceClientService), i0.ɵɵinject(PlatformTokenRefreshService), i0.ɵɵinject(i0.PLATFORM_ID)); }, token: VoiceAgentService, providedIn: "root" });
  VoiceAgentService.decorators = [
  { type: Injectable, args: [{
  providedIn: 'root',
@@ -1085,8 +1454,9 @@ VoiceAgentService.decorators = [
  ];
  VoiceAgentService.ctorParameters = () => [
  { type: AudioAnalyzerService },
+ { type: WebSocketVoiceClientService },
+ { type: DailyVoiceClientService },
  { type: PlatformTokenRefreshService },
- { type: NgZone },
  { type: Object, decorators: [{ type: Inject, args: [PLATFORM_ID,] }] }
  ];

@@ -5125,8 +5495,8 @@ ChatBotComponent.propDecorators = {
  };

  /**
- * Voice agent module. Uses @pipecat-ai/client-js + @pipecat-ai/websocket-transport
- * (peer dependencies) for WebSocket transport, RTVI protocol, and audio.
+ * Voice agent module. Uses native WebSocket + Daily.js only.
+ * Does NOT use Socket.IO or ngx-socket-io.
  */
  class VoiceAgentModule {
  }
@@ -5141,6 +5511,8 @@ VoiceAgentModule.decorators = [
  providers: [
  VoiceAgentService,
  AudioAnalyzerService,
+ WebSocketVoiceClientService,
+ DailyVoiceClientService
  ],
  exports: [
  VoiceAgentModalComponent
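
Because the module now provides WebSocketVoiceClientService and DailyVoiceClientService itself (see the providers hunk above), consumer wiring stays a plain module import. A minimal hedged sketch (AppModule is hypothetical):

import { NgModule } from '@angular/core';
import { BrowserModule } from '@angular/platform-browser';
import { VoiceAgentModule } from '@hivegpt/hiveai-angular';

@NgModule({
  // VoiceAgentModule brings its own voice providers; no extra providers needed here.
  imports: [BrowserModule, VoiceAgentModule],
})
export class AppModule {}
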
@@ -5411,5 +5783,5 @@ HiveGptModule.decorators = [
  * Generated bundle index. Do not edit.
  */

- export { AudioAnalyzerService, ChatBotComponent, ChatDrawerComponent, HIVEGPT_AUTH_STORAGE_KEY, HiveGptModule, PlatformTokenRefreshService, VOICE_MODAL_CLOSE_CALLBACK, VOICE_MODAL_CONFIG, VoiceAgentModalComponent, VoiceAgentModule, VoiceAgentService, eClassificationType, hiveGptAuthStorageKeyFactory, BotsService as ɵa, SocketService as ɵb, ConversationService as ɵc, NotificationSocket as ɵd, TranslationService as ɵe, VideoPlayerComponent as ɵf, SafeHtmlPipe as ɵg, BotHtmlEditorComponent as ɵh };
+ export { AudioAnalyzerService, ChatBotComponent, ChatDrawerComponent, HIVEGPT_AUTH_STORAGE_KEY, HiveGptModule, PlatformTokenRefreshService, VOICE_MODAL_CLOSE_CALLBACK, VOICE_MODAL_CONFIG, VoiceAgentModalComponent, VoiceAgentModule, VoiceAgentService, eClassificationType, hiveGptAuthStorageKeyFactory, BotsService as ɵa, SocketService as ɵb, ConversationService as ɵc, NotificationSocket as ɵd, TranslationService as ɵe, WebSocketVoiceClientService as ɵf, DailyVoiceClientService as ɵg, VideoPlayerComponent as ɵh, SafeHtmlPipe as ɵi, BotHtmlEditorComponent as ɵj };
  //# sourceMappingURL=hivegpt-hiveai-angular.js.map