@hivegpt/hiveai-angular 0.0.583 → 0.0.584

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -5,14 +5,16 @@ import { HttpClient, HttpHeaders } from '@angular/common/http';
5
5
  import * as i0 from '@angular/core';
6
6
  import { Injectable, InjectionToken, Inject, PLATFORM_ID, Optional, NgZone, EventEmitter, Component, Injector, Output, Input, ElementRef, ChangeDetectionStrategy, ChangeDetectorRef, Renderer2, ViewContainerRef, ViewChild, ViewChildren, NgModule, Pipe } from '@angular/core';
7
7
  import { DomSanitizer } from '@angular/platform-browser';
8
- import { BehaviorSubject, of, throwError, Subject, Subscription, merge, concat, timer, combineLatest } from 'rxjs';
9
- import { switchMap, catchError, filter, take, map, takeUntil, distinctUntilChanged, startWith, tap } from 'rxjs/operators';
8
+ import { BehaviorSubject, of, throwError, Subject, Subscription } from 'rxjs';
9
+ import { switchMap, catchError, filter, take, map, tap } from 'rxjs/operators';
10
10
  import { isPlatformBrowser, CommonModule, DOCUMENT } from '@angular/common';
11
11
  import { Socket } from 'ngx-socket-io';
12
12
  import { Validators, FormBuilder, FormsModule, ReactiveFormsModule } from '@angular/forms';
13
13
  import * as SpeechSDK from 'microsoft-cognitiveservices-speech-sdk';
14
14
  import * as marked from 'marked';
15
15
  import { __awaiter } from 'tslib';
16
+ import { PipecatClient, RTVIEvent } from '@pipecat-ai/client-js';
17
+ import { WebSocketTransport } from '@pipecat-ai/websocket-transport';
16
18
  import { MatIconModule } from '@angular/material/icon';
17
19
  import { MatSidenavModule } from '@angular/material/sidenav';
18
20
  import { QuillModule } from 'ngx-quill';
@@ -805,219 +807,23 @@ AudioAnalyzerService.decorators = [
805
807
  ];
806
808
 
807
809
  /**
808
- * Native WebSocket client for voice session (signaling, transcripts, speaking hints).
809
- * CRITICAL: Uses native WebSocket only. NO Socket.IO, NO ngx-socket-io.
810
+ * Voice agent orchestrator using the official PipecatClient SDK.
810
811
  *
811
- * Connects to `ws_url` from `POST {baseUrl}/ai/ask-voice-socket`.
812
- * Parses JSON messages for transcripts and optional assistant/user speaking flags.
813
- */
814
- class WebSocketVoiceClientService {
815
- constructor(ngZone) {
816
- this.ngZone = ngZone;
817
- this.ws = null;
818
- /** True when {@link disconnect} initiated the close (not counted as remote close). */
819
- this.closeInitiatedByClient = false;
820
- this.openedSubject = new Subject();
821
- this.remoteCloseSubject = new Subject();
822
- this.userTranscriptSubject = new Subject();
823
- this.botTranscriptSubject = new Subject();
824
- this.assistantSpeakingSubject = new Subject();
825
- this.serverUserSpeakingSubject = new Subject();
826
- this.audioChunkSubject = new Subject();
827
- /** Fires once each time the WebSocket reaches OPEN. */
828
- this.opened$ = this.openedSubject.asObservable();
829
- /** Fires when the socket closes without a client-initiated {@link disconnect}. */
830
- this.remoteClose$ = this.remoteCloseSubject.asObservable();
831
- this.userTranscript$ = this.userTranscriptSubject.asObservable();
832
- this.botTranscript$ = this.botTranscriptSubject.asObservable();
833
- /** Assistant/bot speaking, when the server sends explicit events (see {@link handleJsonMessage}). */
834
- this.assistantSpeaking$ = this.assistantSpeakingSubject.asObservable();
835
- /** User speaking from server-side VAD, if provided. */
836
- this.serverUserSpeaking$ = this.serverUserSpeakingSubject.asObservable();
837
- /** Binary audio frames from server (when backend streams bot audio over WS). */
838
- this.audioChunk$ = this.audioChunkSubject.asObservable();
839
- }
840
- connect(wsUrl) {
841
- var _a;
842
- if (((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN) {
843
- return;
844
- }
845
- if (this.ws) {
846
- this.closeInitiatedByClient = true;
847
- this.ws.close();
848
- }
849
- try {
850
- const socket = new WebSocket(wsUrl);
851
- this.ws = socket;
852
- socket.onopen = () => {
853
- if (this.ws !== socket)
854
- return;
855
- this.ngZone.run(() => this.openedSubject.next());
856
- };
857
- socket.onmessage = (event) => {
858
- if (this.ws !== socket)
859
- return;
860
- void this.handleIncomingMessage(event.data);
861
- };
862
- socket.onerror = () => {
863
- this.ngZone.run(() => {
864
- if (this.ws === socket && socket.readyState !== WebSocket.CLOSED) {
865
- socket.close();
866
- }
867
- });
868
- };
869
- socket.onclose = () => {
870
- if (this.ws === socket) {
871
- this.ws = null;
872
- }
873
- const client = this.closeInitiatedByClient;
874
- this.closeInitiatedByClient = false;
875
- if (!client) {
876
- this.ngZone.run(() => this.remoteCloseSubject.next());
877
- }
878
- };
879
- }
880
- catch (err) {
881
- console.error('WebSocketVoiceClient: connect failed', err);
882
- this.ws = null;
883
- throw err;
884
- }
885
- }
886
- handleIncomingMessage(payload) {
887
- return __awaiter(this, void 0, void 0, function* () {
888
- if (typeof payload === 'string') {
889
- this.handleJsonString(payload);
890
- return;
891
- }
892
- if (payload instanceof ArrayBuffer) {
893
- this.handleBinaryMessage(payload);
894
- return;
895
- }
896
- if (payload instanceof Blob) {
897
- const ab = yield payload.arrayBuffer();
898
- this.handleBinaryMessage(ab);
899
- }
900
- });
901
- }
902
- handleJsonString(jsonText) {
903
- try {
904
- const msg = JSON.parse(jsonText);
905
- this.ngZone.run(() => this.handleJsonMessage(msg));
906
- }
907
- catch (_a) {
908
- // Ignore non-JSON
909
- }
910
- }
911
- handleBinaryMessage(buffer) {
912
- // Some backends wrap JSON events inside binary WS frames.
913
- const maybeText = this.tryDecodeUtf8(buffer);
914
- if (maybeText !== null) {
915
- this.handleJsonString(maybeText);
916
- return;
917
- }
918
- // Otherwise treat binary as streamed assistant audio.
919
- this.ngZone.run(() => this.audioChunkSubject.next(buffer));
920
- }
921
- tryDecodeUtf8(buffer) {
922
- try {
923
- const text = new TextDecoder('utf-8', { fatal: true }).decode(buffer);
924
- const trimmed = text.trim();
925
- if (!trimmed || (trimmed[0] !== '{' && trimmed[0] !== '[')) {
926
- return null;
927
- }
928
- return trimmed;
929
- }
930
- catch (_a) {
931
- return null;
932
- }
933
- }
934
- handleJsonMessage(msg) {
935
- const type = msg.type;
936
- const typeStr = typeof type === 'string' ? type : '';
937
- if (typeStr === 'session_ready' || typeStr === 'connected' || typeStr === 'voice_session_started') {
938
- return;
939
- }
940
- if (typeStr === 'assistant_speaking' ||
941
- typeStr === 'bot_speaking') {
942
- if (msg.active === true || msg.speaking === true) {
943
- this.assistantSpeakingSubject.next(true);
944
- }
945
- else if (msg.active === false || msg.speaking === false) {
946
- this.assistantSpeakingSubject.next(false);
947
- }
948
- return;
949
- }
950
- if (typeStr === 'user_speaking') {
951
- if (msg.active === true || msg.speaking === true) {
952
- this.serverUserSpeakingSubject.next(true);
953
- }
954
- else if (msg.active === false || msg.speaking === false) {
955
- this.serverUserSpeakingSubject.next(false);
956
- }
957
- return;
958
- }
959
- if (typeStr === 'input_audio_buffer.speech_started') {
960
- this.serverUserSpeakingSubject.next(true);
961
- return;
962
- }
963
- if (typeStr === 'input_audio_buffer.speech_stopped') {
964
- this.serverUserSpeakingSubject.next(false);
965
- return;
966
- }
967
- if (typeStr === 'response.audio.delta') {
968
- this.assistantSpeakingSubject.next(true);
969
- return;
970
- }
971
- if (typeStr === 'response.audio.done' ||
972
- typeStr === 'response.output_audio.done') {
973
- this.assistantSpeakingSubject.next(false);
974
- return;
975
- }
976
- if (typeStr === 'user_transcript' && typeof msg.text === 'string') {
977
- this.userTranscriptSubject.next({
978
- text: msg.text,
979
- final: msg.final === true,
980
- });
981
- return;
982
- }
983
- if (typeStr === 'bot_transcript' && typeof msg.text === 'string') {
984
- this.botTranscriptSubject.next(msg.text);
985
- }
986
- }
987
- disconnect() {
988
- if (!this.ws) {
989
- return;
990
- }
991
- this.closeInitiatedByClient = true;
992
- this.ws.close();
993
- }
994
- get isConnected() {
995
- var _a;
996
- return ((_a = this.ws) === null || _a === void 0 ? void 0 : _a.readyState) === WebSocket.OPEN;
997
- }
998
- }
999
- WebSocketVoiceClientService.ɵprov = i0.ɵɵdefineInjectable({ factory: function WebSocketVoiceClientService_Factory() { return new WebSocketVoiceClientService(i0.ɵɵinject(i0.NgZone)); }, token: WebSocketVoiceClientService, providedIn: "root" });
1000
- WebSocketVoiceClientService.decorators = [
1001
- { type: Injectable, args: [{
1002
- providedIn: 'root',
1003
- },] }
1004
- ];
1005
- WebSocketVoiceClientService.ctorParameters = () => [
1006
- { type: NgZone }
1007
- ];
1008
-
1009
- /**
1010
- * Voice agent orchestrator: single WebSocket (`ws_url` from POST /ai/ask-voice-socket)
1011
- * for session events, transcripts, and optional speaking hints; local mic for capture
1012
- * and waveform only (no Daily/WebRTC room).
812
+ * Audio flow (mirrors the React reference implementation):
813
+ * - Local mic: acquired by PipecatClient.initDevices(); local track fed to
814
+ * AudioAnalyzerService for waveform visualisation.
815
+ * - Bot audio: received as a MediaStreamTrack via RTVIEvent.TrackStarted,
816
+ * played through a hidden <audio> element.
817
+ * - All binary protobuf framing / RTVI protocol handled by
818
+ * @pipecat-ai/client-js + @pipecat-ai/websocket-transport.
1013
819
  */
1014
820
  class VoiceAgentService {
1015
- constructor(audioAnalyzer, wsClient, platformTokenRefresh,
821
+ constructor(audioAnalyzer, platformTokenRefresh, ngZone,
1016
822
  /** `Object` not `object` — ngc metadata collection rejects the `object` type in DI params. */
1017
823
  platformId) {
1018
824
  this.audioAnalyzer = audioAnalyzer;
1019
- this.wsClient = wsClient;
1020
825
  this.platformTokenRefresh = platformTokenRefresh;
826
+ this.ngZone = ngZone;
1021
827
  this.platformId = platformId;
1022
828
  this.callStateSubject = new BehaviorSubject('idle');
1023
829
  this.statusTextSubject = new BehaviorSubject('');
@@ -1029,11 +835,8 @@ class VoiceAgentService {
1029
835
  this.botTranscriptSubject = new Subject();
1030
836
  this.callStartTime = 0;
1031
837
  this.durationInterval = null;
1032
- this.localMicStream = null;
1033
- this.remoteAudioContext = null;
1034
- this.pendingRemoteAudio = [];
1035
- this.remoteAudioPlaying = false;
1036
- this.endCall$ = new Subject();
838
+ this.pcClient = null;
839
+ this.botAudioElement = null;
1037
840
  this.subscriptions = new Subscription();
1038
841
  this.destroy$ = new Subject();
1039
842
  this.callState$ = this.callStateSubject.asObservable();
@@ -1045,29 +848,17 @@ class VoiceAgentService {
1045
848
  this.userTranscript$ = this.userTranscriptSubject.asObservable();
1046
849
  this.botTranscript$ = this.botTranscriptSubject.asObservable();
1047
850
  this.subscriptions.add(this.audioAnalyzer.audioLevels$.subscribe((levels) => this.audioLevelsSubject.next(levels)));
1048
- this.subscriptions.add(this.wsClient.remoteClose$
1049
- .pipe(takeUntil(this.destroy$))
1050
- .subscribe(() => void this.handleRemoteClose()));
1051
- this.subscriptions.add(this.wsClient.audioChunk$
1052
- .pipe(takeUntil(this.destroy$))
1053
- .subscribe((chunk) => this.enqueueRemoteAudio(chunk)));
1054
851
  }
1055
852
  ngOnDestroy() {
1056
853
  this.destroy$.next();
1057
854
  this.subscriptions.unsubscribe();
1058
- this.disconnect();
855
+ void this.disconnect();
1059
856
  }
1060
- /** Reset to idle state (e.g. when modal opens so user can click Start Call). */
857
+ /** Reset to idle (e.g. when modal re-opens so user can click Start Call). */
1061
858
  resetToIdle() {
1062
859
  if (this.callStateSubject.value === 'idle')
1063
860
  return;
1064
- this.endCall$.next();
1065
- this.stopDurationTimer();
1066
- this.callStartTime = 0;
1067
- this.audioAnalyzer.stop();
1068
- this.stopLocalMic();
1069
- this.resetRemoteAudioPlayback();
1070
- this.wsClient.disconnect();
861
+ void this.disconnect();
1071
862
  this.callStateSubject.next('idle');
1072
863
  this.statusTextSubject.next('');
1073
864
  this.durationSubject.next('0:00');
@@ -1075,7 +866,7 @@ class VoiceAgentService {
1075
866
  connect(apiUrl, token, botId, conversationId, apiKey, eventToken, eventId, eventUrl, domainAuthority, usersApiUrl) {
1076
867
  return __awaiter(this, void 0, void 0, function* () {
1077
868
  if (this.callStateSubject.value !== 'idle') {
1078
- console.warn('Call already in progress');
869
+ console.warn('[HiveGpt Voice] Call already in progress');
1079
870
  return;
1080
871
  }
1081
872
  try {
@@ -1088,257 +879,196 @@ class VoiceAgentService {
1088
879
  .ensureValidAccessToken(token, usersApiUrl)
1089
880
  .pipe(take(1))
1090
881
  .toPromise();
1091
- if (ensured === null || ensured === void 0 ? void 0 : ensured.accessToken) {
882
+ if (ensured === null || ensured === void 0 ? void 0 : ensured.accessToken)
1092
883
  accessToken = ensured.accessToken;
1093
- }
1094
884
  }
1095
885
  catch (e) {
1096
- console.warn('[HiveGpt Voice] Token refresh before connect failed', e);
886
+ console.warn('[HiveGpt Voice] Token refresh failed', e);
1097
887
  }
1098
888
  }
1099
889
  const baseUrl = apiUrl.replace(/\/$/, '');
1100
- const postUrl = `${baseUrl}/ai/ask-voice-socket`;
1101
- const headers = {
1102
- 'Content-Type': 'application/json',
1103
- Authorization: `Bearer ${accessToken}`,
1104
- 'x-api-key': apiKey,
1105
- 'hive-bot-id': botId,
1106
- 'domain-authority': domainAuthority,
1107
- eventUrl,
1108
- eventId,
1109
- eventToken,
1110
- 'ngrok-skip-browser-warning': 'true',
1111
- };
1112
- const res = yield fetch(postUrl, {
1113
- method: 'POST',
1114
- headers,
1115
- body: JSON.stringify({
890
+ const pcClient = new PipecatClient({
891
+ transport: new WebSocketTransport(),
892
+ enableMic: true,
893
+ enableCam: false,
894
+ callbacks: {
895
+ onConnected: () => this.ngZone.run(() => this.onPipecatConnected()),
896
+ onDisconnected: () => this.ngZone.run(() => this.onPipecatDisconnected()),
897
+ onBotReady: () => this.ngZone.run(() => this.onBotReady()),
898
+ onUserTranscript: (data) => this.ngZone.run(() => this.userTranscriptSubject.next({ text: data.text, final: !!data.final })),
899
+ onBotTranscript: (data) => this.ngZone.run(() => this.botTranscriptSubject.next(data.text)),
900
+ onError: (err) => {
901
+ this.ngZone.run(() => {
902
+ console.error('[HiveGpt Voice] PipecatClient error', err);
903
+ this.callStateSubject.next('ended');
904
+ this.statusTextSubject.next('Connection failed');
905
+ });
906
+ },
907
+ },
908
+ });
909
+ this.pcClient = pcClient;
910
+ // Bot audio arrives as a MediaStreamTrack — wire to a hidden <audio> element
911
+ pcClient.on(RTVIEvent.TrackStarted, (track, participant) => {
912
+ if (!(participant === null || participant === void 0 ? void 0 : participant.local) && track.kind === 'audio') {
913
+ this.ngZone.run(() => this.setupBotAudioTrack(track));
914
+ }
915
+ });
916
+ // Speaking state comes straight from RTVI events
917
+ pcClient.on(RTVIEvent.BotStartedSpeaking, () => this.ngZone.run(() => this.onBotStartedSpeaking()));
918
+ pcClient.on(RTVIEvent.BotStoppedSpeaking, () => this.ngZone.run(() => this.onBotStoppedSpeaking()));
919
+ pcClient.on(RTVIEvent.UserStartedSpeaking, () => this.ngZone.run(() => {
920
+ this.isUserSpeakingSubject.next(true);
921
+ this.callStateSubject.next('listening');
922
+ }));
923
+ pcClient.on(RTVIEvent.UserStoppedSpeaking, () => this.ngZone.run(() => {
924
+ this.isUserSpeakingSubject.next(false);
925
+ if (this.callStateSubject.value === 'listening') {
926
+ this.callStateSubject.next('connected');
927
+ }
928
+ }));
929
+ // Acquire mic (triggers browser permission prompt)
930
+ yield pcClient.initDevices();
931
+ // Build headers using the browser Headers API (required by pipecat's APIRequest type)
932
+ const requestHeaders = new Headers();
933
+ requestHeaders.append('Authorization', `Bearer ${accessToken}`);
934
+ requestHeaders.append('x-api-key', apiKey);
935
+ requestHeaders.append('hive-bot-id', botId);
936
+ requestHeaders.append('domain-authority', domainAuthority);
937
+ requestHeaders.append('eventUrl', eventUrl);
938
+ requestHeaders.append('eventId', eventId);
939
+ requestHeaders.append('eventToken', eventToken);
940
+ requestHeaders.append('ngrok-skip-browser-warning', 'true');
941
+ // POST to /ai/ask-voice-socket → receives { ws_url } → WebSocketTransport connects
942
+ yield pcClient.startBotAndConnect({
943
+ endpoint: `${baseUrl}/ai/ask-voice-socket`,
944
+ headers: requestHeaders,
945
+ requestData: {
1116
946
  bot_id: botId,
1117
947
  conversation_id: conversationId,
1118
948
  voice: 'alloy',
1119
- }),
949
+ },
1120
950
  });
1121
- if (!res.ok) {
1122
- throw new Error(`HTTP ${res.status}`);
1123
- }
1124
- const json = yield res.json();
1125
- const wsUrl = (typeof (json === null || json === void 0 ? void 0 : json.ws_url) === 'string' && json.ws_url) ||
1126
- (typeof (json === null || json === void 0 ? void 0 : json.rn_ws_url) === 'string' && json.rn_ws_url);
1127
- if (!wsUrl) {
1128
- throw new Error('No ws_url in response');
1129
- }
1130
- const untilCallEnds$ = merge(this.destroy$, this.endCall$);
1131
- this.subscriptions.add(this.wsClient.userTranscript$
1132
- .pipe(takeUntil(untilCallEnds$))
1133
- .subscribe((t) => this.userTranscriptSubject.next(t)));
1134
- this.subscriptions.add(this.wsClient.botTranscript$
1135
- .pipe(takeUntil(untilCallEnds$))
1136
- .subscribe((t) => this.botTranscriptSubject.next(t)));
1137
- this.subscriptions.add(this.wsClient.opened$
1138
- .pipe(takeUntil(untilCallEnds$), take(1))
1139
- .subscribe(() => void this.onWebsocketOpened()));
1140
- this.wsClient.connect(wsUrl);
1141
951
  }
1142
952
  catch (error) {
1143
- console.error('Error connecting voice agent:', error);
953
+ console.error('[HiveGpt Voice] connect failed', error);
1144
954
  this.callStateSubject.next('ended');
1145
- yield this.disconnect();
955
+ yield this.cleanupPipecatClient();
1146
956
  this.statusTextSubject.next('Connection failed');
1147
957
  throw error;
1148
958
  }
1149
959
  });
1150
960
  }
1151
- onWebsocketOpened() {
1152
- return __awaiter(this, void 0, void 0, function* () {
1153
- if (this.callStateSubject.value !== 'connecting') {
1154
- return;
1155
- }
1156
- try {
1157
- yield this.startLocalMic();
1158
- this.statusTextSubject.next('Connected');
1159
- this.callStateSubject.next('connected');
1160
- this.wireSpeakingState();
1161
- }
1162
- catch (err) {
1163
- console.error('[HiveGpt Voice] Mic or session setup failed', err);
1164
- this.callStateSubject.next('ended');
1165
- this.statusTextSubject.next('Microphone unavailable');
1166
- yield this.disconnect();
1167
- }
1168
- });
1169
- }
1170
- wireSpeakingState() {
1171
- const untilCallEnds$ = merge(this.destroy$, this.endCall$);
1172
- const transcriptDrivenAssistant$ = this.wsClient.botTranscript$.pipe(switchMap(() => concat(of(true), timer(800).pipe(map(() => false)))), distinctUntilChanged());
1173
- const assistantTalking$ = merge(this.wsClient.assistantSpeaking$, transcriptDrivenAssistant$).pipe(distinctUntilChanged(), startWith(false));
1174
- const userTalking$ = combineLatest([
1175
- this.audioAnalyzer.isUserSpeaking$,
1176
- this.wsClient.serverUserSpeaking$.pipe(startWith(false)),
1177
- ]).pipe(map(([local, server]) => local || server), distinctUntilChanged(), startWith(false));
1178
- this.subscriptions.add(combineLatest([assistantTalking$, userTalking$])
1179
- .pipe(takeUntil(untilCallEnds$))
1180
- .subscribe(([bot, user]) => {
1181
- const current = this.callStateSubject.value;
1182
- if (user) {
1183
- this.isUserSpeakingSubject.next(true);
1184
- this.callStateSubject.next('listening');
1185
- }
1186
- else {
1187
- this.isUserSpeakingSubject.next(false);
1188
- }
1189
- if (user) {
1190
- return;
1191
- }
1192
- if (bot) {
1193
- if (this.callStartTime === 0) {
1194
- this.callStartTime = Date.now();
1195
- this.startDurationTimer();
1196
- }
1197
- this.callStateSubject.next('talking');
1198
- }
1199
- else if (current === 'talking' || current === 'listening') {
1200
- this.callStateSubject.next('connected');
1201
- }
1202
- }));
961
+ onPipecatConnected() {
962
+ this.callStateSubject.next('connected');
963
+ this.statusTextSubject.next('Connected');
964
+ this.isMicMutedSubject.next(false);
965
+ this.startLocalMicAnalyzer();
1203
966
  }
1204
- startLocalMic() {
1205
- return __awaiter(this, void 0, void 0, function* () {
1206
- this.stopLocalMic();
1207
- const stream = yield navigator.mediaDevices.getUserMedia({ audio: true });
1208
- const track = stream.getAudioTracks()[0];
1209
- if (!track) {
1210
- stream.getTracks().forEach((t) => t.stop());
1211
- throw new Error('No audio track');
1212
- }
1213
- this.localMicStream = stream;
1214
- this.isMicMutedSubject.next(!track.enabled);
1215
- this.audioAnalyzer.start(stream);
1216
- });
1217
- }
1218
- stopLocalMic() {
1219
- if (this.localMicStream) {
1220
- this.localMicStream.getTracks().forEach((t) => t.stop());
1221
- this.localMicStream = null;
1222
- }
1223
- }
1224
- enqueueRemoteAudio(chunk) {
1225
- this.pendingRemoteAudio.push(chunk.slice(0));
1226
- if (!this.remoteAudioPlaying) {
1227
- void this.playRemoteAudioQueue();
1228
- }
967
+ onPipecatDisconnected() {
968
+ this.stopDurationTimer();
969
+ this.callStartTime = 0;
970
+ this.audioAnalyzer.stop();
971
+ this.stopBotAudio();
972
+ this.callStateSubject.next('ended');
973
+ this.statusTextSubject.next('Call Ended');
1229
974
  }
1230
- playRemoteAudioQueue() {
1231
- return __awaiter(this, void 0, void 0, function* () {
1232
- this.remoteAudioPlaying = true;
1233
- const context = this.getOrCreateRemoteAudioContext();
1234
- while (this.pendingRemoteAudio.length > 0) {
1235
- const chunk = this.pendingRemoteAudio.shift();
1236
- if (!chunk)
1237
- continue;
1238
- try {
1239
- const decoded = yield this.decodeAudioChunk(context, chunk);
1240
- this.assistantAudioStarted();
1241
- yield this.playDecodedBuffer(context, decoded);
1242
- }
1243
- catch (_a) {
1244
- // Ignore undecodable chunks; server may mix non-audio binary events.
1245
- }
1246
- }
1247
- this.remoteAudioPlaying = false;
1248
- this.assistantAudioStopped();
1249
- });
975
+ onBotReady() {
976
+ var _a, _b, _c;
977
+ // Retry track wiring in case tracks weren't ready at onConnected
978
+ this.startLocalMicAnalyzer();
979
+ const botTrack = (_c = (_b = (_a = this.pcClient) === null || _a === void 0 ? void 0 : _a.tracks()) === null || _b === void 0 ? void 0 : _b.bot) === null || _c === void 0 ? void 0 : _c.audio;
980
+ if (botTrack)
981
+ this.setupBotAudioTrack(botTrack);
1250
982
  }
1251
- getOrCreateRemoteAudioContext() {
1252
- if (!this.remoteAudioContext || this.remoteAudioContext.state === 'closed') {
1253
- this.remoteAudioContext = new AudioContext();
1254
- }
1255
- if (this.remoteAudioContext.state === 'suspended') {
1256
- void this.remoteAudioContext.resume();
983
+ startLocalMicAnalyzer() {
984
+ var _a, _b, _c;
985
+ const localTrack = (_c = (_b = (_a = this.pcClient) === null || _a === void 0 ? void 0 : _a.tracks()) === null || _b === void 0 ? void 0 : _b.local) === null || _c === void 0 ? void 0 : _c.audio;
986
+ if (localTrack) {
987
+ this.audioAnalyzer.start(new MediaStream([localTrack]));
1257
988
  }
1258
- return this.remoteAudioContext;
1259
989
  }
1260
- decodeAudioChunk(context, chunk) {
1261
- return new Promise((resolve, reject) => {
1262
- context.decodeAudioData(chunk.slice(0), resolve, reject);
1263
- });
1264
- }
1265
- playDecodedBuffer(context, buffer) {
1266
- return new Promise((resolve) => {
1267
- const source = context.createBufferSource();
1268
- source.buffer = buffer;
1269
- source.connect(context.destination);
1270
- source.onended = () => resolve();
1271
- source.start();
1272
- });
1273
- }
1274
- assistantAudioStarted() {
990
+ onBotStartedSpeaking() {
1275
991
  if (this.callStartTime === 0) {
1276
992
  this.callStartTime = Date.now();
1277
993
  this.startDurationTimer();
1278
994
  }
1279
995
  this.callStateSubject.next('talking');
1280
996
  }
1281
- assistantAudioStopped() {
997
+ onBotStoppedSpeaking() {
1282
998
  if (this.callStateSubject.value === 'talking') {
1283
999
  this.callStateSubject.next('connected');
1284
1000
  }
1285
1001
  }
1286
- resetRemoteAudioPlayback() {
1287
- this.pendingRemoteAudio = [];
1288
- this.remoteAudioPlaying = false;
1289
- if (this.remoteAudioContext && this.remoteAudioContext.state !== 'closed') {
1290
- this.remoteAudioContext.close().catch(() => { });
1002
+ setupBotAudioTrack(track) {
1003
+ var _a;
1004
+ if (!this.botAudioElement) {
1005
+ this.botAudioElement = new Audio();
1006
+ this.botAudioElement.autoplay = true;
1291
1007
  }
1292
- this.remoteAudioContext = null;
1008
+ const existing = (_a = this.botAudioElement.srcObject) === null || _a === void 0 ? void 0 : _a.getAudioTracks()[0];
1009
+ if ((existing === null || existing === void 0 ? void 0 : existing.id) === track.id)
1010
+ return;
1011
+ this.botAudioElement.srcObject = new MediaStream([track]);
1012
+ this.botAudioElement.play().catch((err) => console.warn('[HiveGpt Voice] Bot audio play blocked', err));
1293
1013
  }
1294
- handleRemoteClose() {
1295
- return __awaiter(this, void 0, void 0, function* () {
1296
- const state = this.callStateSubject.value;
1297
- if (state === 'idle' || state === 'ended')
1298
- return;
1299
- this.endCall$.next();
1300
- this.stopDurationTimer();
1301
- this.callStartTime = 0;
1302
- this.audioAnalyzer.stop();
1303
- this.stopLocalMic();
1304
- this.resetRemoteAudioPlayback();
1305
- this.callStateSubject.next('ended');
1306
- this.statusTextSubject.next('Connection lost');
1307
- });
1014
+ stopBotAudio() {
1015
+ var _a;
1016
+ if (this.botAudioElement) {
1017
+ try {
1018
+ this.botAudioElement.pause();
1019
+ (_a = this.botAudioElement.srcObject) === null || _a === void 0 ? void 0 : _a.getAudioTracks().forEach((t) => t.stop());
1020
+ this.botAudioElement.srcObject = null;
1021
+ }
1022
+ catch (_b) {
1023
+ // ignore
1024
+ }
1025
+ this.botAudioElement = null;
1026
+ }
1308
1027
  }
1309
1028
  disconnect() {
1310
1029
  return __awaiter(this, void 0, void 0, function* () {
1311
- this.endCall$.next();
1312
1030
  this.stopDurationTimer();
1313
1031
  this.callStartTime = 0;
1314
1032
  this.audioAnalyzer.stop();
1315
- this.stopLocalMic();
1316
- this.resetRemoteAudioPlayback();
1317
- this.wsClient.disconnect();
1033
+ this.stopBotAudio();
1034
+ yield this.cleanupPipecatClient();
1318
1035
  this.callStateSubject.next('ended');
1319
1036
  this.statusTextSubject.next('Call Ended');
1320
1037
  });
1321
1038
  }
1039
+ cleanupPipecatClient() {
1040
+ return __awaiter(this, void 0, void 0, function* () {
1041
+ if (this.pcClient) {
1042
+ try {
1043
+ yield this.pcClient.disconnect();
1044
+ }
1045
+ catch (_a) {
1046
+ // ignore
1047
+ }
1048
+ this.pcClient = null;
1049
+ }
1050
+ });
1051
+ }
1322
1052
  toggleMic() {
1323
- var _a;
1053
+ if (!this.pcClient)
1054
+ return;
1324
1055
  const nextMuted = !this.isMicMutedSubject.value;
1325
- const track = (_a = this.localMicStream) === null || _a === void 0 ? void 0 : _a.getAudioTracks()[0];
1326
- if (track) {
1327
- track.enabled = !nextMuted;
1328
- }
1056
+ this.pcClient.enableMic(!nextMuted);
1329
1057
  this.isMicMutedSubject.next(nextMuted);
1058
+ if (nextMuted)
1059
+ this.isUserSpeakingSubject.next(false);
1330
1060
  }
1331
1061
  startDurationTimer() {
1332
- const updateDuration = () => {
1062
+ const tick = () => {
1333
1063
  if (this.callStartTime > 0) {
1334
1064
  const elapsed = Math.floor((Date.now() - this.callStartTime) / 1000);
1335
- const minutes = Math.floor(elapsed / 60);
1336
- const seconds = elapsed % 60;
1337
- this.durationSubject.next(`${minutes}:${String(seconds).padStart(2, '0')}`);
1065
+ const m = Math.floor(elapsed / 60);
1066
+ const s = elapsed % 60;
1067
+ this.durationSubject.next(`${m}:${String(s).padStart(2, '0')}`);
1338
1068
  }
1339
1069
  };
1340
- updateDuration();
1341
- this.durationInterval = setInterval(updateDuration, 1000);
1070
+ tick();
1071
+ this.durationInterval = setInterval(tick, 1000);
1342
1072
  }
1343
1073
  stopDurationTimer() {
1344
1074
  if (this.durationInterval) {
@@ -1347,7 +1077,7 @@ class VoiceAgentService {
1347
1077
  }
1348
1078
  }
1349
1079
  }
1350
- VoiceAgentService.ɵprov = i0.ɵɵdefineInjectable({ factory: function VoiceAgentService_Factory() { return new VoiceAgentService(i0.ɵɵinject(AudioAnalyzerService), i0.ɵɵinject(WebSocketVoiceClientService), i0.ɵɵinject(PlatformTokenRefreshService), i0.ɵɵinject(i0.PLATFORM_ID)); }, token: VoiceAgentService, providedIn: "root" });
1080
+ VoiceAgentService.ɵprov = i0.ɵɵdefineInjectable({ factory: function VoiceAgentService_Factory() { return new VoiceAgentService(i0.ɵɵinject(AudioAnalyzerService), i0.ɵɵinject(PlatformTokenRefreshService), i0.ɵɵinject(i0.NgZone), i0.ɵɵinject(i0.PLATFORM_ID)); }, token: VoiceAgentService, providedIn: "root" });
1351
1081
  VoiceAgentService.decorators = [
1352
1082
  { type: Injectable, args: [{
1353
1083
  providedIn: 'root',
@@ -1355,8 +1085,8 @@ VoiceAgentService.decorators = [
1355
1085
  ];
1356
1086
  VoiceAgentService.ctorParameters = () => [
1357
1087
  { type: AudioAnalyzerService },
1358
- { type: WebSocketVoiceClientService },
1359
1088
  { type: PlatformTokenRefreshService },
1089
+ { type: NgZone },
1360
1090
  { type: Object, decorators: [{ type: Inject, args: [PLATFORM_ID,] }] }
1361
1091
  ];
1362
1092
 
@@ -5395,8 +5125,8 @@ ChatBotComponent.propDecorators = {
5395
5125
  };
5396
5126
 
5397
5127
  /**
5398
- * Voice agent module. Uses native WebSocket for the voice session.
5399
- * Does NOT use Socket.IO or ngx-socket-io.
5128
+ * Voice agent module. Uses @pipecat-ai/client-js + @pipecat-ai/websocket-transport
5129
+ * (peer dependencies) for WebSocket transport, RTVI protocol, and audio.
5400
5130
  */
5401
5131
  class VoiceAgentModule {
5402
5132
  }
@@ -5411,7 +5141,6 @@ VoiceAgentModule.decorators = [
5411
5141
  providers: [
5412
5142
  VoiceAgentService,
5413
5143
  AudioAnalyzerService,
5414
- WebSocketVoiceClientService
5415
5144
  ],
5416
5145
  exports: [
5417
5146
  VoiceAgentModalComponent
@@ -5682,5 +5411,5 @@ HiveGptModule.decorators = [
5682
5411
  * Generated bundle index. Do not edit.
5683
5412
  */
5684
5413
 
5685
- export { AudioAnalyzerService, ChatBotComponent, ChatDrawerComponent, HIVEGPT_AUTH_STORAGE_KEY, HiveGptModule, PlatformTokenRefreshService, VOICE_MODAL_CLOSE_CALLBACK, VOICE_MODAL_CONFIG, VoiceAgentModalComponent, VoiceAgentModule, VoiceAgentService, eClassificationType, hiveGptAuthStorageKeyFactory, BotsService as ɵa, SocketService as ɵb, ConversationService as ɵc, NotificationSocket as ɵd, TranslationService as ɵe, WebSocketVoiceClientService as ɵf, VideoPlayerComponent as ɵg, SafeHtmlPipe as ɵh, BotHtmlEditorComponent as ɵi };
5414
+ export { AudioAnalyzerService, ChatBotComponent, ChatDrawerComponent, HIVEGPT_AUTH_STORAGE_KEY, HiveGptModule, PlatformTokenRefreshService, VOICE_MODAL_CLOSE_CALLBACK, VOICE_MODAL_CONFIG, VoiceAgentModalComponent, VoiceAgentModule, VoiceAgentService, eClassificationType, hiveGptAuthStorageKeyFactory, BotsService as ɵa, SocketService as ɵb, ConversationService as ɵc, NotificationSocket as ɵd, TranslationService as ɵe, VideoPlayerComponent as ɵf, SafeHtmlPipe as ɵg, BotHtmlEditorComponent as ɵh };
5686
5415
  //# sourceMappingURL=hivegpt-hiveai-angular.js.map