@decentchat/decentclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,882 @@
1
+ /**
2
+ * BotHuddleManager — Huddle signaling + WebRTC audio for the bot (Node.js side).
3
+ *
4
+ * Handles huddle-* signal types, creates real WebRTC PeerConnections via
5
+ * node-datachannel, and wires incoming audio through AudioPipeline → STT → LLM → TTS.
6
+ *
7
+ * Signal types handled:
8
+ * huddle-announce — someone started a huddle; auto-join if enabled
9
+ * huddle-join — someone joined the huddle
10
+ * huddle-leave — someone left the huddle
11
+ * huddle-offer — WebRTC offer → create PC, generate answer
12
+ * huddle-answer — WebRTC answer → setRemoteDescription
13
+ * huddle-ice — ICE candidate → addRemoteCandidate
14
+ * huddle-mute — mute state change
15
+ */
16
+
17
+ import ndc from 'node-datachannel';
18
+ import { AudioPipeline } from './AudioPipeline.js';
19
+ import { SpeechToText } from './SpeechToText.js';
20
+ import { TextToSpeech } from './TextToSpeech.js';
21
+
22
/** Callbacks the host application supplies for signaling, naming, and logging. */
export interface BotHuddleCallbacks {
  /** Send a signal payload to one specific peer; returns false when undeliverable. */
  sendSignal: (peerId: string, data: object) => boolean;
  /** Send a signal payload to every connected peer. */
  broadcastSignal: (data: object) => void;
  /** Resolve a peer id to a human-readable display name. */
  getDisplayName: (peerId: string) => string;
  /**
   * Invoked with each finished transcript; resolves to the bot's spoken reply,
   * or undefined to fall back to a simple echo response.
   */
  onTranscription?: (text: string, peerId: string, channelId: string) => Promise<string | undefined>;
  /** Optional logger; warn/error fall back to info when those levels are absent. */
  log?: { info: (s: string) => void; warn?: (s: string) => void; error?: (s: string) => void };
}

/** Tuning knobs for auto-join, STT engine selection, TTS voice, and VAD. */
export interface BotHuddleConfig {
  autoJoin?: boolean;                                                // default: true
  sttEngine?: 'whisper-cpp' | 'whisper-python' | 'openai' | 'groq';  // default: 'whisper-cpp'
  whisperModel?: string;                                             // default: 'medium'
  sttLanguage?: string;                                              // reused for TTS pronunciation
  sttApiKey?: string;
  ttsVoice?: string;                                                 // friendly name or raw ElevenLabs voice id
  vadSilenceMs?: number;                                             // default: 500
  vadThreshold?: number;                                             // default: 0.02
}

/** Lifecycle of the bot's huddle participation. */
export type BotHuddleState = 'inactive' | 'listening' | 'in-call';

/** A remote huddle participant as tracked by the bot. */
export interface BotHuddleParticipant {
  peerId: string;
  displayName: string;
  muted: boolean;
}

/** Per-peer WebRTC state: connection, bidirectional track, and manual RTP bookkeeping. */
interface PeerAudioState {
  pc: ndc.PeerConnection;
  track: ndc.Track | null;
  rtpConfig?: any; // RtpPacketizationConfig — needed for manual timestamp control
}
54
+
55
/** Orchestrates huddle signaling, per-peer WebRTC audio, and the STT→LLM→TTS voice pipeline. */
export class BotHuddleManager {
  // NOTE(review): only 'inactive' and 'in-call' are ever assigned in this file;
  // the 'listening' state value appears unused — confirm against other callers.
  private state: BotHuddleState = 'inactive';
  private activeChannelId: string | null = null;
  private readonly myPeerId: string;
  private readonly callbacks: BotHuddleCallbacks;
  // Remote participants keyed by peer id (never contains the bot itself).
  private readonly participants = new Map<string, BotHuddleParticipant>();
  private readonly autoJoin: boolean;

  // WebRTC state per peer
  private readonly peerConnections = new Map<string, PeerAudioState>();
  private readonly audioTracks = new Map<string, ndc.Track>(); // receiving tracks (from onTrack)
  private readonly sendTracks = new Map<string, ndc.Track>(); // sending tracks (from addTrack)

  // Audio pipeline components
  private audioPipeline: AudioPipeline;
  private stt: SpeechToText;
  private tts: TextToSpeech | null = null; // stays null when ELEVENLABS_API_KEY is absent

  // Track which peer is currently speaking (for attribution); set from the
  // last received RTP packet, read when an utterance finishes.
  private currentSpeakerPeerId: string | null = null;

  // Prevent overlapping responses
  private isProcessing = false;
  // Barge-in: abort current TTS send when user starts talking
  private abortSending = false;

  /**
   * @param myPeerId  This bot's own peer id (used as the sender id in signals).
   * @param callbacks Host-provided signaling/naming/logging hooks.
   * @param opts      Optional tuning (auto-join, STT engine, TTS voice, VAD).
   *
   * Side effects: constructs AudioPipeline and SpeechToText immediately;
   * TextToSpeech is only created when ELEVENLABS_API_KEY is present in the
   * environment, otherwise spoken replies are disabled with a warning.
   */
  constructor(myPeerId: string, callbacks: BotHuddleCallbacks, opts?: BotHuddleConfig) {
    this.myPeerId = myPeerId;
    this.callbacks = callbacks;
    this.autoJoin = opts?.autoJoin ?? true;

    // Audio pipeline: 48 kHz mono with VAD; the speech start/end callbacks
    // drive barge-in handling and the voice pipeline respectively.
    this.audioPipeline = new AudioPipeline({
      sampleRate: 48000,
      channels: 1,
      vadThreshold: opts?.vadThreshold ?? 0.02,
      vadSilenceMs: opts?.vadSilenceMs ?? 500,
      onSpeechStart: () => {
        if (this.isProcessing) {
          // Barge-in: the user started talking while we are mid-reply —
          // flag the send loop to stop between/within sentences.
          this.log('info', '[bot-huddle] barge-in detected — aborting current response');
          this.abortSending = true;
          this.emitStatus('interrupted');
        } else {
          this.emitStatus('hearing');
        }
      },
      onSpeechEnd: (pcm) => this.handleSpeechEnd(pcm),
      log: callbacks.log,
    });

    // Speech-to-text engine (defaults: whisper-cpp with the 'medium' model).
    this.stt = new SpeechToText({
      engine: opts?.sttEngine ?? 'whisper-cpp',
      model: opts?.whisperModel ?? 'medium',
      language: opts?.sttLanguage,
      apiKey: opts?.sttApiKey,
      log: callbacks.log,
    });

    // Initialize TTS (needs API key from env)
    const elevenLabsKey = process.env.ELEVENLABS_API_KEY;
    if (elevenLabsKey) {
      this.tts = new TextToSpeech({
        apiKey: elevenLabsKey,
        voiceId: this.resolveVoiceId(opts?.ttsVoice),
        language: opts?.sttLanguage, // same language for TTS pronunciation
        log: callbacks.log,
      });
    } else {
      this.log('warn', '[bot-huddle] ELEVENLABS_API_KEY not set — TTS disabled');
    }
  }
127
+
128
  // ── Accessors ──────────────────────────────────────────────────────

  /** Current lifecycle state of the bot's huddle participation. */
  getState(): BotHuddleState {
    return this.state;
  }

  /** Channel id of the active huddle, or null when not in one. */
  getActiveChannelId(): string | null {
    return this.activeChannelId;
  }

  /** Snapshot array of the known remote participants (excludes the bot itself). */
  getParticipants(): BotHuddleParticipant[] {
    return Array.from(this.participants.values());
  }
141
+
142
+ // ── Signal dispatch ────────────────────────────────────────────────
143
+
144
+ async handleSignal(fromPeerId: string, data: any): Promise<void> {
145
+ const type = data?.type as string | undefined;
146
+ if (!type) return;
147
+
148
+ switch (type) {
149
+ case 'huddle-announce':
150
+ this.handleAnnounce(fromPeerId, data);
151
+ break;
152
+ case 'huddle-join':
153
+ this.handleJoin(fromPeerId, data);
154
+ break;
155
+ case 'huddle-leave':
156
+ this.handleLeave(fromPeerId, data);
157
+ break;
158
+ case 'huddle-offer':
159
+ this.handleOffer(fromPeerId, data);
160
+ break;
161
+ case 'huddle-answer':
162
+ this.handleAnswer(fromPeerId, data);
163
+ break;
164
+ case 'huddle-ice':
165
+ this.handleIce(fromPeerId, data);
166
+ break;
167
+ case 'huddle-stats':
168
+ this.log('info', `[bot-huddle] stats from ${fromPeerId.slice(0, 8)}: ${JSON.stringify(data.stats).slice(0, 200)}`);
169
+ break;
170
+ case 'huddle-mute':
171
+ this.handleMute(fromPeerId, data);
172
+ break;
173
+ default:
174
+ this.log('warn', `[bot-huddle] unknown huddle signal: ${type}`);
175
+ }
176
+ }
177
+
178
+ // ── Manual join/leave ──────────────────────────────────────────────
179
+
180
+ join(channelId: string): void {
181
+ if (this.state === 'in-call') return;
182
+
183
+ this.activeChannelId = channelId;
184
+ this.state = 'in-call';
185
+
186
+ this.callbacks.broadcastSignal({
187
+ type: 'huddle-join',
188
+ channelId,
189
+ peerId: this.myPeerId,
190
+ });
191
+
192
+ this.log('info', `[bot-huddle] joined huddle in ${channelId}`);
193
+ this.emitStatus('listening');
194
+ }
195
+
196
+ leave(): void {
197
+ if (this.state === 'inactive') return;
198
+
199
+ const channelId = this.activeChannelId;
200
+
201
+ // Cleanup all peer connections
202
+ for (const [peerId] of this.peerConnections) {
203
+ this.cleanupPeer(peerId);
204
+ }
205
+
206
+ this.participants.clear();
207
+ this.state = 'inactive';
208
+ this.activeChannelId = null;
209
+ this.audioPipeline.reset();
210
+
211
+ if (channelId) {
212
+ this.callbacks.broadcastSignal({
213
+ type: 'huddle-leave',
214
+ channelId,
215
+ peerId: this.myPeerId,
216
+ });
217
+ }
218
+
219
+ this.log('info', `[bot-huddle] left huddle in ${channelId ?? '(none)'}`);
220
+ }
221
+
222
+ // ── Signal handlers ────────────────────────────────────────────────
223
+
224
+ private handleAnnounce(fromPeerId: string, data: any): void {
225
+ const channelId = data.channelId as string;
226
+ this.log('info', `[bot-huddle] huddle-announce from ${fromPeerId} in ${channelId}`);
227
+
228
+ this.participants.set(fromPeerId, {
229
+ peerId: fromPeerId,
230
+ displayName: this.callbacks.getDisplayName(fromPeerId),
231
+ muted: false,
232
+ });
233
+
234
+ if (this.autoJoin && this.state === 'inactive') {
235
+ this.activeChannelId = channelId;
236
+ this.state = 'in-call';
237
+
238
+ this.callbacks.broadcastSignal({
239
+ type: 'huddle-join',
240
+ channelId,
241
+ peerId: this.myPeerId,
242
+ });
243
+
244
+ this.log('info', `[bot-huddle] auto-joined huddle in ${channelId}`);
245
+ this.emitStatus('listening');
246
+ } else if (this.state === 'in-call' && this.activeChannelId === channelId) {
247
+ // Already in a huddle — notify the announcing peer so they send us an offer
248
+ this.log('info', `[bot-huddle] already in-call, notifying announcer ${fromPeerId}`);
249
+ this.callbacks.sendSignal(fromPeerId, {
250
+ type: 'huddle-join',
251
+ channelId,
252
+ peerId: this.myPeerId,
253
+ });
254
+ }
255
+
256
+
257
+ }
258
+
259
+ private handleJoin(fromPeerId: string, data: any): void {
260
+ const channelId = data.channelId as string;
261
+ this.log('info', `[bot-huddle] huddle-join from ${fromPeerId} in ${channelId}`);
262
+
263
+ this.participants.set(fromPeerId, {
264
+ peerId: fromPeerId,
265
+ displayName: this.callbacks.getDisplayName(fromPeerId),
266
+ muted: false,
267
+ });
268
+
269
+ if (this.state === 'in-call' && this.activeChannelId === channelId) {
270
+ // Send a targeted huddle-join to the new peer so they discover us
271
+ // and send us an offer. This avoids SDP glare (both sides sending
272
+ // offers simultaneously) which kills the connection.
273
+ this.log('info', `[bot-huddle] notifying new peer ${fromPeerId} of our presence`);
274
+ this.callbacks.sendSignal(fromPeerId, {
275
+ type: 'huddle-join',
276
+ channelId,
277
+ peerId: this.myPeerId,
278
+ });
279
+ }
280
+
281
+
282
+
283
+ }
284
+
285
+ private handleLeave(fromPeerId: string, data: any): void {
286
+ const channelId = data.channelId as string;
287
+ this.log('info', `[bot-huddle] huddle-leave from ${fromPeerId} in ${channelId}`);
288
+
289
+ this.participants.delete(fromPeerId);
290
+ this.cleanupPeer(fromPeerId);
291
+
292
+ // If no participants left and we're in a call, go inactive
293
+ if (this.state === 'in-call' && this.participants.size === 0) {
294
+ this.state = 'inactive';
295
+ this.activeChannelId = null;
296
+ this.audioPipeline.reset();
297
+ this.log('info', `[bot-huddle] all participants left, going inactive`);
298
+ }
299
+ }
300
+
301
  /**
   * Handle a remote WebRTC offer: build a PeerConnection with a single
   * SendRecv audio track, wire incoming RTP into the audio pipeline, and let
   * node-datachannel's auto-negotiation produce the answer (forwarded via
   * onLocalDescription). Any existing connection for this peer is torn down
   * first, so a re-offer restarts the session cleanly.
   */
  private handleOffer(fromPeerId: string, data: any): void {
    this.log('info', `[bot-huddle] received offer from ${fromPeerId}`);

    // Cleanup any existing PC for this peer
    this.cleanupPeer(fromPeerId);

    try {
      const pc = new ndc.PeerConnection('bot-huddle', {
        iceServers: ['stun:stun.l.google.com:19302'],
        // AUTO-NEGOTIATION enabled so that SRTP sender is fully initialized.
        // onLocalDescription filters out the spurious re-offer from addTrack.
      });

      // Store peer state (track set via onTrack)
      const peerState: PeerAudioState = { pc, track: null };
      this.peerConnections.set(fromPeerId, peerState);

      // Extract Opus payload type from the browser's offer SDP.
      // Chrome uses PT=111, Firefox/Zen uses PT=109. We MUST match.
      const offerSdp = typeof data.sdp === 'object' ? data.sdp.sdp : data.sdp;
      const opusPt = this.extractOpusPayloadType(offerSdp);
      this.log('info', `[bot-huddle] browser Opus PT = ${opusPt}`);

      // Single bidirectional track for both SEND and RECEIVE.
      // addTrack with mid='0' matches the browser's offer m-line.
      // node-datachannel uses this ONE track for both directions (onTrack won't fire).
      const audio = new ndc.Audio('0', 'SendRecv');
      audio.addOpusCodec(opusPt);
      audio.addSSRC(1234, 'bot-audio', 'bot-stream', 'audio-track');
      const track = pc.addTrack(audio);

      // Media handler chain: RTCP Sender Reports + Receiving Session.
      // Chrome requires RTCP SR to synchronize RTP timing and start decoding.
      // Without this, Chrome receives SRTP packets but plays silence because
      // it has no timing reference. We still construct RTP headers manually
      // and send via sendMessageBinary — the media handler chain handles RTCP.
      const rtpCfg = new ndc.RtpPacketizationConfig(1234, 'bot-audio', opusPt, 48000);
      const srReporter = new ndc.RtcpSrReporter(rtpCfg);
      srReporter.addToChain(new ndc.RtcpReceivingSession());
      track.setMediaHandler(srReporter);

      // Manual RTP bookkeeping for outgoing packets; random initial
      // sequence/timestamp, advanced per frame in sendFramesToAllPeers.
      const manualRtp = {
        ssrc: 1234,
        payloadType: opusPt,
        sequenceNumber: Math.floor(Math.random() * 65535),
        timestamp: Math.floor(Math.random() * 0xFFFFFFFF),
      };
      peerState.rtpConfig = manualRtp as any;

      peerState.track = track;
      this.audioTracks.set(fromPeerId, track);
      this.sendTracks.set(fromPeerId, track); // same track for sending

      let msgCount = 0;
      track.onMessage((buf: Buffer) => {
        // CRITICAL: copy the buffer and defer processing to Node.js event loop.
        // node-datachannel calls this from a native thread; doing work here
        // (Opus decode, PCM conversion) can cause segfaults.
        const copy = Buffer.from(buf);
        msgCount++;
        const n = msgCount;
        setImmediate(() => {
          try {
            // Log the first few packets and then every 500th to avoid log spam.
            if (n <= 3 || n % 500 === 0) {
              this.log('info', `[bot-huddle] track.onMessage #${n} from ${fromPeerId}, ${copy.length} bytes`);
            }
            this.currentSpeakerPeerId = fromPeerId;
            this.audioPipeline.feedRtpPacket(copy);
          } catch (err) {
            this.log('error', `[bot-huddle] feedRtpPacket error: ${String(err)}`);
          }
        });
      });

      track.onOpen(() => {
        this.log('info', `[bot-huddle] audio track opened for ${fromPeerId}`);
        this.sendTracks.set(fromPeerId, track);
      });

      track.onClosed(() => {
        this.log('info', `[bot-huddle] audio track closed for ${fromPeerId}`);
      });

      track.onError((err: string) => {
        this.log('error', `[bot-huddle] audio track error for ${fromPeerId}: ${err}`);
      });

      // Only send the answer SDP
      pc.onLocalDescription((sdp: string, type: string) => {
        const lowerType = type.toLowerCase();
        this.log('info', `[bot-huddle] onLocalDescription type=${lowerType} for ${fromPeerId} (${sdp.length} chars)\n${sdp}`);
        if (lowerType !== 'answer') {
          this.log('warn', `[bot-huddle] unexpected non-answer SDP (type=${lowerType}), ignoring`);
          return;
        }
        this.callbacks.sendSignal(fromPeerId, {
          type: 'huddle-answer',
          sdp: { sdp, type: lowerType },
          channelId: this.activeChannelId,
          fromPeerId: this.myPeerId,
        });
      });

      // Forward ICE candidates
      pc.onLocalCandidate((candidate: string, mid: string) => {
        this.callbacks.sendSignal(fromPeerId, {
          type: 'huddle-ice',
          candidate: { candidate, sdpMid: mid },
          channelId: this.activeChannelId,
          fromPeerId: this.myPeerId,
        });
      });

      pc.onStateChange((state: string) => {
        this.log('info', `[bot-huddle] PC state for ${fromPeerId}: ${state}`);
        if (state === 'disconnected' || state === 'failed' || state === 'closed') {
          this.cleanupPeer(fromPeerId);
        }
      });

      // Extract SDP from the offer - handle both formats:
      // Browser sends: { sdp: { sdp: 'v=0...', type: 'offer' } } OR { sdp: 'v=0...' }
      let sdpString: string;
      let sdpType: string;

      if (typeof data.sdp === 'object' && data.sdp !== null) {
        sdpString = data.sdp.sdp;
        sdpType = data.sdp.type || 'offer';
      } else if (typeof data.sdp === 'string') {
        sdpString = data.sdp;
        sdpType = 'offer';
      } else {
        this.log('error', `[bot-huddle] invalid SDP in offer from ${fromPeerId}`);
        return;
      }

      // Normalize type to node-datachannel's DescriptionType format (e.g. 'Offer')
      const normalizedType = sdpType.charAt(0).toUpperCase() + sdpType.slice(1).toLowerCase();

      this.log('info', `[bot-huddle] setting remote description (type=${normalizedType}, sdp=${sdpString.length} chars)\n${sdpString}`);
      pc.setRemoteDescription(sdpString, normalizedType as any);

      // Auto-negotiation generates the answer automatically after setRemoteDescription.
      // The onLocalDescription callback filters to only send the answer.

    } catch (err) {
      this.log('error', `[bot-huddle] failed to handle offer from ${fromPeerId}: ${String(err)}`);
      this.cleanupPeer(fromPeerId);
    }
  }
451
+
452
+ private handleAnswer(fromPeerId: string, data: any): void {
453
+ this.log('info', `[bot-huddle] received answer from ${fromPeerId}`);
454
+
455
+ const peerState = this.peerConnections.get(fromPeerId);
456
+ if (!peerState) {
457
+ this.log('warn', `[bot-huddle] no PC found for answer from ${fromPeerId}`);
458
+ return;
459
+ }
460
+
461
+ try {
462
+ let sdpString: string;
463
+ let sdpType: string;
464
+
465
+ if (typeof data.sdp === 'object' && data.sdp !== null) {
466
+ sdpString = data.sdp.sdp;
467
+ sdpType = data.sdp.type || 'answer';
468
+ } else if (typeof data.sdp === 'string') {
469
+ sdpString = data.sdp;
470
+ sdpType = 'answer';
471
+ } else {
472
+ this.log('error', `[bot-huddle] invalid SDP in answer from ${fromPeerId}`);
473
+ return;
474
+ }
475
+
476
+ const normalizedType = sdpType.charAt(0).toUpperCase() + sdpType.slice(1).toLowerCase();
477
+ peerState.pc.setRemoteDescription(sdpString, normalizedType as any);
478
+ } catch (err) {
479
+ this.log('error', `[bot-huddle] failed to set answer from ${fromPeerId}: ${String(err)}`);
480
+ }
481
+ }
482
+
483
+ private handleIce(fromPeerId: string, data: any): void {
484
+ const peerState = this.peerConnections.get(fromPeerId);
485
+ if (!peerState) {
486
+ this.log('warn', `[bot-huddle] no PC found for ICE from ${fromPeerId}`);
487
+ return;
488
+ }
489
+
490
+ try {
491
+ // Browser sends: { candidate: { candidate: 'candidate:...', sdpMid: '0' } }
492
+ // OR: { candidate: 'candidate:...', sdpMid: '0' }
493
+ let candidateStr: string;
494
+ let mid: string;
495
+
496
+ if (typeof data.candidate === 'object' && data.candidate !== null) {
497
+ candidateStr = data.candidate.candidate;
498
+ mid = data.candidate.sdpMid ?? '0';
499
+ } else if (typeof data.candidate === 'string') {
500
+ candidateStr = data.candidate;
501
+ mid = data.sdpMid ?? '0';
502
+ } else {
503
+ this.log('warn', `[bot-huddle] invalid ICE candidate from ${fromPeerId}`);
504
+ return;
505
+ }
506
+
507
+ peerState.pc.addRemoteCandidate(candidateStr, mid);
508
+ } catch (err) {
509
+ this.log('error', `[bot-huddle] failed to add ICE from ${fromPeerId}: ${String(err)}`);
510
+ }
511
+ }
512
+
513
+ private handleMute(fromPeerId: string, data: any): void {
514
+ const muted = data.muted as boolean;
515
+ const participant = this.participants.get(fromPeerId);
516
+ if (participant) {
517
+ participant.muted = muted;
518
+ this.participants.set(fromPeerId, participant);
519
+ this.log('info', `[bot-huddle] ${fromPeerId} ${muted ? 'muted' : 'unmuted'}`);
520
+ }
521
+ }
522
+
523
+
524
  // ── Initiate WebRTC connection to a peer (bot as offerer) ────────

  /**
   * Open a WebRTC connection where the BOT is the offerer — the mirror image
   * of handleOffer. Assumes Opus PT=111 (Chrome default); the remote answer
   * confirms codec compatibility. No-op when a connection for this peer
   * already exists.
   *
   * NOTE(review): no caller is visible in this file — presumably invoked from
   * elsewhere in the package; verify before removing.
   */
  private initiateConnectionTo(peerId: string): void {
    // Skip if we already have a connection to this peer
    const existing = this.peerConnections.get(peerId);
    if (existing) {
      this.log('info', `[bot-huddle] already have PC for ${peerId}, skipping initiation`);
      return;
    }

    this.log('info', `[bot-huddle] initiating WebRTC connection to ${peerId}`);

    try {
      const pc = new ndc.PeerConnection('bot-huddle-init', {
        iceServers: ['stun:stun.l.google.com:19302'],
      });

      const peerState: PeerAudioState = { pc, track: null };
      this.peerConnections.set(peerId, peerState);

      // Use default Opus PT=111 (Chrome standard).
      // The browser's answer will confirm compatibility.
      const opusPt = 111;

      // Single bidirectional SendRecv track, same layout as handleOffer.
      const audio = new ndc.Audio('0', 'SendRecv');
      audio.addOpusCodec(opusPt);
      audio.addSSRC(1234, 'bot-audio', 'bot-stream', 'audio-track');
      const track = pc.addTrack(audio);

      // Media handler chain for RTCP Sender Reports
      const rtpCfg = new ndc.RtpPacketizationConfig(1234, 'bot-audio', opusPt, 48000);
      const srReporter = new ndc.RtcpSrReporter(rtpCfg);
      srReporter.addToChain(new ndc.RtcpReceivingSession());
      track.setMediaHandler(srReporter);

      // Manual RTP bookkeeping (advanced per frame in sendFramesToAllPeers).
      const manualRtp = {
        ssrc: 1234,
        payloadType: opusPt,
        sequenceNumber: Math.floor(Math.random() * 65535),
        timestamp: Math.floor(Math.random() * 0xFFFFFFFF),
      };
      peerState.rtpConfig = manualRtp as any;
      peerState.track = track;
      this.audioTracks.set(peerId, track);
      this.sendTracks.set(peerId, track);

      // Wire incoming audio. Copy + setImmediate: the callback fires on a
      // native thread, so heavy work here can segfault (see handleOffer).
      let msgCount = 0;
      track.onMessage((buf: Buffer) => {
        const copy = Buffer.from(buf);
        msgCount++;
        const n = msgCount;
        setImmediate(() => {
          try {
            if (n <= 3 || n % 500 === 0) {
              this.log('info', `[bot-huddle] track.onMessage #${n} from ${peerId}, ${copy.length} bytes`);
            }
            this.currentSpeakerPeerId = peerId;
            this.audioPipeline.feedRtpPacket(copy);
          } catch (err) {
            this.log('error', `[bot-huddle] feedRtpPacket error: ${String(err)}`);
          }
        });
      });

      track.onOpen(() => {
        this.log('info', `[bot-huddle] audio track opened for ${peerId} (initiated)`);
        this.sendTracks.set(peerId, track);
      });

      track.onClosed(() => {
        this.log('info', `[bot-huddle] audio track closed for ${peerId} (initiated)`);
      });

      track.onError((err: string) => {
        this.log('error', `[bot-huddle] audio track error for ${peerId}: ${err}`);
      });

      // Send the OFFER (not answer) — this is the key difference from handleOffer
      pc.onLocalDescription((sdp: string, type: string) => {
        const lowerType = type.toLowerCase();
        this.log('info', `[bot-huddle] onLocalDescription (initiate) type=${lowerType} for ${peerId} (${sdp.length} chars)`);
        if (lowerType !== 'offer') {
          this.log('info', `[bot-huddle] ignoring non-offer SDP (type=${lowerType}) during initiation`);
          return;
        }
        this.callbacks.sendSignal(peerId, {
          type: 'huddle-offer',
          sdp: { sdp, type: lowerType },
          channelId: this.activeChannelId,
          fromPeerId: this.myPeerId,
        });
      });

      // Forward ICE candidates
      pc.onLocalCandidate((candidate: string, mid: string) => {
        this.callbacks.sendSignal(peerId, {
          type: 'huddle-ice',
          candidate: { candidate, sdpMid: mid },
          channelId: this.activeChannelId,
          fromPeerId: this.myPeerId,
        });
      });

      pc.onStateChange((state: string) => {
        this.log('info', `[bot-huddle] PC state (initiated) for ${peerId}: ${state}`);
        if (state === 'connected') {
          this.log('info', `[bot-huddle] WebRTC connected to ${peerId} (bot-initiated)`);
        } else if (state === 'disconnected' || state === 'failed' || state === 'closed') {
          this.cleanupPeer(peerId);
        }
      });

      // Explicitly trigger offer generation.
      // Unlike the answer path (where setRemoteDescription auto-generates the answer),
      // the offerer must call setLocalDescription to produce the offer SDP.
      pc.setLocalDescription();

    } catch (err) {
      this.log('error', `[bot-huddle] failed to initiate connection to ${peerId}: ${String(err)}`);
      this.cleanupPeer(peerId);
    }
  }
647
  // ── Voice Pipeline: STT → LLM → TTS (sentence-streamed) ────────

  /**
   * Run the full voice pipeline on one finished utterance:
   *   1. STT — transcribe the 48 kHz PCM buffer.
   *   2. LLM — ask the host for a reply via onTranscription (echo fallback).
   *   3. TTS — synthesize sentence-by-sentence and stream frames to all peers.
   *
   * Re-entrancy is blocked by isProcessing; abortSending (set by barge-in in
   * the VAD onSpeechStart callback) stops synthesis/sending between and
   * within sentences. Status signals are broadcast at each stage.
   */
  private async handleSpeechEnd(pcm: Buffer): Promise<void> {
    if (this.isProcessing) {
      this.log('info', '[bot-huddle] already processing speech, skipping');
      return;
    }

    this.isProcessing = true;
    this.abortSending = false;
    // Attribute the utterance to the last peer whose RTP packet we received.
    const speakerPeerId = this.currentSpeakerPeerId ?? 'unknown';
    const channelId = this.activeChannelId ?? '';
    const pipelineStart = Date.now();

    try {
      // 1. STT
      this.emitStatus('transcribing');
      const sttStart = Date.now();
      const text = await this.stt.transcribe(pcm, 48000);
      const sttMs = Date.now() - sttStart;
      if (!text || text.length < 2) {
        this.log('info', '[bot-huddle] STT returned empty/noise, skipping');
        return;
      }
      this.log('info', `[bot-huddle] heard from ${speakerPeerId.slice(0, 8)}: "${text}" (STT: ${sttMs}ms)`);

      // 2. LLM
      this.emitStatus('thinking');
      const llmStart = Date.now();
      let response: string | undefined;
      try {
        response = await this.callbacks.onTranscription?.(text, speakerPeerId, channelId);
      } catch (llmErr) {
        this.log('error', `[bot-huddle] LLM call failed: ${String(llmErr)}`);
      }
      if (!response) {
        // Echo fallback keeps the huddle interactive even when the LLM is down.
        response = `I heard you say: ${text}`;
        this.log('info', `[bot-huddle] LLM unavailable, using echo response`);
      }
      const llmMs = Date.now() - llmStart;

      // Check speakability: strip emoji, skip if almost nothing remains.
      const speakableText = response.replace(/[\p{Emoji_Presentation}\p{Extended_Pictographic}]/gu, '').trim();
      if (speakableText.length < 3) {
        this.log('info', `[bot-huddle] response too short for TTS ("${response}"), skipping`);
        return;
      }

      this.log('info', `[bot-huddle] responding (LLM: ${llmMs}ms): "${response.slice(0, 100)}${response.length > 100 ? '...' : ''}"`);

      if (!this.tts) {
        this.log('warn', '[bot-huddle] TTS not available');
        return;
      }

      // 3. Split into sentences and stream TTS+send for each
      const sentences = this.splitIntoSentences(speakableText);
      this.log('info', `[bot-huddle] streaming ${sentences.length} sentence(s)`);
      this.emitStatus('speaking');

      const ttsStart = Date.now();
      let totalFrames = 0;

      for (const sentence of sentences) {
        if (sentence.length < 2) continue;
        if (this.abortSending) {
          this.log('info', `[bot-huddle] barge-in: stopped after ${totalFrames} frames`);
          break;
        }
        const frames = await this.tts.speakRaw(sentence);
        totalFrames += frames.length;
        // Barge-in may have occurred while the TTS request was in flight.
        if (this.abortSending) {
          this.log('info', `[bot-huddle] barge-in: skipping send after TTS`);
          break;
        }
        // Second arg is true only for the first sentence's batch of frames
        // (totalFrames equals frames.length only on the first iteration that
        // produced frames) — it controls the RTP marker bit.
        await this.sendFramesToAllPeers(frames, totalFrames === frames.length);
      }

      const ttsMs = Date.now() - ttsStart;
      const totalMs = Date.now() - pipelineStart;
      this.log('info', `[bot-huddle] pipeline done: STT=${sttMs}ms LLM=${llmMs}ms TTS+send=${ttsMs}ms total=${totalMs}ms (${totalFrames} frames, ${(totalFrames * 0.02).toFixed(1)}s audio)`);

    } catch (err) {
      this.log('error', `[bot-huddle] voice pipeline error: ${String(err)}`);
    } finally {
      this.isProcessing = false;
      this.abortSending = false;
      this.emitStatus('listening');
    }
  }
737
+
738
+ /**
739
+ * Split text into sentences for progressive TTS.
740
+ * Keeps sentences together if very short (<30 chars) to reduce API calls.
741
+ */
742
+ private splitIntoSentences(text: string): string[] {
743
+ // Split on sentence-ending punctuation
744
+ const raw = text.match(/[^.!?]+[.!?]+|[^.!?]+$/g) ?? [text];
745
+ const sentences: string[] = [];
746
+ let buffer = '';
747
+
748
+ for (const s of raw) {
749
+ buffer += s;
750
+ // Flush when buffer is long enough (>60 chars) or it's the last segment
751
+ if (buffer.length >= 60 || s === raw[raw.length - 1]) {
752
+ sentences.push(buffer.trim());
753
+ buffer = '';
754
+ }
755
+ }
756
+ if (buffer.trim()) sentences.push(buffer.trim());
757
+ return sentences;
758
+ }
759
+
760
+ /**
761
+ * Send Opus frames to all connected peers with RTP pacing.
762
+ */
763
+ private async sendFramesToAllPeers(frames: Buffer[], isFirstBatch: boolean): Promise<void> {
764
+ const SAMPLES_PER_FRAME = 960;
765
+
766
+ for (const [peerId, track] of this.sendTracks) {
767
+ if (!track.isOpen()) {
768
+ this.log('warn', `[bot-huddle] track NOT open for ${peerId.slice(0, 8)}, skip`);
769
+ continue;
770
+ }
771
+
772
+ let sentOk = 0, sentFail = 0;
773
+ const peerState = this.peerConnections.get(peerId);
774
+ const rtpConfig = peerState?.rtpConfig;
775
+
776
+ for (const frame of frames) {
777
+ if (this.abortSending) {
778
+ this.log('info', `[bot-huddle] barge-in: stopped mid-send at frame ${sentOk}/${frames.length}`);
779
+ break;
780
+ }
781
+ if (rtpConfig) {
782
+ rtpConfig.sequenceNumber = (rtpConfig.sequenceNumber + 1) & 0xFFFF;
783
+ rtpConfig.timestamp = (rtpConfig.timestamp + SAMPLES_PER_FRAME) >>> 0;
784
+ }
785
+ const rtpHeader = Buffer.alloc(12);
786
+ const isFirst = isFirstBatch && (sentOk + sentFail === 0);
787
+ rtpHeader[0] = 0x80;
788
+ rtpHeader[1] = (isFirst ? 0x80 : 0x00) | (rtpConfig?.payloadType ?? 111);
789
+ rtpHeader.writeUInt16BE(rtpConfig?.sequenceNumber ?? 0, 2);
790
+ rtpHeader.writeUInt32BE(rtpConfig?.timestamp ?? 0, 4);
791
+ rtpHeader.writeUInt32BE(rtpConfig?.ssrc ?? 1234, 8);
792
+
793
+ const ok = track.sendMessageBinary(Buffer.concat([rtpHeader, frame]));
794
+ if (ok) sentOk++; else sentFail++;
795
+
796
+ if (sentOk + sentFail <= 2 && isFirstBatch) {
797
+ this.log('info', `[bot-huddle] send #${sentOk+sentFail}: seq=${rtpConfig?.sequenceNumber}, ts=${rtpConfig?.timestamp}`);
798
+ }
799
+ await new Promise(r => setTimeout(r, 18));
800
+ }
801
+ this.log('info', `[bot-huddle] sent ${sentOk}/${frames.length} frames to ${peerId.slice(0, 8)}`);
802
+ }
803
+ }
804
+
805
+ // ── Cleanup ────────────────────────────────────────────────────────
806
+
807
+ private cleanupPeer(peerId: string): void {
808
+ const peerState = this.peerConnections.get(peerId);
809
+ if (peerState) {
810
+ try {
811
+ peerState.track?.close();
812
+ } catch { /* already closed */ }
813
+ try {
814
+ peerState.pc.close();
815
+ } catch { /* already closed */ }
816
+ this.peerConnections.delete(peerId);
817
+ this.audioTracks.delete(peerId);
818
+ this.sendTracks.delete(peerId);
819
+ this.log('info', `[bot-huddle] cleaned up PC for ${peerId}`);
820
+ }
821
+ }
822
+
823
+ destroy(): void {
824
+ this.leave();
825
+ this.audioPipeline.destroy();
826
+ this.tts?.destroy();
827
+ }
828
+
829
+ // ── Helpers ────────────────────────────────────────────────────────
830
+
831
+ private resolveVoiceId(voiceName?: string): string | undefined {
832
+ if (!voiceName) return undefined;
833
+ const voiceMap: Record<string, string> = {
834
+ 'rachel': 'EXAVITQu4vr4xnSDxMaL',
835
+ 'domi': 'AZnzlk1XvdvUeBnXmlld',
836
+ 'bella': 'EXAVITQu4vr4xnSDxMaL',
837
+ 'antoni': 'ErXwobaYiN019PkySvjV',
838
+ 'elli': 'MF3mGyEYCl7XYWbV9V6O',
839
+ 'josh': 'TxGEqnHWrfWFTfGW9XjX',
840
+ 'arnold': 'VR6AewLTigWG4xSOukaG',
841
+ 'adam': 'pNInz6obpgDQGcFmaJgB',
842
+ 'sam': 'yoZ06aMxZJJ28mfd3POQ',
843
+ };
844
+ return voiceMap[voiceName.toLowerCase()] ?? voiceName;
845
+ }
846
+
847
+ /**
848
+ * Extract the Opus payload type from an SDP offer.
849
+ * Chrome typically uses 111, Firefox/Zen uses 109.
850
+ * Falls back to 111 if not found.
851
+ */
852
+ private extractOpusPayloadType(sdp: string): number {
853
+ if (!sdp) return 111;
854
+ // Match: a=rtpmap:<PT> opus/48000/2
855
+ const match = sdp.match(/a=rtpmap:(\d+)\s+opus\/48000/i);
856
+ if (match) return parseInt(match[1], 10);
857
+ return 111; // Default fallback
858
+ }
859
+
860
+ /**
861
+ * Broadcast a status indicator to all peers in the huddle.
862
+ * States: listening, hearing, transcribing, thinking, speaking, interrupted
863
+ */
864
+ private emitStatus(status: 'listening' | 'hearing' | 'transcribing' | 'thinking' | 'speaking' | 'interrupted'): void {
865
+ this.callbacks.broadcastSignal({
866
+ type: 'huddle-status',
867
+ channelId: this.activeChannelId,
868
+ peerId: this.myPeerId,
869
+ status,
870
+ });
871
+ }
872
+
873
+ private log(level: 'info' | 'warn' | 'error', msg: string): void {
874
+ if (level === 'info') {
875
+ this.callbacks.log?.info(msg);
876
+ } else if (level === 'warn') {
877
+ (this.callbacks.log?.warn ?? this.callbacks.log?.info)?.(msg);
878
+ } else {
879
+ (this.callbacks.log?.error ?? this.callbacks.log?.info)?.(msg);
880
+ }
881
+ }
882
+ }