@gakr-gakr/google-meet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,780 @@
1
+ import type { AutoBotConfig } from "autobot/plugin-sdk/config-contracts";
2
+ import { formatErrorMessage } from "autobot/plugin-sdk/error-runtime";
3
+ import type { PluginRuntime, RuntimeLogger } from "autobot/plugin-sdk/plugin-runtime";
4
+ import type {
5
+ RealtimeTranscriptionProviderPlugin,
6
+ RealtimeTranscriptionSession,
7
+ } from "autobot/plugin-sdk/realtime-transcription";
8
+ import {
9
+ createRealtimeVoiceAgentTalkbackQueue,
10
+ createTalkSessionController,
11
+ createRealtimeVoiceBridgeSession,
12
+ recordTalkObservabilityEvent,
13
+ type RealtimeVoiceAgentTalkbackQueue,
14
+ type RealtimeVoiceBridgeSession,
15
+ type RealtimeVoiceProviderPlugin,
16
+ type TalkEvent,
17
+ type TalkEventInput,
18
+ type TalkSessionController,
19
+ } from "autobot/plugin-sdk/realtime-voice";
20
+ import {
21
+ consultAutoBotAgentForGoogleMeet,
22
+ handleGoogleMeetRealtimeConsultToolCall,
23
+ resolveGoogleMeetRealtimeTools,
24
+ } from "./agent-consult.js";
25
+ import type { GoogleMeetConfig } from "./config.js";
26
+ import {
27
+ getGoogleMeetRealtimeTranscriptHealth,
28
+ buildGoogleMeetSpeakExactUserMessage,
29
+ GOOGLE_MEET_AGENT_TRANSCRIPT_DEBOUNCE_MS,
30
+ extendGoogleMeetOutputEchoSuppression,
31
+ getGoogleMeetRealtimeEventHealth,
32
+ recordGoogleMeetRealtimeTranscript,
33
+ recordGoogleMeetRealtimeEvent,
34
+ resolveGoogleMeetRealtimeAudioFormat,
35
+ resolveGoogleMeetRealtimeProvider,
36
+ resolveGoogleMeetRealtimeTranscriptionProvider,
37
+ isGoogleMeetLikelyAssistantEchoTranscript,
38
+ pushGoogleMeetTalkEvent,
39
+ summarizeGoogleMeetTalkEvents,
40
+ convertGoogleMeetBridgeAudioForStt,
41
+ convertGoogleMeetTtsAudioForBridge,
42
+ formatGoogleMeetAgentAudioModelLog,
43
+ formatGoogleMeetAgentTtsResultLog,
44
+ formatGoogleMeetTranscriptSummaryLog,
45
+ formatGoogleMeetRealtimeVoiceModelLog,
46
+ type GoogleMeetRealtimeEventEntry,
47
+ type GoogleMeetRealtimeTranscriptEntry,
48
+ } from "./realtime.js";
49
+ import type { GoogleMeetChromeHealth } from "./transports/types.js";
50
+
51
/**
 * Handle returned by the node-backed Google Meet audio bridges in this module.
 *
 * Callers use it to make the assistant speak, to read a health snapshot and
 * to shut the bridge down.
 */
export type ChromeNodeRealtimeAudioBridgeHandle = {
  /** Discriminator; the bridges in this module always report "node-command-pair". */
  type: "node-command-pair";
  /** Id of the resolved provider plugin backing the bridge. */
  providerId: string;
  /** Node the bridge commands are invoked on. */
  nodeId: string;
  /** Identifier of the audio bridge on that node. */
  bridgeId: string;
  /** Asks the bridge to speak; how `instructions` is interpreted depends on the bridge flavour. */
  speak: (instructions?: string) => void;
  /** Returns a snapshot of the bridge counters and connection state. */
  getHealth: () => GoogleMeetChromeHealth;
  /** Shuts the bridge down. */
  stop: () => Promise<void>;
};
60
+
61
/**
 * Views an arbitrary value as a string-keyed record.
 *
 * @param value - Anything, typically an untyped command result.
 * @returns `value` itself when it is a non-null, non-array object; otherwise a
 *   fresh empty object so callers can read properties without further checks.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
66
+
67
/**
 * Reads a value as a meaningful string.
 *
 * @param value - Anything, typically a field of an untyped command result.
 * @returns The original (untrimmed) string when `value` is a string containing
 *   at least one non-whitespace character; `undefined` otherwise.
 */
function readString(value: unknown): string | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  return value.trim().length > 0 ? value : undefined;
}
70
+
71
/**
 * Normalizes text that is about to be sent to text-to-speech.
 *
 * - Blank or missing input yields `undefined`.
 * - A leading `say exactly:` directive (case-insensitive, may span lines) is
 *   removed so only the payload is spoken.
 * - If the payload is wrapped in one matching pair of quotes (`"…"` or `'…'`),
 *   that pair is removed. A lone leading or trailing quote/apostrophe is kept,
 *   so words such as `'tis` or `Joneses'` are not mangled.
 * - If removing the directive and quotes leaves nothing, the whole trimmed
 *   input is returned instead.
 *
 * @param text - Raw instruction or answer text.
 * @returns The text to speak, or `undefined` when there is nothing to say.
 */
function normalizeGoogleMeetTtsPromptText(text: string | undefined): string | undefined {
  const trimmed = text?.trim();
  if (!trimmed) {
    return undefined;
  }
  const sayExactly = trimmed.match(/^say exactly:\s*(?<text>.+)$/is)?.groups?.text?.trim();
  if (!sayExactly) {
    return trimmed;
  }
  // Only unwrap when the same quote character opens and closes the payload;
  // the back-reference \1 enforces that the closing quote matches the opener.
  const unquoted = sayExactly.replace(/^(["'])(.*)\1$/s, "$2").trim();
  return unquoted || trimmed;
}
82
+
83
/**
 * Starts the "agent" flavour of the node audio bridge for a Google Meet session.
 *
 * Data flow, as implemented below:
 * - Input: audio is pulled from the node with the `googlemeet.chrome` command
 *   (`pullAudio`), converted with `convertGoogleMeetBridgeAudioForStt` and fed
 *   to a realtime transcription session.
 * - Brain: non-echo user transcripts are enqueued on an agent talkback queue,
 *   which consults the agent via `consultAutoBotAgentForGoogleMeet`.
 * - Output: answers (and `speak()` requests) are synthesized with
 *   `runtime.tts.textToSpeechTelephony`, converted for the bridge and pushed to
 *   the node (`pushAudio`). TTS jobs are serialized through a promise chain.
 * - Echo suppression: every pushed output chunk extends `suppressInputUntil`;
 *   input pulled before that instant is counted as suppressed and dropped.
 *
 * The pull loop runs in the background until `stop()` is called, the node
 * reports `closed: true`, five consecutive pulls fail, or an error message
 * indicates the bridge no longer exists.
 *
 * @param params - Plugin config, runtime, logger and the node/bridge identifiers.
 * @returns A handle exposing `speak`, `getHealth` and `stop`.
 * @throws Whatever provider resolution, `createSession` or `connect()` throws.
 *   NOTE(review): on a `connect()` failure nothing created here is torn down;
 *   confirm that the caller stops the node bridge in that case.
 */
export async function startNodeAgentAudioBridge(params: {
  config: GoogleMeetConfig;
  fullConfig: AutoBotConfig;
  runtime: PluginRuntime;
  meetingSessionId: string;
  requesterSessionKey?: string;
  nodeId: string;
  bridgeId: string;
  logger: RuntimeLogger;
  providers?: RealtimeTranscriptionProviderPlugin[];
}): Promise<ChromeNodeRealtimeAudioBridgeHandle> {
  // Pull failures tolerated in a row before the bridge is stopped.
  const maxConsecutiveInputErrors = 5;

  let stopped = false;
  let sttSession: RealtimeTranscriptionSession | null = null;
  let realtimeReady = false;
  let lastInputAt: string | undefined;
  let lastOutputAt: string | undefined;
  // Despite the "last" prefix these are running totals since bridge start.
  let lastInputBytes = 0;
  let lastOutputBytes = 0;
  let suppressedInputBytes = 0;
  let lastSuppressedInputAt: string | undefined;
  // Epoch milliseconds until which pulled input is treated as assistant echo.
  let suppressInputUntil = 0;
  let lastOutputPlayableUntilMs = 0;
  let consecutiveInputErrors = 0;
  let lastInputError: string | undefined;

  const resolved = resolveGoogleMeetRealtimeTranscriptionProvider({
    config: params.config,
    fullConfig: params.fullConfig,
    providers: params.providers,
  });
  params.logger.info(
    formatGoogleMeetAgentAudioModelLog({
      provider: resolved.provider,
      providerConfig: resolved.providerConfig,
      audioFormat: params.config.chrome.audioFormat,
    }),
  );
  const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
  let agentTalkback: RealtimeVoiceAgentTalkbackQueue | undefined;
  // Tail of the serialized TTS job chain; each speak request is appended to it.
  let ttsQueue = Promise.resolve();

  /** Idempotent shutdown: closes the queue and STT session, then stops the node bridge. */
  const stop = async () => {
    if (stopped) {
      return;
    }
    stopped = true;
    agentTalkback?.close();
    try {
      sttSession?.close();
    } catch (error) {
      params.logger.debug?.(
        `[google-meet] node agent transcription bridge close ignored: ${formatErrorMessage(error)}`,
      );
    }
    try {
      await params.runtime.nodes.invoke({
        nodeId: params.nodeId,
        command: "googlemeet.chrome",
        params: { action: "stop", bridgeId: params.bridgeId },
        timeoutMs: 5_000,
      });
    } catch (error) {
      // Best effort: the bridge may already be gone on the node side.
      params.logger.debug?.(
        `[google-meet] node audio bridge stop ignored: ${formatErrorMessage(error)}`,
      );
    }
  };

  /** Pushes one chunk of assistant audio to the node and extends the echo-suppression window. */
  const pushOutputAudio = async (audio: Buffer) => {
    const suppression = extendGoogleMeetOutputEchoSuppression({
      audio,
      audioFormat: params.config.chrome.audioFormat,
      nowMs: Date.now(),
      lastOutputPlayableUntilMs,
      suppressInputUntilMs: suppressInputUntil,
    });
    suppressInputUntil = suppression.suppressInputUntilMs;
    lastOutputPlayableUntilMs = suppression.lastOutputPlayableUntilMs;
    lastOutputAt = new Date().toISOString();
    lastOutputBytes += audio.byteLength;
    await params.runtime.nodes.invoke({
      nodeId: params.nodeId,
      command: "googlemeet.chrome",
      params: {
        action: "pushAudio",
        bridgeId: params.bridgeId,
        base64: Buffer.from(audio).toString("base64"),
      },
      timeoutMs: 5_000,
    });
  };

  /** Queues text for TTS playback; blank text and requests after stop are ignored. */
  const enqueueSpeakText = (text: string | undefined) => {
    const normalized = normalizeGoogleMeetTtsPromptText(text);
    if (!normalized || stopped) {
      return;
    }
    ttsQueue = ttsQueue
      .then(async () => {
        // The bridge may have been stopped while earlier jobs were running.
        if (stopped) {
          return;
        }
        recordGoogleMeetRealtimeTranscript(transcript, "assistant", normalized);
        params.logger.info(
          formatGoogleMeetTranscriptSummaryLog("node agent assistant", normalized),
        );
        const result = await params.runtime.tts.textToSpeechTelephony({
          text: normalized,
          cfg: params.fullConfig,
        });
        if (!result.success || !result.audioBuffer || !result.sampleRate) {
          throw new Error(result.error ?? "TTS conversion failed");
        }
        params.logger.info(formatGoogleMeetAgentTtsResultLog("node agent", result));
        await pushOutputAudio(
          convertGoogleMeetTtsAudioForBridge(
            result.audioBuffer,
            result.sampleRate,
            params.config,
            result.outputFormat,
          ),
        );
      })
      .catch((error) => {
        // A failed job is logged and swallowed so later jobs still run.
        params.logger.warn(`[google-meet] node agent TTS failed: ${formatErrorMessage(error)}`);
      });
  };

  agentTalkback = createRealtimeVoiceAgentTalkbackQueue({
    debounceMs: GOOGLE_MEET_AGENT_TRANSCRIPT_DEBOUNCE_MS,
    isStopped: () => stopped,
    logger: params.logger,
    logPrefix: "[google-meet] node agent",
    responseStyle: "Brief, natural spoken answer for a live meeting.",
    fallbackText: "I hit an error while checking that. Please try again.",
    consult: ({ question, responseStyle }) =>
      consultAutoBotAgentForGoogleMeet({
        config: params.config,
        fullConfig: params.fullConfig,
        runtime: params.runtime,
        logger: params.logger,
        meetingSessionId: params.meetingSessionId,
        requesterSessionKey: params.requesterSessionKey,
        args: { question, responseStyle },
        transcript,
      }),
    deliver: enqueueSpeakText,
  });

  sttSession = resolved.provider.createSession({
    cfg: params.fullConfig,
    providerConfig: resolved.providerConfig,
    onTranscript: (text) => {
      const trimmed = text.trim();
      if (!trimmed || stopped) {
        return;
      }
      // The transcript is recorded before the echo check, so echoes still
      // appear in the transcript log but are not forwarded to the agent.
      recordGoogleMeetRealtimeTranscript(transcript, "user", trimmed);
      params.logger.info(formatGoogleMeetTranscriptSummaryLog("node agent user", trimmed));
      if (isGoogleMeetLikelyAssistantEchoTranscript({ transcript, text: trimmed })) {
        params.logger.info(
          formatGoogleMeetTranscriptSummaryLog(
            "node agent ignored assistant echo transcript",
            trimmed,
          ),
        );
        return;
      }
      agentTalkback?.enqueue(trimmed);
    },
    onError: (error) => {
      params.logger.warn(
        `[google-meet] node agent transcription bridge failed: ${formatErrorMessage(error)}`,
      );
      void stop();
    },
  });
  await sttSession.connect();
  realtimeReady = true;

  // Background input pump; deliberately not awaited.
  void (async () => {
    while (!stopped) {
      try {
        const raw = await params.runtime.nodes.invoke({
          nodeId: params.nodeId,
          command: "googlemeet.chrome",
          params: { action: "pullAudio", bridgeId: params.bridgeId, timeoutMs: 250 },
          timeoutMs: 2_000,
        });
        // stop() may have run while the pull was in flight; drop the late result
        // instead of feeding a closed transcription session.
        if (stopped) {
          break;
        }
        // The result may or may not be wrapped in a `payload` envelope.
        const result = asRecord(asRecord(raw).payload ?? raw);
        consecutiveInputErrors = 0;
        lastInputError = undefined;
        const base64 = readString(result.base64);
        if (base64) {
          const audio = Buffer.from(base64, "base64");
          if (Date.now() < suppressInputUntil) {
            // Inside the echo-suppression window: count the chunk, do not forward it.
            lastSuppressedInputAt = new Date().toISOString();
            suppressedInputBytes += audio.byteLength;
          } else {
            lastInputAt = new Date().toISOString();
            lastInputBytes += audio.byteLength;
            sttSession?.sendAudio(convertGoogleMeetBridgeAudioForStt(audio, params.config));
          }
        }
        // Checked for every result, including suppressed chunks, so a close
        // signal delivered together with suppressed audio is not missed.
        if (result.closed === true) {
          await stop();
        }
      } catch (error) {
        if (!stopped) {
          const message = formatErrorMessage(error);
          consecutiveInputErrors += 1;
          lastInputError = message;
          params.logger.warn(
            `[google-meet] node agent audio input failed (${consecutiveInputErrors}/${maxConsecutiveInputErrors}): ${message}`,
          );
          if (
            consecutiveInputErrors >= maxConsecutiveInputErrors ||
            /unknown bridgeId|bridge is not open/i.test(message)
          ) {
            // Too many failures, or the bridge no longer exists: give up.
            await stop();
          } else {
            // Brief back-off before retrying the pull.
            await new Promise((resolve) => setTimeout(resolve, 250));
          }
        }
      }
    }
  })();

  return {
    type: "node-command-pair",
    providerId: resolved.provider.id,
    nodeId: params.nodeId,
    bridgeId: params.bridgeId,
    speak: enqueueSpeakText,
    getHealth: () => ({
      providerConnected: sttSession?.isConnected() ?? false,
      realtimeReady,
      audioInputActive: lastInputBytes > 0,
      audioOutputActive: lastOutputBytes > 0,
      lastInputAt,
      lastOutputAt,
      lastSuppressedInputAt,
      lastInputBytes,
      lastOutputBytes,
      suppressedInputBytes,
      ...getGoogleMeetRealtimeTranscriptHealth(transcript),
      consecutiveInputErrors,
      lastInputError,
      bridgeClosed: stopped,
    }),
    stop,
  };
}
335
+
336
/**
 * Starts the "realtime voice" flavour of the node audio bridge for a Google
 * Meet session.
 *
 * Data flow, as implemented below:
 * - Input: audio is pulled from the node with the `googlemeet.chrome` command
 *   (`pullAudio`) and forwarded unchanged to a realtime voice bridge session.
 * - Output: audio produced by the voice bridge is pushed to the node
 *   (`pushAudio`); `clearAudio` requests are relayed as well. Both are
 *   fire-and-forget, and a failure stops the bridge.
 * - Strategy: with `config.realtime.strategy === "bidi"` the provider responds
 *   to audio automatically and receives the resolved tool list. Otherwise no
 *   tools are passed, and with strategy `"agent"` final non-echo user
 *   transcripts are sent to the agent talkback queue, whose answers are
 *   delivered back to the provider as "speak exactly" user messages.
 * - Echo suppression: every output chunk extends `suppressInputUntil`; input
 *   pulled before that instant is counted as suppressed and dropped. A clear
 *   request resets the window.
 * - Observability: talk events are emitted through a talk session controller
 *   and the most recent ones are kept for the health snapshot.
 *
 * The pull loop runs in the background until `stop()` is called, the node
 * reports `closed: true`, five consecutive pulls fail, or an error message
 * indicates the bridge no longer exists.
 *
 * @param params - Plugin config, runtime, logger and the node/bridge identifiers.
 * @returns A handle exposing `speak`, `getHealth` and `stop`.
 * @throws Whatever provider resolution, session creation or `connect()` throws.
 *   NOTE(review): on a `connect()` failure nothing created here is torn down;
 *   confirm that the caller stops the node bridge in that case.
 */
export async function startNodeRealtimeAudioBridge(params: {
  config: GoogleMeetConfig;
  fullConfig: AutoBotConfig;
  runtime: PluginRuntime;
  meetingSessionId: string;
  requesterSessionKey?: string;
  nodeId: string;
  bridgeId: string;
  logger: RuntimeLogger;
  providers?: RealtimeVoiceProviderPlugin[];
}): Promise<ChromeNodeRealtimeAudioBridgeHandle> {
  // Pull failures tolerated in a row before the bridge is stopped.
  const maxConsecutiveInputErrors = 5;

  let stopped = false;
  let bridge: RealtimeVoiceBridgeSession | null = null;
  let realtimeReady = false;
  let lastInputAt: string | undefined;
  let lastOutputAt: string | undefined;
  let lastClearAt: string | undefined;
  // Despite the "last" prefix these are running totals since bridge start.
  let lastInputBytes = 0;
  let lastOutputBytes = 0;
  let suppressedInputBytes = 0;
  let lastSuppressedInputAt: string | undefined;
  // Epoch milliseconds until which pulled input is treated as assistant echo.
  let suppressInputUntil = 0;
  let lastOutputPlayableUntilMs = 0;
  let consecutiveInputErrors = 0;
  let lastInputError: string | undefined;
  let clearCount = 0;

  const resolved = resolveGoogleMeetRealtimeProvider({
    config: params.config,
    fullConfig: params.fullConfig,
    providers: params.providers,
  });
  const transcript: GoogleMeetRealtimeTranscriptEntry[] = [];
  const realtimeEvents: GoogleMeetRealtimeEventEntry[] = [];
  const strategy = params.config.realtime.strategy;
  const talk: TalkSessionController = createTalkSessionController(
    {
      sessionId: `google-meet:${params.meetingSessionId}:${params.bridgeId}:node-realtime`,
      mode: "realtime",
      transport: "gateway-relay",
      brain: strategy === "bidi" ? "direct-tools" : "agent-consult",
      provider: resolved.provider.id,
    },
    { onEvent: recordTalkObservabilityEvent },
  );
  const recentTalkEvents: TalkEvent[] = [];

  /** Stores an emitted talk event for the health snapshot; `undefined` is ignored. */
  const rememberTalkEvent = (event: TalkEvent | undefined): void => {
    if (event) {
      pushGoogleMeetTalkEvent(recentTalkEvents, event);
    }
  };
  /** Emits a talk event through the controller and remembers the result. */
  const emitTalkEvent = (input: TalkEventInput): void => {
    rememberTalkEvent(talk.emit(input));
  };
  /** Returns the id from `talk.ensureTurn`, remembering the event it reports, if any. */
  const ensureTalkTurn = (): string => {
    const turn = talk.ensureTurn({
      payload: { bridgeId: params.bridgeId, meetingSessionId: params.meetingSessionId },
    });
    if (turn.event) {
      rememberTalkEvent(turn.event);
    }
    return turn.turnId;
  };
  /** Marks output audio as finished for the given reason. */
  const finishOutputAudio = (reason: string): void => {
    rememberTalkEvent(
      talk.finishOutputAudio({
        payload: { bridgeId: params.bridgeId, reason },
      }),
    );
  };
  /** Ends the current talk turn; the resulting event is only remembered when the controller reports success. */
  const endTalkTurn = (reason = "completed"): void => {
    const ended = talk.endTurn({
      payload: { bridgeId: params.bridgeId, reason },
    });
    if (ended.ok) {
      rememberTalkEvent(ended.event);
    }
  };

  emitTalkEvent({
    type: "session.started",
    payload: {
      bridgeId: params.bridgeId,
      meetingSessionId: params.meetingSessionId,
      nodeId: params.nodeId,
    },
  });
  params.logger.info(
    formatGoogleMeetRealtimeVoiceModelLog({
      strategy,
      provider: resolved.provider,
      providerConfig: resolved.providerConfig,
      fallbackModel: params.config.realtime.model,
      audioFormat: params.config.chrome.audioFormat,
    }),
  );

  let agentTalkback: RealtimeVoiceAgentTalkbackQueue | undefined;
  agentTalkback = createRealtimeVoiceAgentTalkbackQueue({
    debounceMs: GOOGLE_MEET_AGENT_TRANSCRIPT_DEBOUNCE_MS,
    isStopped: () => stopped,
    logger: params.logger,
    logPrefix: "[google-meet] node realtime agent",
    responseStyle: "Brief, natural spoken answer for a live meeting.",
    fallbackText: "I hit an error while checking that. Please try again.",
    consult: ({ question, responseStyle }) =>
      consultAutoBotAgentForGoogleMeet({
        config: params.config,
        fullConfig: params.fullConfig,
        runtime: params.runtime,
        logger: params.logger,
        meetingSessionId: params.meetingSessionId,
        requesterSessionKey: params.requesterSessionKey,
        args: { question, responseStyle },
        transcript,
      }),
    // Agent answers are handed to the realtime provider as a user message.
    deliver: (text) => {
      bridge?.sendUserMessage(buildGoogleMeetSpeakExactUserMessage(text));
    },
  });

  /** Idempotent shutdown: closes the queue and voice bridge, then stops the node bridge. */
  const stop = async () => {
    if (stopped) {
      return;
    }
    stopped = true;
    agentTalkback?.close();
    try {
      bridge?.close();
    } catch (error) {
      params.logger.debug?.(
        `[google-meet] node realtime bridge close ignored: ${formatErrorMessage(error)}`,
      );
    }
    try {
      await params.runtime.nodes.invoke({
        nodeId: params.nodeId,
        command: "googlemeet.chrome",
        params: { action: "stop", bridgeId: params.bridgeId },
        timeoutMs: 5_000,
      });
    } catch (error) {
      // Best effort: the bridge may already be gone on the node side.
      params.logger.debug?.(
        `[google-meet] node audio bridge stop ignored: ${formatErrorMessage(error)}`,
      );
    }
  };

  bridge = createRealtimeVoiceBridgeSession({
    provider: resolved.provider,
    cfg: params.fullConfig,
    providerConfig: resolved.providerConfig,
    audioFormat: resolveGoogleMeetRealtimeAudioFormat(params.config),
    instructions: params.config.realtime.instructions,
    initialGreetingInstructions: params.config.realtime.introMessage,
    autoRespondToAudio: strategy === "bidi",
    triggerGreetingOnReady: false,
    markStrategy: "ack-immediately",
    tools:
      strategy === "bidi" ? resolveGoogleMeetRealtimeTools(params.config.realtime.toolPolicy) : [],
    audioSink: {
      isOpen: () => !stopped,
      sendAudio: (audio) => {
        const turnId = ensureTalkTurn();
        rememberTalkEvent(
          talk.startOutputAudio({
            turnId,
            payload: { bridgeId: params.bridgeId },
          }).event,
        );
        emitTalkEvent({
          type: "output.audio.delta",
          turnId,
          payload: { byteLength: audio.byteLength },
        });
        const suppression = extendGoogleMeetOutputEchoSuppression({
          audio,
          audioFormat: params.config.chrome.audioFormat,
          nowMs: Date.now(),
          lastOutputPlayableUntilMs,
          suppressInputUntilMs: suppressInputUntil,
        });
        suppressInputUntil = suppression.suppressInputUntilMs;
        lastOutputPlayableUntilMs = suppression.lastOutputPlayableUntilMs;
        lastOutputAt = new Date().toISOString();
        lastOutputBytes += audio.byteLength;
        // Fire-and-forget push; a failure shuts the bridge down.
        void params.runtime.nodes
          .invoke({
            nodeId: params.nodeId,
            command: "googlemeet.chrome",
            params: {
              action: "pushAudio",
              bridgeId: params.bridgeId,
              base64: Buffer.from(audio).toString("base64"),
            },
            timeoutMs: 5_000,
          })
          .catch((error) => {
            params.logger.warn(
              `[google-meet] node audio output failed: ${formatErrorMessage(error)}`,
            );
            void stop();
          });
      },
      clearAudio: () => {
        lastClearAt = new Date().toISOString();
        clearCount += 1;
        finishOutputAudio("clear");
        // Reset the echo-suppression window along with the cleared output.
        suppressInputUntil = 0;
        lastOutputPlayableUntilMs = 0;
        void params.runtime.nodes
          .invoke({
            nodeId: params.nodeId,
            command: "googlemeet.chrome",
            params: {
              action: "clearAudio",
              bridgeId: params.bridgeId,
            },
            timeoutMs: 5_000,
          })
          .catch((error) => {
            params.logger.warn(
              `[google-meet] node audio clear failed: ${formatErrorMessage(error)}`,
            );
            void stop();
          });
      },
    },
    onTranscript: (role, text, isFinal) => {
      const turnId = ensureTalkTurn();
      // Assistant text maps to output.text.* events, everything else to transcript.*.
      const eventType =
        role === "assistant"
          ? isFinal
            ? "output.text.done"
            : "output.text.delta"
          : isFinal
            ? "transcript.done"
            : "transcript.delta";
      const payload = role === "assistant" ? { text } : { role, text };
      emitTalkEvent({
        type: eventType,
        turnId,
        payload,
        final: isFinal,
      });
      if (role === "user" && isFinal) {
        emitTalkEvent({
          type: "input.audio.committed",
          turnId,
          payload: { bridgeId: params.bridgeId },
          final: true,
        });
      }
      if (isFinal) {
        // Only final transcripts are recorded and logged.
        recordGoogleMeetRealtimeTranscript(transcript, role, text);
        params.logger.info(formatGoogleMeetTranscriptSummaryLog(`node realtime ${role}`, text));
        if (role === "user" && strategy === "agent") {
          if (isGoogleMeetLikelyAssistantEchoTranscript({ transcript, text })) {
            params.logger.info(
              formatGoogleMeetTranscriptSummaryLog(
                "node realtime ignored assistant echo transcript",
                text,
              ),
            );
            return;
          }
          agentTalkback?.enqueue(text);
        }
      }
    },
    onEvent: (event) => {
      recordGoogleMeetRealtimeEvent(realtimeEvents, event);
      if (event.type === "input_audio_buffer.speech_started") {
        ensureTalkTurn();
      } else if (event.type === "input_audio_buffer.speech_stopped") {
        const turnId = talk.activeTurnId;
        // Without an active turn there is nothing to commit (and nothing is logged).
        if (!turnId) {
          return;
        }
        emitTalkEvent({
          type: "input.audio.committed",
          turnId,
          payload: { bridgeId: params.bridgeId, source: event.type },
          final: true,
        });
      } else if (event.type === "response.done") {
        finishOutputAudio("response.done");
        endTalkTurn("response.done");
      } else if (event.type === "error") {
        emitTalkEvent({
          type: "session.error",
          payload: { message: event.detail ?? "Realtime provider error" },
          final: true,
        });
      }
      // Only this subset of provider events is written to the info log.
      if (
        event.type === "error" ||
        event.type === "response.done" ||
        event.type === "input_audio_buffer.speech_started" ||
        event.type === "input_audio_buffer.speech_stopped" ||
        event.type === "conversation.item.input_audio_transcription.completed" ||
        event.type === "conversation.item.input_audio_transcription.failed"
      ) {
        const detail = event.detail ? ` ${event.detail}` : "";
        params.logger.info(`[google-meet] node realtime ${event.direction}:${event.type}${detail}`);
      }
    },
    onToolCall: (event, session) => {
      emitTalkEvent({
        type: "tool.call",
        turnId: ensureTalkTurn(),
        itemId: event.itemId,
        callId: event.callId,
        payload: { name: event.name, args: event.args },
      });
      // Resolved again after the emit; used as the default turn for events
      // reported by the consult handler that carry no turn id of their own.
      const turnId = ensureTalkTurn();
      handleGoogleMeetRealtimeConsultToolCall({
        strategy,
        session,
        event,
        config: params.config,
        fullConfig: params.fullConfig,
        runtime: params.runtime,
        logger: params.logger,
        meetingSessionId: params.meetingSessionId,
        requesterSessionKey: params.requesterSessionKey,
        transcript,
        onTalkEvent: (input) => emitTalkEvent({ ...input, turnId: input.turnId ?? turnId }),
      });
    },
    onError: (error) => {
      params.logger.warn(
        `[google-meet] node realtime voice bridge failed: ${formatErrorMessage(error)}`,
      );
      emitTalkEvent({
        type: "session.error",
        payload: { message: formatErrorMessage(error) },
        final: true,
      });
      void stop();
    },
    onClose: (reason) => {
      realtimeReady = false;
      finishOutputAudio(reason);
      emitTalkEvent({
        type: "session.closed",
        payload: { reason },
        final: true,
      });
      // Only an error close tears the node bridge down from here.
      if (reason === "error") {
        void stop();
      }
    },
    onReady: () => {
      realtimeReady = true;
      emitTalkEvent({
        type: "session.ready",
        payload: { bridgeId: params.bridgeId },
      });
    },
  });

  await bridge.connect();

  // Background input pump; deliberately not awaited.
  void (async () => {
    while (!stopped) {
      try {
        const raw = await params.runtime.nodes.invoke({
          nodeId: params.nodeId,
          command: "googlemeet.chrome",
          params: { action: "pullAudio", bridgeId: params.bridgeId, timeoutMs: 250 },
          timeoutMs: 2_000,
        });
        // stop() may have run while the pull was in flight; drop the late result
        // instead of feeding a closed voice bridge.
        if (stopped) {
          break;
        }
        // The result may or may not be wrapped in a `payload` envelope.
        const result = asRecord(asRecord(raw).payload ?? raw);
        consecutiveInputErrors = 0;
        lastInputError = undefined;
        const base64 = readString(result.base64);
        if (base64) {
          const audio = Buffer.from(base64, "base64");
          if (Date.now() < suppressInputUntil) {
            // Inside the echo-suppression window: count the chunk, do not forward it.
            lastSuppressedInputAt = new Date().toISOString();
            suppressedInputBytes += audio.byteLength;
          } else {
            lastInputAt = new Date().toISOString();
            lastInputBytes += audio.byteLength;
            emitTalkEvent({
              type: "input.audio.delta",
              turnId: ensureTalkTurn(),
              payload: { byteLength: audio.byteLength },
            });
            bridge?.sendAudio(audio);
          }
        }
        // Checked for every result, including suppressed chunks, so a close
        // signal delivered together with suppressed audio is not missed.
        if (result.closed === true) {
          await stop();
        }
      } catch (error) {
        if (!stopped) {
          const message = formatErrorMessage(error);
          consecutiveInputErrors += 1;
          lastInputError = message;
          params.logger.warn(
            `[google-meet] node audio input failed (${consecutiveInputErrors}/${maxConsecutiveInputErrors}): ${message}`,
          );
          if (
            consecutiveInputErrors >= maxConsecutiveInputErrors ||
            /unknown bridgeId|bridge is not open/i.test(message)
          ) {
            // Too many failures, or the bridge no longer exists: give up.
            await stop();
          } else {
            // Brief back-off before retrying the pull.
            await new Promise((resolve) => setTimeout(resolve, 250));
          }
        }
      }
    }
  })();

  return {
    type: "node-command-pair",
    providerId: resolved.provider.id,
    nodeId: params.nodeId,
    bridgeId: params.bridgeId,
    speak: (instructions) => {
      bridge?.triggerGreeting(instructions);
    },
    getHealth: () => ({
      providerConnected: bridge?.bridge.isConnected() ?? false,
      realtimeReady,
      audioInputActive: lastInputBytes > 0,
      audioOutputActive: lastOutputBytes > 0,
      lastInputAt,
      lastOutputAt,
      lastSuppressedInputAt,
      lastClearAt,
      lastInputBytes,
      lastOutputBytes,
      suppressedInputBytes,
      ...getGoogleMeetRealtimeTranscriptHealth(transcript),
      ...getGoogleMeetRealtimeEventHealth(realtimeEvents),
      recentTalkEvents: summarizeGoogleMeetTalkEvents(recentTalkEvents),
      consecutiveInputErrors,
      lastInputError,
      clearCount,
      bridgeClosed: stopped,
    }),
    stop,
  };
}