@gakr-gakr/google-meet 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/runtime.ts ADDED
@@ -0,0 +1,1008 @@
1
+ import { randomUUID } from "node:crypto";
2
+ import type { AutoBotConfig } from "autobot/plugin-sdk/config-contracts";
3
+ import { formatErrorMessage } from "autobot/plugin-sdk/error-runtime";
4
+ import type { PluginRuntime, RuntimeLogger } from "autobot/plugin-sdk/plugin-runtime";
5
+ import { sleep } from "autobot/plugin-sdk/runtime-env";
6
+ import { normalizeOptionalString } from "autobot/plugin-sdk/string-coerce-runtime";
7
+ import type {
8
+ GoogleMeetConfig,
9
+ GoogleMeetMode,
10
+ GoogleMeetModeInput,
11
+ GoogleMeetTransport,
12
+ } from "./config.js";
13
+ import { addGoogleMeetSetupCheck, getGoogleMeetSetupStatus } from "./setup.js";
14
+ import { isSameMeetUrlForReuse, resolveChromeNodeInfo } from "./transports/chrome-browser-proxy.js";
15
+ import { createMeetWithBrowserProxyOnNode } from "./transports/chrome-create.js";
16
+ import {
17
+ assertBlackHole2chAvailable,
18
+ launchChromeMeet,
19
+ launchChromeMeetOnNode,
20
+ recoverCurrentMeetTab,
21
+ recoverCurrentMeetTabOnNode,
22
+ } from "./transports/chrome.js";
23
+ import {
24
+ buildMeetDtmfSequence,
25
+ normalizeDialInNumber,
26
+ prefixDtmfWait,
27
+ } from "./transports/twilio.js";
28
+ import type {
29
+ GoogleMeetChromeHealth,
30
+ GoogleMeetJoinRequest,
31
+ GoogleMeetJoinResult,
32
+ GoogleMeetSession,
33
+ } from "./transports/types.js";
34
+ import {
35
+ endMeetVoiceCallGatewayCall,
36
+ getMeetVoiceCallGatewayCall,
37
+ isVoiceCallMissingError,
38
+ joinMeetViaVoiceCallGateway,
39
+ speakMeetViaVoiceCallGateway,
40
+ } from "./voice-call-gateway.js";
41
+
42
/**
 * The `audioBridge` value returned by either Chrome launcher
 * (`launchChromeMeet` or `launchChromeMeetOnNode`), with `null`/`undefined`
 * excluded.
 */
type ChromeAudioBridgeResult = NonNullable<
  Awaited<ReturnType<typeof launchChromeMeet | typeof launchChromeMeetOnNode>>["audioBridge"]
>;
46
+
47
/** Returns the current time formatted by `Date.prototype.toISOString`. */
function nowIso(): string {
  const current = new Date();
  return current.toISOString();
}
50
+
51
/**
 * Builds the voice-call session key for a meeting session by prefixing the
 * meeting session id with `voice:google-meet:`.
 */
function buildTwilioVoiceCallSessionKey(meetingSessionId: string): string {
  return "voice:google-meet:" + meetingSessionId;
}
54
+
55
/**
 * Validates a caller-supplied value as an explicit Google Meet URL and returns
 * its serialized form.
 *
 * @param input - Raw value; it is first passed through `normalizeOptionalString`.
 * @returns The parsed URL serialized with `URL.prototype.toString`.
 * @throws Error when the normalized input is falsy, cannot be parsed as a URL,
 *   is not `https:` on host `meet.google.com`, or its path does not start with
 *   a `xxx-xxxx-xxx` letter code.
 */
export function normalizeMeetUrl(input: unknown): string {
  const candidate = normalizeOptionalString(input);
  if (!candidate) {
    throw new Error("url required");
  }

  let parsed: URL | undefined;
  try {
    parsed = new URL(candidate);
  } catch {
    parsed = undefined;
  }
  if (parsed === undefined) {
    throw new Error("url must be a valid Google Meet URL");
  }

  const isHttps = parsed.protocol === "https:";
  const isMeetHost = parsed.hostname.toLowerCase() === "meet.google.com";
  if (!(isHttps && isMeetHost)) {
    throw new Error("url must be an explicit https://meet.google.com/... URL");
  }

  // The path must begin with a three-four-three letter meeting code.
  const hasMeetingCode = /^\/[a-z]{3}-[a-z]{4}-[a-z]{3}(?:$|[/?#])/i.test(parsed.pathname);
  if (!hasMeetingCode) {
    throw new Error("url must include a Google Meet meeting code");
  }
  return parsed.toString();
}
74
+
75
/** Picks the requested transport, or `config.defaultTransport` when none was given. */
function resolveTransport(input: GoogleMeetTransport | undefined, config: GoogleMeetConfig) {
  if (input == null) {
    return config.defaultTransport;
  }
  return input;
}
78
+
79
/**
 * Resolves the effective mode: the input alias `"realtime"` maps to `"agent"`,
 * any other given input is used as-is, and a missing input falls back to
 * `config.defaultMode`.
 */
function resolveMode(input: GoogleMeetModeInput | undefined, config: GoogleMeetConfig) {
  if (input === "realtime") {
    return "agent";
  }
  return input ?? config.defaultMode;
}
82
+
83
/** Reports whether the mode is one of the talk-back modes (`"agent"` or `"bidi"`). */
function isGoogleMeetTalkBackMode(mode: GoogleMeetMode): boolean {
  switch (mode) {
    case "agent":
    case "bidi":
      return true;
    default:
      return false;
  }
}
86
+
87
/**
 * Reports whether `health.lastOutputBytes` (treated as 0 when absent) is
 * strictly greater than the supplied starting byte count.
 */
function hasRealtimeAudioOutputAdvanced(
  health: GoogleMeetChromeHealth | undefined,
  startOutputBytes: number,
): boolean {
  const currentBytes = health?.lastOutputBytes ?? 0;
  return currentBytes > startOutputBytes;
}
93
+
94
/** Snapshot of the caption-related health fields used to detect later transcript progress. */
type TranscriptCheckpoint = {
  lines: number;
  lastCaptionAt?: string;
  lastCaptionText?: string;
};

/**
 * Captures a checkpoint from the given health record. A missing record (or a
 * missing `transcriptLines`) yields a line count of 0; the caption fields are
 * copied as-is and may be `undefined`.
 */
function transcriptCheckpoint(health: GoogleMeetChromeHealth | undefined): TranscriptCheckpoint {
  if (health == null) {
    return { lines: 0, lastCaptionAt: undefined, lastCaptionText: undefined };
  }
  const lines = health.transcriptLines ?? 0;
  return {
    lines,
    lastCaptionAt: health.lastCaptionAt,
    lastCaptionText: health.lastCaptionText,
  };
}
107
+
108
/**
 * Reports whether the transcript has moved past a checkpoint: the line count
 * grew, or a non-empty last-caption timestamp or text differs from the one
 * recorded in `start`.
 */
function hasTranscriptAdvanced(
  health: GoogleMeetChromeHealth | undefined,
  start: TranscriptCheckpoint,
): boolean {
  const lineCount = health?.transcriptLines ?? 0;
  const captionAt = health?.lastCaptionAt;
  const captionText = health?.lastCaptionText;

  const moreLines = lineCount > start.lines;
  const newTimestamp = Boolean(captionAt) && captionAt !== start.lastCaptionAt;
  const newText = Boolean(captionText) && captionText !== start.lastCaptionText;
  return moreLines || newTimestamp || newText;
}
120
+
121
/**
 * Resolves the timeout, in milliseconds, used by the probe loops.
 *
 * @param input - Caller-supplied timeout; when `undefined` the fallback is used.
 * @param fallback - Timeout used when `input` is `undefined`.
 * @returns A timeout clamped to the range 1..120000 ms. An explicit `input` is
 *   truncated to a whole number first.
 * @throws Error when `input` is given but is not a finite number greater than 0.
 */
function resolveProbeTimeoutMs(input: number | undefined, fallback: number): number {
  const maxTimeoutMs = 120_000;
  if (input === undefined) {
    return Math.min(Math.max(fallback, 1), maxTimeoutMs);
  }
  if (!Number.isFinite(input) || input <= 0) {
    throw new Error("timeoutMs must be a positive number");
  }
  // Truncation turns positive fractions below 1 into 0, which would make the
  // probe give up immediately; keep at least 1 ms, as the fallback path does.
  return Math.min(Math.max(Math.trunc(input), 1), maxTimeoutMs);
}
130
+
131
/**
 * Reports whether the session uses a Chrome transport (`"chrome"` or
 * `"chrome-node"`) and its `chrome.launched` flag is truthy.
 */
function isManagedChromeBrowserSession(session: GoogleMeetSession): boolean {
  const usesChromeTransport =
    session.transport === "chrome" || session.transport === "chrome-node";
  if (!usesChromeTransport) {
    return false;
  }
  return Boolean(session.chrome?.launched);
}
138
+
139
/**
 * Records a note on the session so that it appears exactly once, as the last
 * entry. Any earlier copies of the same note are dropped and `session.notes`
 * is replaced with a new array.
 */
function noteSession(session: GoogleMeetSession, note: string): void {
  const remaining = session.notes.filter((existing) => existing !== note);
  remaining.push(note);
  session.notes = remaining;
}
142
+
143
/**
 * Decides whether realtime speech may be attempted on a session.
 *
 * Sessions that are not in a talk-back mode, or that carry no `chrome` state,
 * are always reported ready. Chrome sessions that are not browser-managed only
 * need an audio bridge. Browser-managed sessions additionally need: no pending
 * manual action, `inCall === true`, and a microphone not reported as muted.
 *
 * @returns `{ ready: true }`, or `{ ready: false, reason, message }` describing
 *   the first blocking condition found.
 */
function evaluateSpeechReadiness(session: GoogleMeetSession): {
  ready: boolean;
  reason?: NonNullable<GoogleMeetChromeHealth["speechBlockedReason"]>;
  message?: string;
} {
  const chrome = session.chrome;
  if (!isGoogleMeetTalkBackMode(session.mode) || !chrome) {
    return { ready: true };
  }

  // Verdict that depends only on whether an audio bridge is attached.
  const bridgeVerdict = () =>
    chrome.audioBridge
      ? { ready: true }
      : {
          ready: false,
          reason: "audio-bridge-unavailable" as const,
          message: "Realtime speech requires an active Chrome audio bridge.",
        };

  if (!isManagedChromeBrowserSession(session)) {
    return bridgeVerdict();
  }

  const health = chrome.health;
  if (health?.manualActionRequired) {
    return {
      ready: false,
      reason: health.manualActionReason ?? "browser-unverified",
      message:
        health.manualActionMessage ??
        "Resolve the Google Meet browser prompt before asking AutoBot to speak.",
    };
  }

  const inCall = health?.inCall;
  if (inCall === true) {
    if (health?.micMuted === true) {
      return {
        ready: false,
        reason: "meet-microphone-muted",
        message: "Turn on the AutoBot Google Meet microphone before asking AutoBot to speak.",
      };
    }
    return bridgeVerdict();
  }
  if (inCall === false) {
    return {
      ready: false,
      reason: "not-in-call",
      message: "Google Meet has not reported that the browser participant is in the call.",
    };
  }
  // `inCall` has not been reported either way yet.
  return {
    ready: false,
    reason: "browser-unverified",
    message: "Google Meet browser state has not been verified yet.",
  };
}
201
+
202
/**
 * Lists the distinct executable names (first element of each configured
 * command) used for Chrome audio. When `audioBridgeCommand` is configured only
 * its executable is considered; otherwise the input, output and barge-in
 * commands are. Missing or whitespace-only names are skipped.
 */
function collectChromeAudioCommands(config: GoogleMeetConfig): string[] {
  const { chrome } = config;
  const candidates = chrome.audioBridgeCommand
    ? [chrome.audioBridgeCommand[0]]
    : [
        chrome.audioInputCommand?.[0],
        chrome.audioOutputCommand?.[0],
        chrome.bargeInInputCommand?.[0],
      ];

  // A Set keeps first-seen order while removing duplicates.
  const unique = new Set<string>();
  for (const candidate of candidates) {
    if (candidate?.trim()) {
      unique.add(candidate);
    }
  }
  return [...unique];
}
212
+
213
/**
 * Checks whether a command can be resolved by running `command -v` through
 * `/bin/sh -lc` with a 5 second timeout. The command name is passed as a
 * positional argument (`$1`) rather than interpolated into the script.
 *
 * @returns `true` when the probe exits with code 0.
 */
async function commandExists(runtime: PluginRuntime, command: string): Promise<boolean> {
  const probe = ["/bin/sh", "-lc", 'command -v "$1" >/dev/null 2>&1', "sh", command];
  const { code } = await runtime.system.runCommandWithTimeout(probe, { timeoutMs: 5_000 });
  return code === 0;
}
220
+
221
+ export class GoogleMeetRuntime {
222
+ readonly #sessions = new Map<string, GoogleMeetSession>();
223
+ readonly #sessionStops = new Map<string, () => Promise<void>>();
224
+ readonly #sessionSpeakers = new Map<string, (instructions?: string) => void>();
225
+ readonly #sessionHealth = new Map<string, () => GoogleMeetChromeHealth>();
226
+
227
// Stores the Google Meet plugin config, the full AutoBot config, the plugin
// runtime and the logger as the private, read-only `params` property that the
// methods of this class read from.
+ constructor(
228
+ private readonly params: {
229
+ config: GoogleMeetConfig;
230
+ fullConfig: AutoBotConfig;
231
+ runtime: PluginRuntime;
232
+ logger: RuntimeLogger;
233
+ },
234
+ ) {}
235
+
236
/**
 * Returns all tracked sessions ordered by `createdAt` (ascending), after
 * invoking `#refreshHealth()` (defined elsewhere in this class).
 */
list(): GoogleMeetSession[] {
  this.#refreshHealth();
  const sessions = Array.from(this.#sessions.values());
  // `sessions` is a fresh copy, so sorting it in place leaves the map untouched.
  return sessions.sort((left, right) => left.createdAt.localeCompare(right.createdAt));
}
240
+
241
/**
 * Reports status for one session, or for every session when no id is given.
 * Each reported session gets a transport-specific status refresh first.
 *
 * @param sessionId - Session to look up; omit (or pass an empty string) for all.
 * @returns `{ found: true, session }` or `{ found: false }` for a lookup, and
 *   `{ found: true, sessions }` (ordered by `createdAt`) for the full listing.
 */
async status(sessionId?: string): Promise<{
  found: boolean;
  session?: GoogleMeetSession;
  sessions?: GoogleMeetSession[];
}> {
  this.#refreshHealth(sessionId);

  if (sessionId) {
    const match = this.#sessions.get(sessionId);
    if (!match) {
      return { found: false };
    }
    await this.#refreshStatusHealthForSession(match);
    return { found: true, session: match };
  }

  const ordered = [...this.#sessions.values()].toSorted((left, right) =>
    left.createdAt.localeCompare(right.createdAt),
  );
  await Promise.all(ordered.map((entry) => this.#refreshStatusHealthForSession(entry)));
  return { found: true, sessions: ordered };
}
260
+
261
/**
 * Builds the setup status for the requested (or default) transport and mode,
 * then appends live checks:
 * - `chrome-node-connected` when the transport is `chrome-node`, or when no
 *   transport was requested and a Chrome node is configured;
 * - `chrome-local-audio-device` and `chrome-local-audio-commands` when the
 *   transport is `chrome` and the mode is a talk-back mode.
 *
 * Failures of the live probes are recorded as failed checks, not thrown.
 */
async setupStatus(
  options: {
    transport?: GoogleMeetTransport;
    mode?: GoogleMeetModeInput;
    dialInNumber?: string;
  } = {},
) {
  const { config, fullConfig, runtime } = this.params;
  const transport = resolveTransport(options.transport, config);
  const mode = resolveMode(options.mode, config);
  const twilioDialInNumber =
    transport === "twilio" ? normalizeDialInNumber(options.dialInNumber) : undefined;
  const shouldCheckChromeNode =
    transport === "chrome-node" || (!options.transport && Boolean(config.chromeNode.node));

  let status = getGoogleMeetSetupStatus(config, {
    fullConfig,
    mode,
    transport,
    twilioDialInNumber,
  });

  if (shouldCheckChromeNode) {
    try {
      const node = await resolveChromeNodeInfo({
        runtime,
        requestedNode: config.chromeNode.node,
      });
      const label = node.displayName ?? node.remoteIp ?? node.nodeId ?? "connected node";
      status = addGoogleMeetSetupCheck(status, {
        id: "chrome-node-connected",
        ok: true,
        message: `Connected Google Meet node ready: ${label}`,
      });
    } catch (error) {
      status = addGoogleMeetSetupCheck(status, {
        id: "chrome-node-connected",
        ok: false,
        message: formatErrorMessage(error),
      });
    }
  }

  const needsLocalAudio = transport === "chrome" && isGoogleMeetTalkBackMode(mode);
  if (!needsLocalAudio) {
    return status;
  }

  try {
    await assertBlackHole2chAvailable({
      runtime,
      timeoutMs: Math.min(config.chrome.joinTimeoutMs, 10_000),
    });
    status = addGoogleMeetSetupCheck(status, {
      id: "chrome-local-audio-device",
      ok: true,
      message: "BlackHole 2ch audio device found",
    });
  } catch (error) {
    status = addGoogleMeetSetupCheck(status, {
      id: "chrome-local-audio-device",
      ok: false,
      message: formatErrorMessage(error),
    });
  }

  const commands = collectChromeAudioCommands(config);
  const missingCommands: string[] = [];
  for (const command of commands) {
    // A probe that throws is treated the same as a missing command.
    let available = false;
    try {
      available = await commandExists(runtime, command);
    } catch {
      available = false;
    }
    if (!available) {
      missingCommands.push(command);
    }
  }

  const pluralSuffix = (count: number) => (count === 1 ? "" : "s");
  let commandsMessage: string;
  if (commands.length === 0) {
    commandsMessage = "Chrome talk-back audio commands are not configured";
  } else if (missingCommands.length === 0) {
    commandsMessage = `Chrome audio command${pluralSuffix(commands.length)} available: ${commands.join(", ")}`;
  } else {
    commandsMessage = `Chrome audio command${pluralSuffix(missingCommands.length)} missing: ${missingCommands.join(", ")}`;
  }
  status = addGoogleMeetSetupCheck(status, {
    id: "chrome-local-audio-commands",
    ok: commands.length > 0 && missingCommands.length === 0,
    message: commandsMessage,
  });
  return status;
}
344
+
345
/**
 * Delegates to `createMeetWithBrowserProxyOnNode` with this instance's runtime
 * and plugin config, and returns its result.
 */
async createViaBrowser() {
  const { runtime, config } = this.params;
  return createMeetWithBrowserProxyOnNode({ runtime, config });
}
351
+
352
/**
 * Recovers the current Meet tab through the local Chrome transport or a
 * Chrome node, depending on the requested (or default) transport.
 *
 * @param request - Optional Meet URL (validated with `normalizeMeetUrl` when
 *   given) and transport override.
 * @throws Error when the resolved transport is `twilio`, or when the URL is
 *   rejected by `normalizeMeetUrl`.
 */
async recoverCurrentTab(request: { url?: string; transport?: GoogleMeetTransport } = {}) {
  const { config, runtime } = this.params;
  const transport = resolveTransport(request.transport, config);
  if (transport === "twilio") {
    throw new Error("recover_current_tab only supports chrome or chrome-node transports");
  }
  const url = request.url ? normalizeMeetUrl(request.url) : undefined;
  if (transport !== "chrome-node") {
    return recoverCurrentMeetTab({ config, url });
  }
  return recoverCurrentMeetTabOnNode({ runtime, config, url });
}
370
+
371
/**
 * Joins a Google Meet call, reusing an active session for the same URL,
 * transport and mode when one exists.
 *
 * Flow:
 * 1. Validate the URL and resolve transport and mode.
 * 2. Look for a reusable active session; a Twilio candidate is re-checked
 *    against the voice-call gateway first and dropped if it is no longer active.
 * 3. Otherwise build a new session and start it through the Chrome launcher
 *    (local or node) or the Twilio dial plan / voice-call gateway.
 * 4. Register the session and, in talk-back modes with instructions available,
 *    try to speak (Twilio reports whether the gateway sent the intro instead).
 *
 * @returns The session together with whether speech was triggered.
 * @throws Re-throws any launcher/gateway/validation error after logging a warning
 *   for failures that happen while starting the transport.
 */
async join(request: GoogleMeetJoinRequest): Promise<GoogleMeetJoinResult> {
  const { config, fullConfig, runtime, logger } = this.params;
  const url = normalizeMeetUrl(request.url);
  const transport = resolveTransport(request.transport, config);
  const mode = resolveMode(request.mode, config);
  const talkBack = isGoogleMeetTalkBackMode(mode);

  let existing = this.list().find(
    (candidate) =>
      candidate.state === "active" &&
      isSameMeetUrlForReuse(candidate.url, url) &&
      candidate.transport === transport &&
      candidate.mode === mode,
  );
  if (existing?.transport === "twilio") {
    // The phone leg may have ended since the session was recorded.
    await this.#refreshTwilioVoiceCallStatus(existing);
    if (existing.state !== "active") {
      existing = undefined;
    }
  }

  const speechInstructions = request.message ?? config.realtime.introMessage;
  if (existing) {
    await this.#refreshBrowserHealthForChromeSession(existing);
    noteSession(existing, "Reused existing active Meet session.");
    existing.updatedAt = nowIso();
    let reusedSpoken = false;
    if (talkBack && speechInstructions) {
      reusedSpoken = await this.#speakWhenReady(existing, speechInstructions);
    }
    return { session: existing, spoken: reusedSpoken };
  }

  const createdAt = nowIso();
  let delegatedTwilioSpoken = false;

  let participantIdentity = "signed-in Google Chrome profile";
  if (transport === "twilio") {
    participantIdentity = "Twilio phone participant";
  } else if (transport === "chrome-node") {
    participantIdentity = "signed-in Google Chrome profile on a paired node";
  }

  const isBidi = mode === "bidi";
  const session: GoogleMeetSession = {
    id: `meet_${randomUUID()}`,
    url,
    transport,
    mode,
    state: "active",
    createdAt,
    updatedAt: createdAt,
    participantIdentity,
    realtime: {
      enabled: talkBack,
      strategy: isBidi ? "bidi" : "agent",
      provider: isBidi ? (config.realtime.voiceProvider ?? config.realtime.provider) : undefined,
      model: isBidi ? config.realtime.model : undefined,
      transcriptionProvider:
        mode === "agent"
          ? (config.realtime.transcriptionProvider ?? config.realtime.provider)
          : undefined,
      toolPolicy: config.realtime.toolPolicy,
    },
    notes: [],
  };

  try {
    if (transport === "chrome" || transport === "chrome-node") {
      const launchParams = {
        runtime,
        config,
        fullConfig,
        meetingSessionId: session.id,
        requesterSessionKey: request.requesterSessionKey,
        mode,
        url,
        logger,
      };
      const result =
        transport === "chrome-node"
          ? await launchChromeMeetOnNode(launchParams)
          : await launchChromeMeet(launchParams);

      session.chrome = {
        audioBackend: config.chrome.audioBackend,
        launched: result.launched,
        nodeId: "nodeId" in result ? result.nodeId : undefined,
        browserProfile: config.chrome.browserProfile,
        health: "browser" in result ? result.browser : undefined,
      };
      this.#attachChromeAudioBridge(session, result.audioBridge);

      let chromeNote: string;
      if (result.audioBridge) {
        chromeNote =
          transport === "chrome-node"
            ? "Chrome node transport joins as the signed-in Google profile on the selected node and routes realtime audio through the node bridge."
            : "Chrome transport joins as the signed-in Google profile and routes realtime audio through the configured bridge.";
      } else if (talkBack) {
        chromeNote =
          "Chrome transport joins as the signed-in Google profile and expects BlackHole 2ch audio routing.";
      } else {
        chromeNote =
          "Chrome transport joins as the signed-in Google profile without starting the realtime audio bridge.";
      }
      session.notes.push(chromeNote);
      this.#refreshSpeechReadiness(session);
    } else {
      const dialInNumber = normalizeDialInNumber(
        request.dialInNumber ?? config.twilio.defaultDialInNumber,
      );
      if (!dialInNumber) {
        throw new Error(
          "Twilio transport requires a Meet dial-in phone number. Google Meet URLs do not include dial-in details; pass dialInNumber with optional pin/dtmfSequence, configure twilio.defaultDialInNumber, or use chrome/chrome-node transport.",
        );
      }

      const rawDtmfSequence = buildMeetDtmfSequence({
        pin: request.pin ?? config.twilio.defaultPin,
        dtmfSequence: request.dtmfSequence ?? config.twilio.defaultDtmfSequence,
      });
      // Only a sequence derived without an explicit DTMF sequence gets the wait prefix.
      const hasExplicitDtmf = Boolean(request.dtmfSequence || config.twilio.defaultDtmfSequence);
      const dtmfSequence = hasExplicitDtmf
        ? rawDtmfSequence
        : prefixDtmfWait(rawDtmfSequence, config.voiceCall.dtmfDelayMs);

      const voiceCallResult = config.voiceCall.enabled
        ? await joinMeetViaVoiceCallGateway({
            config,
            dialInNumber,
            dtmfSequence,
            logger,
            ...(request.requesterSessionKey
              ? { requesterSessionKey: request.requesterSessionKey }
              : {}),
            sessionKey: buildTwilioVoiceCallSessionKey(session.id),
            message: talkBack
              ? (request.message ?? config.voiceCall.introMessage ?? config.realtime.introMessage)
              : undefined,
          })
        : undefined;
      delegatedTwilioSpoken = Boolean(voiceCallResult?.introSent);

      session.twilio = {
        dialInNumber,
        pinProvided: Boolean(request.pin ?? config.twilio.defaultPin),
        dtmfSequence,
        voiceCallId: voiceCallResult?.callId,
        dtmfSent: voiceCallResult?.dtmfSent,
        introSent: voiceCallResult?.introSent,
      };
      if (voiceCallResult?.callId) {
        // Leaving the session later hangs up the delegated call.
        this.#sessionStops.set(session.id, async () => {
          await endMeetVoiceCallGatewayCall({
            config,
            callId: voiceCallResult.callId,
          });
        });
      }

      let twilioNote: string;
      if (!config.voiceCall.enabled) {
        twilioNote = "Twilio transport is an explicit dial plan; voice-call delegation is disabled.";
      } else if (dtmfSequence) {
        twilioNote =
          "Twilio transport delegated the phone leg to the voice-call plugin, then queued configured DTMF before realtime connect.";
      } else {
        twilioNote =
          "Twilio transport delegated the call to the voice-call plugin without configured DTMF.";
      }
      session.notes.push(twilioNote);
    }
  } catch (err) {
    logger.warn(`[google-meet] join failed: ${formatErrorMessage(err)}`);
    throw err;
  }

  this.#sessions.set(session.id, session);
  if (transport === "twilio") {
    return { session, spoken: delegatedTwilioSpoken };
  }
  const spoken =
    talkBack && speechInstructions
      ? await this.#speakWhenReady(session, speechInstructions)
      : false;
  return { session, spoken };
}
549
+
550
/**
 * Ends a session: runs its registered stop handler (if any) and marks the
 * session as `"ended"`. The session object stays in the session map.
 *
 * All per-session handles (stop handler, speaker, health reader) are released
 * whether or not a stop handler was registered, so an ended session never
 * keeps stale callbacks around.
 *
 * @returns `{ found: false }` for an unknown id, otherwise the ended session.
 * @throws Re-throws an error raised by the stop handler; the session is still
 *   marked ended in that case.
 */
async leave(sessionId: string): Promise<{ found: boolean; session?: GoogleMeetSession }> {
  const session = this.#sessions.get(sessionId);
  if (!session) {
    return { found: false };
  }
  const stop = this.#sessionStops.get(sessionId);
  // Unregister before awaiting so a concurrent leave() cannot run the handler twice.
  this.#sessionStops.delete(sessionId);
  this.#sessionSpeakers.delete(sessionId);
  this.#sessionHealth.delete(sessionId);
  try {
    if (stop) {
      await stop();
    }
  } finally {
    session.state = "ended";
    session.updatedAt = nowIso();
  }
  return { found: true, session };
}
571
+
572
/**
 * Asks the session's participant to speak.
 *
 * Twilio sessions with a delegated voice call speak through the voice-call
 * gateway; if the gateway reports the call as missing the session is marked
 * ended. All other sessions speak through the registered Chrome realtime
 * speaker, but only when the session is active and speech readiness passes;
 * otherwise a "Realtime speech blocked" note is recorded.
 *
 * @param sessionId - Session to speak in.
 * @param instructions - What to say; when empty, the configured intro message
 *   is used.
 * @returns `found: false` for an unknown id; otherwise whether speech was
 *   triggered, plus the session.
 * @throws Re-throws gateway errors other than "voice call missing".
 */
async speak(
  sessionId: string,
  instructions?: string,
): Promise<{ found: boolean; spoken: boolean; session?: GoogleMeetSession }> {
  const session = this.#sessions.get(sessionId);
  if (!session) {
    return { found: false, spoken: false };
  }

  if (session.transport === "twilio" && session.twilio?.voiceCallId) {
    // Same guard as the Chrome path below: never drive a session that has ended.
    if (session.state !== "active") {
      return { found: true, spoken: false, session };
    }
    try {
      await speakMeetViaVoiceCallGateway({
        config: this.params.config,
        callId: session.twilio.voiceCallId,
        message:
          instructions ||
          this.params.config.voiceCall.introMessage ||
          this.params.config.realtime.introMessage ||
          "",
      });
    } catch (err) {
      if (!isVoiceCallMissingError(err)) {
        throw err;
      }
      this.#markTwilioSessionEnded(session, "Voice Call is no longer active.");
      return { found: true, spoken: false, session };
    }
    session.twilio.introSent = true;
    session.updatedAt = nowIso();
    return { found: true, spoken: true, session };
  }

  await this.#refreshBrowserHealthForChromeSession(session);
  await this.#ensureChromeRealtimeBridge(session);
  const speak = this.#sessionSpeakers.get(sessionId);
  if (!speak || session.state !== "active") {
    return { found: true, spoken: false, session };
  }

  const readiness = this.#refreshSpeechReadiness(session);
  if (!readiness.ready) {
    const note = readiness.message
      ? `Realtime speech blocked: ${readiness.message}`
      : "Realtime speech blocked until Google Meet is ready.";
    // noteSession keeps a single copy of the note, moved to the end.
    noteSession(session, note);
    session.updatedAt = nowIso();
    return { found: true, spoken: false, session };
  }

  speak(instructions || this.params.config.realtime.introMessage);
  session.updatedAt = nowIso();
  this.#refreshHealth(sessionId);
  return { found: true, spoken: true, session };
}
622
+
623
/**
 * Tries to speak, retrying roughly every 250 ms while the session is still
 * getting ready. The retry window is the smaller of `chrome.waitForInCallMs`
 * and `chrome.joinTimeoutMs` (each floored at 0).
 *
 * Twilio sessions are never retried. Retrying stops early when a manual action
 * is required, the session is no longer active, or speech is blocked for a
 * reason other than not-in-call / browser-unverified / microphone-muted.
 *
 * @returns Whether speech was triggered.
 */
async #speakWhenReady(session: GoogleMeetSession, instructions: string): Promise<boolean> {
  const firstAttempt = await this.speak(session.id, instructions);
  if (firstAttempt.spoken || session.transport === "twilio") {
    return firstAttempt.spoken;
  }

  const { waitForInCallMs, joinTimeoutMs } = this.params.config.chrome;
  const waitMs = Math.min(Math.max(0, waitForInCallMs), Math.max(0, joinTimeoutMs));
  const deadline = Date.now() + waitMs;
  // Blocked reasons that are worth waiting out.
  const transientReasons = new Set<unknown>([
    "not-in-call",
    "browser-unverified",
    "meet-microphone-muted",
  ]);

  while (Date.now() < deadline) {
    const remainingMs = Math.max(0, deadline - Date.now());
    await sleep(Math.min(250, remainingMs));

    const attempt = await this.speak(session.id, instructions);
    if (attempt.spoken) {
      return true;
    }
    const health = attempt.session?.chrome?.health;
    if (health?.manualActionRequired || attempt.session?.state !== "active") {
      return false;
    }
    const blocked = health?.speechBlockedReason;
    if (blocked && !transientReasons.has(blocked)) {
      return false;
    }
  }
  return false;
}
655
+
656
/**
 * Joins (or reuses) a talk-back session, asks it to speak a test phrase, and
 * then waits up to `min(chrome.joinTimeoutMs, 5000)` ms for the session's
 * realtime output byte counter to move past its pre-test value.
 *
 * The byte-counter baseline is taken per session id before joining, so it is
 * only applied when `join` actually returns that same session; a newly created
 * session starts from 0.
 *
 * @param request - Join request; `mode` must not be `"transcribe"`. When the
 *   resolved mode is not a talk-back mode, the configured default mode is used
 *   if it is talk-back, otherwise `"agent"`.
 * @returns Speech/verification flags, a copy of selected health fields, and
 *   the session.
 * @throws Error when `request.mode` is `"transcribe"`, plus anything `join`
 *   or `normalizeMeetUrl` throws.
 */
async testSpeech(request: GoogleMeetJoinRequest): Promise<{
  createdSession: boolean;
  inCall?: boolean;
  manualActionRequired?: boolean;
  manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
  manualActionMessage?: string;
  spoken: boolean;
  speechOutputVerified: boolean;
  speechOutputTimedOut: boolean;
  speechReady?: boolean;
  speechBlockedReason?: GoogleMeetChromeHealth["speechBlockedReason"];
  speechBlockedMessage?: string;
  audioOutputActive?: boolean;
  lastOutputBytes?: number;
  session: GoogleMeetSession;
}> {
  if (request.mode === "transcribe") {
    throw new Error(
      "test_speech requires mode: agent or bidi; use join mode: transcribe for observe-only sessions.",
    );
  }
  const requestedMode = request.mode ? resolveMode(request.mode, this.params.config) : undefined;
  const mode =
    requestedMode && isGoogleMeetTalkBackMode(requestedMode)
      ? requestedMode
      : isGoogleMeetTalkBackMode(this.params.config.defaultMode)
        ? this.params.config.defaultMode
        : "agent";
  const url = normalizeMeetUrl(request.url);
  const transport = resolveTransport(request.transport, this.params.config);

  // Snapshot every session's output counter BEFORE joining: join() may speak,
  // and which session it reuses (exact mode match) is only known afterwards.
  const outputBytesBefore = new Map<string, number>(
    this.list().map(
      (session) => [session.id, session.chrome?.health?.lastOutputBytes ?? 0] as const,
    ),
  );

  const result = await this.join({
    ...request,
    transport,
    url,
    mode,
    message: request.message ?? "Say exactly: Google Meet speech test complete.",
  });
  // Baseline of the session actually returned; a brand-new session starts at 0.
  const startOutputBytes = outputBytesBefore.get(result.session.id) ?? 0;

  let health = result.session.chrome?.health;
  const shouldWaitForOutput =
    result.spoken === true &&
    health?.manualActionRequired !== true &&
    this.#sessionHealth.has(result.session.id);
  if (shouldWaitForOutput && !hasRealtimeAudioOutputAdvanced(health, startOutputBytes)) {
    const deadline = Date.now() + Math.min(this.params.config.chrome.joinTimeoutMs, 5_000);
    while (Date.now() < deadline) {
      await sleep(100);
      this.#refreshHealth(result.session.id);
      health = result.session.chrome?.health;
      if (hasRealtimeAudioOutputAdvanced(health, startOutputBytes)) {
        break;
      }
    }
  }
  const speechOutputVerified = hasRealtimeAudioOutputAdvanced(health, startOutputBytes);
  return {
    createdSession: !outputBytesBefore.has(result.session.id),
    inCall: health?.inCall,
    manualActionRequired: health?.manualActionRequired,
    manualActionReason: health?.manualActionReason,
    manualActionMessage: health?.manualActionMessage,
    spoken: result.spoken ?? false,
    speechOutputVerified,
    speechOutputTimedOut: shouldWaitForOutput && !speechOutputVerified,
    speechReady: health?.speechReady,
    speechBlockedReason: health?.speechBlockedReason,
    speechBlockedMessage: health?.speechBlockedMessage,
    audioOutputActive: health?.audioOutputActive,
    lastOutputBytes: health?.lastOutputBytes,
    session: result.session,
  };
}
737
+
738
/**
 * Joins (or reuses) a `"transcribe"` session on a Chrome transport and polls,
 * every 250 ms, until captions advance past the pre-test checkpoint, a manual
 * action becomes required, or the probe timeout elapses.
 *
 * @param request - Join request; `mode` must not resolve to a talk-back mode
 *   and the transport must not be `twilio`. `timeoutMs` overrides the default
 *   wait of `chrome.joinTimeoutMs` (see `resolveProbeTimeoutMs`).
 * @returns Listen/verification flags, a copy of caption health fields, and the
 *   session.
 * @throws Error for a talk-back mode, the `twilio` transport, an invalid URL
 *   or timeout, plus anything `join` throws.
 */
async testListen(request: GoogleMeetJoinRequest): Promise<{
  createdSession: boolean;
  inCall?: boolean;
  manualActionRequired?: boolean;
  manualActionReason?: GoogleMeetChromeHealth["manualActionReason"];
  manualActionMessage?: string;
  listenVerified: boolean;
  listenTimedOut: boolean;
  captioning?: boolean;
  captionsEnabledAttempted?: boolean;
  transcriptLines?: number;
  lastCaptionAt?: string;
  lastCaptionSpeaker?: string;
  lastCaptionText?: string;
  recentTranscript?: GoogleMeetChromeHealth["recentTranscript"];
  session: GoogleMeetSession;
}> {
  const requestedMode = request.mode ? resolveMode(request.mode, this.params.config) : undefined;
  if (requestedMode && isGoogleMeetTalkBackMode(requestedMode)) {
    throw new Error(
      "test_listen requires mode: transcribe; use test_speech for talk-back sessions.",
    );
  }
  const url = normalizeMeetUrl(request.url);
  const transport = resolveTransport(request.transport, this.params.config);
  if (transport === "twilio") {
    throw new Error("test_listen supports chrome or chrome-node transports");
  }

  const priorSessions = this.list();
  const priorIds = new Set(priorSessions.map((entry) => entry.id));
  const priorMatch = priorSessions.find(
    (entry) =>
      entry.state === "active" &&
      isSameMeetUrlForReuse(entry.url, url) &&
      entry.transport === transport &&
      entry.mode === "transcribe",
  );
  // Caption state before the test; progress is measured against this.
  const baseline = transcriptCheckpoint(priorMatch?.chrome?.health);

  const joined = await this.join({
    ...request,
    transport,
    url,
    mode: "transcribe",
    message: undefined,
  });
  const session = joined.session;
  let health = session.chrome?.health;
  const timeoutMs = resolveProbeTimeoutMs(
    request.timeoutMs,
    this.params.config.chrome.joinTimeoutMs,
  );
  const shouldWait =
    health?.manualActionRequired !== true && isManagedChromeBrowserSession(session);

  if (shouldWait && !hasTranscriptAdvanced(health, baseline)) {
    const deadline = Date.now() + timeoutMs;
    let settled = false;
    while (!settled && Date.now() < deadline) {
      await sleep(250);
      await this.#refreshCaptionHealthForSession(session);
      health = session.chrome?.health;
      settled = Boolean(health?.manualActionRequired) || hasTranscriptAdvanced(health, baseline);
    }
  }

  const listenVerified = hasTranscriptAdvanced(health, baseline);
  return {
    createdSession: !priorIds.has(session.id),
    inCall: health?.inCall,
    manualActionRequired: health?.manualActionRequired,
    manualActionReason: health?.manualActionReason,
    manualActionMessage: health?.manualActionMessage,
    listenVerified,
    listenTimedOut: shouldWait && !listenVerified && health?.manualActionRequired !== true,
    captioning: health?.captioning,
    captionsEnabledAttempted: health?.captionsEnabledAttempted,
    transcriptLines: health?.transcriptLines,
    lastCaptionAt: health?.lastCaptionAt,
    lastCaptionSpeaker: health?.lastCaptionSpeaker,
    lastCaptionText: health?.lastCaptionText,
    recentTranscript: health?.recentTranscript,
    session,
  };
}
820
+
821
/**
 * Refreshes caption-related health: `"transcribe"` sessions get a browser
 * health refresh, every other mode only has its speech readiness re-evaluated.
 */
async #refreshCaptionHealthForSession(session: GoogleMeetSession) {
  if (session.mode === "transcribe") {
    await this.#refreshBrowserHealthForChromeSession(session);
    return;
  }
  this.#refreshSpeechReadiness(session);
}
828
+
829
/**
 * Refreshes a session for a status report, by transport: Chrome transports get
 * a forced, read-only browser health refresh; Twilio gets a voice-call status
 * refresh; anything else only has its speech readiness re-evaluated.
 */
async #refreshStatusHealthForSession(session: GoogleMeetSession) {
  switch (session.transport) {
    case "chrome":
    case "chrome-node":
      await this.#refreshBrowserHealthForChromeSession(session, { force: true, readOnly: true });
      return;
    case "twilio":
      await this.#refreshTwilioVoiceCallStatus(session);
      return;
    default:
      this.#refreshSpeechReadiness(session);
  }
}
840
+
841
/**
 * Marks a session as ended, releases its stop/speaker/health handles, and
 * records the given reason as a session note.
 */
#markTwilioSessionEnded(session: GoogleMeetSession, reason: string) {
  const sessionId = session.id;
  this.#sessionStops.delete(sessionId);
  this.#sessionSpeakers.delete(sessionId);
  this.#sessionHealth.delete(sessionId);
  session.state = "ended";
  session.updatedAt = nowIso();
  noteSession(session, reason);
}
849
+
850
/**
 * For an active session with a delegated voice call, asks the voice-call
 * gateway whether the call still exists and marks the session ended when it
 * reports `found === false`. Gateway errors are logged at debug level and
 * otherwise ignored. Speech readiness is re-evaluated in every case.
 */
async #refreshTwilioVoiceCallStatus(session: GoogleMeetSession) {
  const callId = session.twilio?.voiceCallId;
  if (callId && session.state === "active") {
    try {
      const status = await getMeetVoiceCallGatewayCall({
        config: this.params.config,
        callId,
      });
      if (status.found === false) {
        this.#markTwilioSessionEnded(session, "Voice Call is no longer active.");
      }
    } catch (error) {
      // Best effort: a failed lookup must not change the session state.
      this.params.logger.debug?.(
        `[google-meet] voice-call status refresh ignored: ${formatErrorMessage(error)}`,
      );
    }
  }
  this.#refreshSpeechReadiness(session);
}
871
+
872
/**
 * Re-reads browser health for a managed Chrome session and merges it into
 * `session.chrome.health`.
 *
 * The browser probe is skipped when the session is not a managed Chrome
 * browser session, or when `force` is not set and a talk-back session already
 * evaluates as speech-ready. Probe failures are logged at debug level and
 * ignored. Speech readiness is recomputed on every path.
 *
 * @param session - Session to refresh (mutated in place).
 * @param options - `force` bypasses the "already speech-ready" shortcut;
 *   `readOnly` is forwarded unchanged to the tab-recovery call.
 */
async #refreshBrowserHealthForChromeSession(
  session: GoogleMeetSession,
  options: { force?: boolean; readOnly?: boolean } = {},
) {
  if (!isManagedChromeBrowserSession(session)) {
    this.#refreshSpeechReadiness(session);
    return;
  }
  const alreadySpeechReady =
    !options.force &&
    isGoogleMeetTalkBackMode(session.mode) &&
    evaluateSpeechReadiness(session).ready;
  if (!alreadySpeechReady) {
    try {
      // Arguments common to both the local and the node-hosted recovery call.
      const probeArgs = {
        config: this.params.config,
        mode: session.mode,
        readOnly: options.readOnly,
        url: session.url,
      };
      const { found, browser } =
        session.transport === "chrome-node"
          ? await recoverCurrentMeetTabOnNode({ runtime: this.params.runtime, ...probeArgs })
          : await recoverCurrentMeetTab(probeArgs);
      const chrome = session.chrome;
      if (found && browser && chrome) {
        chrome.health = { ...chrome.health, ...browser };
        session.updatedAt = nowIso();
      }
    } catch (error) {
      this.params.logger.debug?.(
        `[google-meet] browser readiness refresh ignored: ${formatErrorMessage(error)}`,
      );
    }
  }
  this.#refreshSpeechReadiness(session);
}
918
+
919
/**
 * Records an audio bridge on the session's Chrome state and, for the
 * command-pair bridge types, registers the bridge's stop, speak and health
 * callbacks under the session id.
 *
 * Does nothing when the session has no Chrome state or no bridge was given.
 * For bridge types other than `"command-pair"` / `"node-command-pair"` the
 * recorded `provider` is `undefined` and no callbacks are registered.
 *
 * @param session - Session receiving the bridge (mutated in place).
 * @param audioBridge - Bridge to attach, or `undefined`.
 */
#attachChromeAudioBridge(
  session: GoogleMeetSession,
  audioBridge: ChromeAudioBridgeResult | undefined,
) {
  const chrome = session.chrome;
  if (!chrome || !audioBridge) {
    return;
  }
  const bridgeType = audioBridge.type;
  if (audioBridge.type === "command-pair" || audioBridge.type === "node-command-pair") {
    chrome.audioBridge = { type: bridgeType, provider: audioBridge.providerId };
    this.#sessionStops.set(session.id, audioBridge.stop);
    this.#sessionSpeakers.set(session.id, audioBridge.speak);
    this.#sessionHealth.set(session.id, audioBridge.getHealth);
  } else {
    chrome.audioBridge = { type: bridgeType, provider: undefined };
  }
}
939
+
940
/**
 * Attaches an audio bridge to an active local-Chrome talk-back session that
 * does not have one yet.
 *
 * Returns without doing anything unless all of the following hold: the mode is
 * a talk-back mode, the transport is `"chrome"`, the state is `"active"`,
 * Chrome state exists without an audio bridge, and the recorded health shows
 * the session is in the call, the mic is not reported muted, and no manual
 * action is required. When they hold, `launchChromeMeet` is called with
 * `chrome.launch` overridden to `false` and the returned bridge is attached.
 *
 * @param session - Session to equip with a bridge (mutated in place).
 */
async #ensureChromeRealtimeBridge(session: GoogleMeetSession) {
  if (!isGoogleMeetTalkBackMode(session.mode)) {
    return;
  }
  if (session.transport !== "chrome" || session.state !== "active") {
    return;
  }
  if (!session.chrome || session.chrome.audioBridge) {
    return;
  }
  const health = session.chrome.health;
  const callIsUsable =
    health?.inCall === true &&
    health.micMuted !== true &&
    health.manualActionRequired !== true;
  if (!callIsUsable) {
    return;
  }
  // Same config as the runtime's, except that `chrome.launch` is forced off.
  const bridgeOnlyConfig = {
    ...this.params.config,
    chrome: {
      ...this.params.config.chrome,
      launch: false,
    },
  };
  const launched = await launchChromeMeet({
    runtime: this.params.runtime,
    config: bridgeOnlyConfig,
    fullConfig: this.params.fullConfig,
    meetingSessionId: session.id,
    mode: session.mode,
    url: session.url,
    logger: this.params.logger,
  });
  this.#attachChromeAudioBridge(session, launched.audioBridge);
  session.updatedAt = nowIso();
}
976
+
977
/**
 * Recomputes speech readiness for a session and mirrors it onto the session.
 *
 * When the session is ready, notes starting with `"Realtime speech blocked:"`
 * are removed. When the session has Chrome state, its health object is
 * replaced with a copy carrying the current `speechReady`,
 * `speechBlockedReason` and `speechBlockedMessage` values.
 *
 * @param session - Session to evaluate (mutated in place).
 * @returns The readiness object produced by `evaluateSpeechReadiness`.
 */
#refreshSpeechReadiness(session: GoogleMeetSession) {
  const readiness = evaluateSpeechReadiness(session);
  const { ready, reason, message } = readiness;
  if (ready) {
    const isBlockedNote = (note: string) => note.startsWith("Realtime speech blocked:");
    session.notes = session.notes.filter((note) => !isBlockedNote(note));
  }
  const chrome = session.chrome;
  if (chrome) {
    chrome.health = {
      ...chrome.health,
      speechReady: ready,
      speechBlockedReason: reason,
      speechBlockedMessage: message,
    };
  }
  return readiness;
}
992
+
993
/**
 * Pulls fresh values from registered health callbacks into
 * `session.chrome.health`, then recomputes speech readiness.
 *
 * Ids whose session is unknown, has no Chrome state, or has no registered
 * health callback are skipped.
 *
 * @param sessionId - Refresh only this session; when omitted (or empty), every
 *   id currently present in the health registry is refreshed.
 */
#refreshHealth(sessionId?: string) {
  // Snapshot the ids up front so the loop does not iterate the live registry.
  const targetIds = sessionId ? [sessionId] : Array.from(this.#sessionHealth.keys());
  for (const targetId of targetIds) {
    const tracked = this.#sessions.get(targetId);
    const readHealth = this.#sessionHealth.get(targetId);
    if (tracked?.chrome && readHealth) {
      tracked.chrome.health = {
        ...tracked.chrome.health,
        ...readHealth(),
      };
      this.#refreshSpeechReadiness(tracked);
    }
  }
}
1008
+ }