@agenticmail/enterprise 0.5.367 → 0.5.369
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{agent-tools-XBMJR6DQ.js → agent-tools-QHR2LY5V.js} +1 -1
- package/dist/agent-tools-YZF26YMY.js +14007 -0
- package/dist/{chunk-X2YL6KSY.js → chunk-2EI4AYXM.js} +2 -5
- package/dist/{chunk-ULKGUIVP.js → chunk-IO5EPYLA.js} +7 -2
- package/dist/{chunk-Z3OXPQ73.js → chunk-JNNZKFBP.js} +2 -2
- package/dist/chunk-LEACSX6C.js +4945 -0
- package/dist/chunk-LICMHA7V.js +2583 -0
- package/dist/{chunk-OKLI672B.js → chunk-QHBOQ2IG.js} +2 -2
- package/dist/chunk-SXDQEUHA.js +1727 -0
- package/dist/chunk-ZW24HPRG.js +5101 -0
- package/dist/{cli-agent-JYQSLTKP.js → cli-agent-45NEZY7H.js} +1 -1
- package/dist/cli-agent-VEQZKUCM.js +2483 -0
- package/dist/{cli-serve-YTFZUDRS.js → cli-serve-2K7AX2G2.js} +1 -1
- package/dist/cli-serve-5KMU2NHN.js +281 -0
- package/dist/cli.js +3 -3
- package/dist/index.js +3 -3
- package/dist/{meetings-SQNTB6GD.js → meetings-EMKU56G3.js} +1 -1
- package/dist/meetings-TL77WLKW.js +12 -0
- package/dist/{runtime-5AZEZO75.js → runtime-GS2SAORH.js} +1 -1
- package/dist/runtime-HNPFFKQL.js +45 -0
- package/dist/{server-7IR64ZIT.js → server-EGWD64TG.js} +1 -1
- package/dist/server-ET4HLIC7.js +28 -0
- package/dist/{setup-R4AAM5PT.js → setup-DCBFFTLT.js} +1 -1
- package/dist/setup-WO337O3I.js +20 -0
- package/logs/cloudflared-error.log +6 -0
- package/logs/fola-error.log +1 -0
- package/logs/john-error.log +1 -0
- package/package.json +1 -1
|
@@ -0,0 +1,2583 @@
|
|
|
1
|
+
import {
|
|
2
|
+
errorResult,
|
|
3
|
+
jsonResult,
|
|
4
|
+
readNumberParam,
|
|
5
|
+
readStringParam,
|
|
6
|
+
textResult
|
|
7
|
+
} from "./chunk-ZB3VC2MR.js";
|
|
8
|
+
|
|
9
|
+
// src/agent-tools/tools/browser.ts
|
|
10
|
+
import * as path from "path";
|
|
11
|
+
import * as os from "os";
|
|
12
|
+
import { promises as fs, existsSync } from "fs";
|
|
13
|
+
// All actions accepted by the "browser" tool's `action` parameter.
var BROWSER_ACTIONS = ["navigate", "screenshot", "click", "type", "scroll", "evaluate", "content", "close"];
// Default page viewport, in CSS pixels.
var DEFAULT_VIEWPORT_WIDTH = 1280;
var DEFAULT_VIEWPORT_HEIGHT = 720;
// Default per-operation timeout: 30 000 ms.
var DEFAULT_TIMEOUT_MS = 3e4;
// Maximum number of cached per-agent browser contexts before LRU eviction.
var DEFAULT_MAX_CONTEXTS = 5;
// Idle time (5 minutes) after which a cached context is closed by the sweeper.
var DEFAULT_IDLE_TIMEOUT_MS = 5 * 6e4;
// Shared browser instances keyed by launch mode. Note: only "headless" is
// assigned by ensureBrowser; headed/chrome modes use persistent contexts
// that are tracked in agentContexts instead.
var browsers = { headless: null, headed: null, chrome: null };
// Cache of live contexts keyed by "<agentId>:<mode>" -> { context, page, lastUsed }.
var agentContexts = /* @__PURE__ */ new Map();
// Handle of the periodic idle-cleanup interval; null while not running.
var idleCleanupTimer = null;
|
|
22
|
+
/**
 * Locate a native Google Chrome / Chromium executable on this machine.
 * Probes a fixed list of well-known install paths (macOS, Linux, Windows)
 * in order and returns the first that exists, or null when none do.
 * Filesystem errors during probing are treated as "not found".
 *
 * @returns {string|null} absolute path to the Chrome binary, or null.
 */
function findChromePath() {
  const knownLocations = [
    // macOS
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",
    "/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary",
    // Linux
    "/usr/bin/google-chrome",
    "/usr/bin/google-chrome-stable",
    "/usr/bin/chromium-browser",
    "/snap/bin/chromium",
    // Windows (common paths)
    "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
    "C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe"
  ];
  const exists = (candidate) => {
    try {
      return existsSync(candidate);
    } catch {
      return false; // e.g. permission errors: treat as absent
    }
  };
  return knownLocations.find(exists) ?? null;
}
|
|
44
|
+
/**
 * Start the periodic idle-cleanup sweep (every 60 s), if not already running.
 * Each sweep: (1) closes and evicts any cached context idle longer than its
 * timeout — 2 hours for ":chrome"/":headed" keys, DEFAULT_IDLE_TIMEOUT_MS
 * otherwise; (2) closes a shared browser whose mode has no remaining cached
 * contexts; (3) cancels itself once no shared browsers remain.
 * Idempotent: a no-op while a timer is already active.
 */
function startIdleCleanup() {
  if (idleCleanupTimer) return;
  idleCleanupTimer = setInterval(function() {
    var now = Date.now();
    // Pass 1: evict idle per-agent contexts.
    for (var [key, ctx] of agentContexts) {
      // Headed/native-Chrome sessions get a long grace period (2 h) since
      // they may hold interactive logins; everything else uses the default.
      var timeout = key.endsWith(":chrome") || key.endsWith(":headed") ? 2 * 60 * 6e4 : DEFAULT_IDLE_TIMEOUT_MS;
      if (now - ctx.lastUsed > timeout) {
        ctx.context.close().catch(() => {
        });
        agentContexts.delete(key);
      }
    }
    // Pass 2: shut down shared browsers whose mode has no live contexts left.
    for (var mode of ["headless", "headed", "chrome"]) {
      var prefix = `:${mode}`;
      var hasContexts = false;
      for (var [key] of agentContexts) {
        if (key.endsWith(prefix)) {
          hasContexts = true;
          break;
        }
      }
      if (!hasContexts && browsers[mode]) {
        browsers[mode].close().catch(() => {
        });
        browsers[mode] = null;
      }
    }
    // Pass 3: nothing left to watch — stop the sweep entirely.
    if (!browsers.headless && !browsers.headed && !browsers.chrome && idleCleanupTimer) {
      clearInterval(idleCleanupTimer);
      idleCleanupTimer = null;
    }
  }, 6e4);
  // Don't keep the Node process alive just for this timer (browser builds
  // return a number from setInterval, hence the type guard).
  if (idleCleanupTimer && typeof idleCleanupTimer === "object" && "unref" in idleCleanupTimer) {
    idleCleanupTimer.unref();
  }
}
|
|
80
|
+
/**
 * Get (or lazily create) a Playwright page for an agent in the requested mode.
 *
 * Modes: "headless" shares one Chromium instance with a fresh context per
 * agent; "headed" and "chrome" launch a per-agent persistent context whose
 * profile lives under /tmp/agenticmail-browser/<agentId>/<mode> so logins
 * survive restarts. Cached entries are reused and their lastUsed refreshed;
 * when the cache is full, the least-recently-used entry is evicted.
 *
 * @param {boolean} headless - headless Chromium when true (ignored if useChrome).
 * @param {string} agentId - cache key component; one context per agent+mode.
 * @param {boolean} useChrome - launch the native Google Chrome binary instead
 *   of bundled Chromium (required for some Google services).
 * @returns {Promise<{page: import('playwright').Page}>}
 * @throws {Error} when Playwright is missing, Chrome cannot be found
 *   (chrome mode), or the launch fails.
 */
async function ensureBrowser(headless, agentId, useChrome) {
  var mode = useChrome ? "chrome" : headless ? "headless" : "headed";
  var contextKey = `${agentId}:${mode}`;
  var existing = agentContexts.get(contextKey);
  if (existing) {
    existing.lastUsed = Date.now();
    return { page: existing.page };
  }
  // Cache full: evict the least-recently-used context to stay under the cap.
  if (agentContexts.size >= DEFAULT_MAX_CONTEXTS) {
    var oldestId = null;
    var oldestTime = Infinity;
    for (var [id, ctx] of agentContexts) {
      if (ctx.lastUsed < oldestTime) {
        oldestTime = ctx.lastUsed;
        oldestId = id;
      }
    }
    if (oldestId) {
      var evicted = agentContexts.get(oldestId);
      if (evicted) {
        evicted.context.close().catch(() => {
        });
        agentContexts.delete(oldestId);
      }
    }
  }
  try {
    var pw = await import("playwright");
    if (mode === "headless") {
      // One shared headless Chromium; each agent gets an isolated context.
      if (!browsers[mode]) {
        browsers[mode] = await pw.chromium.launch({ headless: true });
        console.log(`[browser] Launched headless Chromium`);
        startIdleCleanup();
      }
      var context = await browsers[mode].newContext({
        viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT },
        userAgent: "AgenticMail-Agent/1.0"
      });
      var page = await context.newPage();
      agentContexts.set(contextKey, { context, page, lastUsed: Date.now() });
      return { page };
    }
    // Headed / native-Chrome path: persistent per-agent profile directory.
    const { mkdirSync } = await import("fs");
    const { join: join4 } = await import("path");
    const userDataDir = join4("/tmp", "agenticmail-browser", agentId, mode);
    mkdirSync(userDataDir, { recursive: true });
    var launchArgs = [
      "--disable-blink-features=AutomationControlled",
      // NOTE: do NOT use --use-fake-device-for-media-stream — it overrides real audio devices
      // We need Chrome to use the system's real audio input (BlackHole/VB-CABLE) for meeting voice
      "--use-fake-ui-for-media-stream",
      // Auto-accept mic/camera permission prompts
      "--auto-select-desktop-capture-source=Entire screen",
      "--enable-usermedia-screen-capturing",
      "--disable-infobars",
      "--no-first-run",
      "--no-default-browser-check"
    ];
    var launchOpts = {
      headless: false,
      args: launchArgs,
      viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT },
      // Spoof a regular desktop-Chrome UA so sites don't treat us as a bot.
      userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36",
      permissions: ["microphone", "camera", "notifications"]
    };
    if (mode === "chrome") {
      const chromePath = findChromePath();
      if (!chromePath) {
        throw new Error("Native Google Chrome not found. Install: brew install --cask google-chrome");
      }
      launchOpts.executablePath = chromePath;
    }
    var persistentContext = await pw.chromium.launchPersistentContext(userDataDir, launchOpts);
    // Hide the automation marker that sites use for bot detection.
    await persistentContext.addInitScript(() => {
      Object.defineProperty(navigator, "webdriver", { get: () => void 0 });
    });
    var page = persistentContext.pages()[0] || await persistentContext.newPage();
    console.log(`[browser] Launched ${mode} persistent browser for ${agentId} (profile: ${userDataDir})`);
    agentContexts.set(contextKey, { context: persistentContext, page, lastUsed: Date.now() });
    startIdleCleanup();
    return { page };
  } catch (error) {
    // Wrap every launch failure with an actionable install hint.
    throw new Error(
      "Browser launch failed. " + (mode === "chrome" ? "Ensure Google Chrome is installed." : "Install Playwright: npm install playwright") + "\nError: " + (error.message || "unknown")
    );
  }
}
|
|
167
|
+
// Live SSH tunnel child processes, keyed by "ssh-tunnel:<agentId>".
var activeSshTunnels = /* @__PURE__ */ new Map();
|
|
168
|
+
/**
 * Ensure a background `ssh` tunnel process is running for this agent,
 * spawning one from `tunnelCmd` when none is alive.
 *
 * @param {string} tunnelCmd - SSH command line, e.g.
 *   "ssh -L 9222:localhost:9222 user@host". The leading "ssh" is stripped
 *   and the remainder split on whitespace (quoted arguments are not supported).
 * @param {string} agentId - key component for the tunnel registry.
 * @returns {Promise<void>} resolves once the forwarded local port accepts TCP
 *   connections (when a "-L <port>:" forward is present), after a fixed 3 s
 *   grace period otherwise, or immediately when a live tunnel already exists.
 */
async function ensureSshTunnel(tunnelCmd, agentId) {
  var tunnelKey = `ssh-tunnel:${agentId}`;
  var existing = activeSshTunnels.get(tunnelKey);
  if (existing && !existing.killed) {
    // Liveness probe with signal 0. FIX: ChildProcess.kill() does not throw
    // when the process is gone — it returns false — so the previous
    // try/catch-only check would happily reuse a dead tunnel. Branch on the
    // return value and fall through to respawn when the probe fails.
    var alive = false;
    try {
      alive = existing.kill(0);
    } catch {
      alive = false;
    }
    if (alive) {
      return;
    }
    activeSshTunnels.delete(tunnelKey);
  }
  console.log(`[browser] Establishing SSH tunnel for ${agentId}: ${tunnelCmd}`);
  const { spawn } = await import("child_process");
  var args = tunnelCmd.replace(/^ssh\s+/, "").split(/\s+/).filter(Boolean);
  // Force "no remote command" mode and sane keep-alive / host-key defaults
  // unless the caller supplied their own.
  if (!args.includes("-N")) args.push("-N");
  if (!args.some((a) => a.includes("StrictHostKeyChecking"))) {
    args.push("-o", "StrictHostKeyChecking=accept-new");
  }
  if (!args.some((a) => a.includes("ServerAliveInterval"))) {
    args.push("-o", "ServerAliveInterval=30");
  }
  var proc = spawn("ssh", args, { stdio: "pipe", detached: false });
  activeSshTunnels.set(tunnelKey, proc);
  proc.on("exit", () => {
    activeSshTunnels.delete(tunnelKey);
  });
  proc.on("error", (err) => {
    console.error(`[browser] SSH tunnel error for ${agentId}:`, err.message);
    activeSshTunnels.delete(tunnelKey);
  });
  // If the command declares a local forward (-L <port>:...), poll that port
  // until it accepts a TCP connection or 15 s elapse.
  var localPortMatch = tunnelCmd.match(/-L\s*(\d+):/);
  if (localPortMatch) {
    var port = parseInt(localPortMatch[1], 10);
    var maxWait = 15e3;
    var start = Date.now();
    const net = await import("net");
    while (Date.now() - start < maxWait) {
      var connected = await new Promise((resolve) => {
        var sock = net.connect({ port, host: "127.0.0.1" }, () => {
          sock.destroy();
          resolve(true);
        });
        sock.on("error", () => resolve(false));
        sock.setTimeout(1e3, () => {
          sock.destroy();
          resolve(false);
        });
      });
      if (connected) {
        console.log(`[browser] SSH tunnel ready on port ${port} for ${agentId}`);
        return;
      }
      await new Promise((r) => setTimeout(r, 500));
    }
    // Best effort: let the caller try the connection anyway.
    console.warn(`[browser] SSH tunnel port ${port} not ready after ${maxWait}ms \u2014 proceeding anyway`);
  } else {
    // No forwarded port to probe; give the tunnel a fixed grace period.
    await new Promise((r) => setTimeout(r, 3e3));
  }
}
|
|
227
|
+
/**
 * Connect an agent to a remote Chrome instance over the DevTools protocol.
 *
 * Reuses a cached connection when present. Optionally establishes an SSH
 * tunnel first (cfg.sshTunnel). When cfg.cdpUrl is not already a ws:// or
 * wss:// URL, it is treated as an HTTP endpoint and the WebSocket URL is
 * auto-discovered via Chrome's /json/version endpoint.
 *
 * @param {object} cfg - browser config: cdpUrl, cdpTimeout, cdpAuthToken, sshTunnel.
 * @param {string} agentId - cache key component.
 * @returns {Promise<{page: import('playwright').Page}>}
 * @throws {Error} when no URL is configured or discovery/connection fails.
 */
async function connectRemoteCDP(cfg, agentId) {
  var contextKey = `${agentId}:remote-cdp`;
  var existing = agentContexts.get(contextKey);
  if (existing) {
    existing.lastUsed = Date.now();
    return { page: existing.page };
  }
  if (cfg.sshTunnel) {
    await ensureSshTunnel(cfg.sshTunnel, agentId);
  }
  var pw = await import("playwright");
  var cdpUrl = cfg.cdpUrl;
  if (!cdpUrl) throw new Error("CDP WebSocket URL not configured. Go to Settings \u2192 Browser \u2192 Remote CDP and enter the WebSocket URL.");
  if (!cdpUrl.startsWith("ws://") && !cdpUrl.startsWith("wss://")) {
    // Not a WebSocket URL — discover one from the HTTP debug endpoint.
    var httpUrl = cdpUrl.startsWith("http") ? cdpUrl : `http://${cdpUrl}`;
    if (!httpUrl.includes("/json/version")) httpUrl = httpUrl.replace(/\/$/, "") + "/json/version";
    try {
      var resp = await fetch(httpUrl, {
        signal: AbortSignal.timeout(cfg.cdpTimeout || 1e4),
        headers: cfg.cdpAuthToken ? { "Authorization": `Bearer ${cfg.cdpAuthToken}` } : {}
      });
      var versionData = await resp.json();
      cdpUrl = versionData.webSocketDebuggerUrl;
      if (!cdpUrl) throw new Error("No webSocketDebuggerUrl in /json/version response");
      console.log(`[browser] Auto-discovered CDP WebSocket: ${cdpUrl}`);
    } catch (e) {
      throw new Error(`Cannot discover CDP endpoint from ${httpUrl}: ${e.message}. Provide a full ws:// URL instead.`);
    }
  }
  var timeout = cfg.cdpTimeout || 3e4;
  var headers = {};
  if (cfg.cdpAuthToken) headers["Authorization"] = `Bearer ${cfg.cdpAuthToken}`;
  console.log(`[browser] Connecting to remote CDP: ${cdpUrl} (agent: ${agentId})`);
  var browser = await pw.chromium.connectOverCDP(cdpUrl, { timeout, headers });
  // Reuse the browser's first context/page where possible to attach to
  // whatever the remote Chrome already has open.
  var contexts = browser.contexts();
  var context = contexts[0] || await browser.newContext({
    viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT }
  });
  var page = context.pages()[0] || await context.newPage();
  // Cache the Browser handle itself so idle cleanup closes the connection.
  agentContexts.set(contextKey, { context: browser, page, lastUsed: Date.now() });
  startIdleCleanup();
  console.log(`[browser] Connected to remote CDP for ${agentId}`);
  return { page };
}
|
|
271
|
+
/**
 * Connect an agent to a Browserless-hosted Chromium over CDP.
 * Reuses a cached page when one exists; otherwise normalizes the endpoint
 * to a WebSocket URL, opens a fresh connection, context, and page, and
 * registers the connection for idle cleanup.
 *
 * @param {object} cfg - browserlessToken, browserlessEndpoint,
 *   browserlessStealth, browserlessProxy.
 * @param {string} agentId - cache key component.
 * @returns {Promise<{page: import('playwright').Page}>}
 * @throws {Error} when no API token is configured or the connection fails.
 */
async function connectBrowserless(cfg, agentId) {
  const cacheKey = `${agentId}:browserless`;
  const cached = agentContexts.get(cacheKey);
  if (cached) {
    cached.lastUsed = Date.now();
    return { page: cached.page };
  }
  if (!cfg.browserlessToken) throw new Error("Browserless API token not configured. Go to Settings \u2192 Browser and enter your token.");
  const pw = await import("playwright");
  // Normalize the endpoint into a ws:// or wss:// URL.
  let endpoint = cfg.browserlessEndpoint || "wss://chrome.browserless.io";
  if (endpoint.startsWith("http://")) {
    endpoint = endpoint.replace("http://", "ws://");
  } else if (endpoint.startsWith("https://")) {
    endpoint = endpoint.replace("https://", "wss://");
  }
  if (!endpoint.startsWith("ws://") && !endpoint.startsWith("wss://")) {
    endpoint = "wss://" + endpoint;
  }
  let connectUrl = `${endpoint}?token=${encodeURIComponent(cfg.browserlessToken)}`;
  if (cfg.browserlessStealth) connectUrl += "&stealth";
  if (cfg.browserlessProxy) connectUrl += `&--proxy-server=${encodeURIComponent(cfg.browserlessProxy)}`;
  console.log(`[browser] Connecting to Browserless for ${agentId}`);
  const browser = await pw.chromium.connectOverCDP(connectUrl, { timeout: 3e4 });
  const context = await browser.newContext({
    viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT }
  });
  const page = await context.newPage();
  // Cache the Browser handle itself so idle cleanup closes the connection.
  agentContexts.set(cacheKey, { context: browser, page, lastUsed: Date.now() });
  startIdleCleanup();
  console.log(`[browser] Connected to Browserless for ${agentId}`);
  return { page };
}
|
|
298
|
+
/**
 * Connect an agent to a freshly created Browserbase cloud browser session.
 *
 * Reuses a cached connection when present; otherwise creates a session via
 * the Browserbase REST API and connects to it over CDP.
 *
 * @param {object} cfg - browserbaseApiKey, browserbaseProjectId,
 *   browserbaseRecording, browserbaseKeepAlive.
 * @param {string} agentId - cache key component.
 * @returns {Promise<{page: import('playwright').Page}>}
 * @throws {Error} when credentials are missing or session creation fails.
 */
async function connectBrowserbase(cfg, agentId) {
  var contextKey = `${agentId}:browserbase`;
  var existing = agentContexts.get(contextKey);
  if (existing) {
    existing.lastUsed = Date.now();
    return { page: existing.page };
  }
  if (!cfg.browserbaseApiKey) throw new Error("Browserbase API key not configured. Go to Settings \u2192 Browser and enter your API key.");
  if (!cfg.browserbaseProjectId) throw new Error("Browserbase Project ID not configured. Go to Settings \u2192 Browser and enter your project ID.");
  console.log(`[browser] Creating Browserbase session for ${agentId}`);
  var sessionResp = await fetch("https://www.browserbase.com/v1/sessions", {
    method: "POST",
    headers: { "x-bb-api-key": cfg.browserbaseApiKey, "Content-Type": "application/json" },
    body: JSON.stringify({
      projectId: cfg.browserbaseProjectId,
      browserSettings: {
        // Recording on unless explicitly disabled in config.
        recordSession: cfg.browserbaseRecording !== false
      },
      keepAlive: cfg.browserbaseKeepAlive || false
    }),
    signal: AbortSignal.timeout(15e3)
  });
  if (!sessionResp.ok) {
    var errBody = await sessionResp.text().catch(() => "");
    throw new Error(`Browserbase session creation failed (${sessionResp.status}): ${errBody}`);
  }
  var sessionData = await sessionResp.json();
  // Prefer the connect URL the API returns; fall back to the documented
  // wss://connect.browserbase.com form built from key + session id.
  var connectUrl = sessionData.connectUrl || `wss://connect.browserbase.com?apiKey=${cfg.browserbaseApiKey}&sessionId=${sessionData.id}`;
  var pw = await import("playwright");
  var browser = await pw.chromium.connectOverCDP(connectUrl, { timeout: 3e4 });
  var context = browser.contexts()[0] || await browser.newContext({
    viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT }
  });
  var page = context.pages()[0] || await context.newPage();
  // Cache the Browser handle itself so idle cleanup closes the connection.
  agentContexts.set(contextKey, { context: browser, page, lastUsed: Date.now() });
  startIdleCleanup();
  console.log(`[browser] Connected to Browserbase session ${sessionData.id} for ${agentId}`);
  return { page };
}
|
|
337
|
+
/**
 * Connect an agent to a freshly created Steel cloud browser session.
 * Reuses a cached connection when present; otherwise creates a session via
 * the Steel REST API, then attaches over CDP to the URL it returns.
 *
 * @param {object} cfg - steelApiKey, steelEndpoint, steelSessionDuration (minutes).
 * @param {string} agentId - cache key component.
 * @returns {Promise<{page: import('playwright').Page}>}
 * @throws {Error} when the API key is missing, session creation fails, or
 *   the API returns no connection URL.
 */
async function connectSteel(cfg, agentId) {
  const cacheKey = `${agentId}:steel`;
  const cached = agentContexts.get(cacheKey);
  if (cached) {
    cached.lastUsed = Date.now();
    return { page: cached.page };
  }
  if (!cfg.steelApiKey) throw new Error("Steel API key not configured. Go to Settings \u2192 Browser and enter your API key.");
  const apiBase = cfg.steelEndpoint || "https://api.steel.dev";
  // Config value is minutes; the Steel API expects seconds.
  const lifetimeSeconds = (cfg.steelSessionDuration || 15) * 60;
  console.log(`[browser] Creating Steel session for ${agentId}`);
  const createResp = await fetch(`${apiBase}/v1/sessions`, {
    method: "POST",
    headers: { "Authorization": `Bearer ${cfg.steelApiKey}`, "Content-Type": "application/json" },
    body: JSON.stringify({ timeout: lifetimeSeconds, useProxy: false }),
    signal: AbortSignal.timeout(15e3)
  });
  if (!createResp.ok) {
    const detail = await createResp.text().catch(() => "");
    throw new Error(`Steel session creation failed (${createResp.status}): ${detail}`);
  }
  const session = await createResp.json();
  // The field name has varied across API versions; accept any of them.
  const wsUrl = session.connectUrl || session.websocketUrl || session.cdpUrl;
  if (!wsUrl) throw new Error("Steel session created but no connection URL returned");
  const pw = await import("playwright");
  const browser = await pw.chromium.connectOverCDP(wsUrl, { timeout: 3e4 });
  const context = browser.contexts()[0] || await browser.newContext({
    viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT }
  });
  const page = context.pages()[0] || await context.newPage();
  // Cache the Browser handle itself so idle cleanup closes the connection.
  agentContexts.set(cacheKey, { context: browser, page, lastUsed: Date.now() });
  startIdleCleanup();
  console.log(`[browser] Connected to Steel session ${session.id} for ${agentId}`);
  return { page };
}
|
|
372
|
+
/**
 * Launch a local headless Chromium routed through the ScrapingBee proxy.
 *
 * Unlike the other cloud providers this does not connect to a remote
 * browser: it launches Playwright locally with ScrapingBee's authenticated
 * proxy, encoding API options (render_js, premium_proxy, country_code) in
 * the proxy credential string per ScrapingBee's proxy-mode scheme.
 *
 * @param {object} cfg - scrapingbeeApiKey, scrapingbeeJsRendering,
 *   scrapingbeePremiumProxy, scrapingbeeCountry.
 * @param {string} agentId - cache key component.
 * @returns {Promise<{page: import('playwright').Page}>}
 * @throws {Error} when the API key is missing or launch fails.
 */
async function connectScrapingBee(cfg, agentId) {
  var contextKey = `${agentId}:scrapingbee`;
  var existing = agentContexts.get(contextKey);
  if (existing) {
    existing.lastUsed = Date.now();
    return { page: existing.page };
  }
  if (!cfg.scrapingbeeApiKey) throw new Error("ScrapingBee API key not configured. Go to Settings \u2192 Browser and enter your API key.");
  var pw = await import("playwright");
  // Proxy-mode URL: the username is the API key, the "password" carries
  // &-separated ScrapingBee options.
  var proxyUrl = `http://${cfg.scrapingbeeApiKey}:${cfg.scrapingbeeJsRendering !== false ? "render_js" : "norender"}${cfg.scrapingbeePremiumProxy ? "&premium_proxy=true" : ""}${cfg.scrapingbeeCountry ? `&country_code=${cfg.scrapingbeeCountry}` : ""}@proxy.scrapingbee.com:8886`;
  console.log(`[browser] Launching browser with ScrapingBee proxy for ${agentId}`);
  var browser = await pw.chromium.launch({
    headless: true,
    proxy: { server: proxyUrl }
  });
  var context = await browser.newContext({
    viewport: { width: DEFAULT_VIEWPORT_WIDTH, height: DEFAULT_VIEWPORT_HEIGHT },
    ignoreHTTPSErrors: true
    // ScrapingBee proxy uses self-signed certs
  });
  var page = await context.newPage();
  // Cache the Browser handle itself so idle cleanup closes it.
  agentContexts.set(contextKey, { context: browser, page, lastUsed: Date.now() });
  startIdleCleanup();
  console.log(`[browser] ScrapingBee proxy browser ready for ${agentId}`);
  return { page };
}
|
|
398
|
+
/**
 * Resolve the configured browser provider and return a ready page for the
 * agent. An explicit "chrome"/"headed" mode override always forces a local
 * visible browser regardless of the configured provider; otherwise the
 * provider name selects the matching connector, falling back to a local
 * launch for "local" or any unknown value.
 *
 * @param {object|undefined} cfg - browser config (provider, headless, …).
 * @param {string} agentId - cache key component passed to the connector.
 * @param {"chrome"|"headed"|undefined} modeOverride - optional local-mode override.
 * @returns {Promise<{page: import('playwright').Page}>}
 */
async function ensureBrowserFromConfig(cfg, agentId, modeOverride) {
  // A local-mode override wins over any cloud provider.
  if (modeOverride === "chrome" || modeOverride === "headed") {
    return ensureBrowser(false, agentId, modeOverride === "chrome");
  }
  const connectors = {
    "remote-cdp": () => connectRemoteCDP(cfg, agentId),
    browserless: () => connectBrowserless(cfg, agentId),
    browserbase: () => connectBrowserbase(cfg, agentId),
    steel: () => connectSteel(cfg, agentId),
    scrapingbee: () => connectScrapingBee(cfg, agentId)
  };
  const provider = cfg?.provider || "local";
  const connect = connectors[provider];
  // "local" and anything unrecognized launch a local browser.
  return connect ? connect() : ensureBrowser(cfg?.headless !== false, agentId, false);
}
|
|
419
|
+
/**
 * Build the "browser" agent-tool definition.
 *
 * Returns a tool descriptor whose `execute` closure drives a Playwright page
 * (local or cloud-provider backed, per config) for the actions listed in
 * BROWSER_ACTIONS. Returns null when the browser tool is disabled in config.
 *
 * @param {object} [options] - { config, agentId, sandboxed, ssrfGuard }.
 * @returns {object|null} tool descriptor, or null when disabled.
 */
function createBrowserTool(options) {
  var browserConfig = options?.config?.browser;
  if (browserConfig?.enabled === false) return null;
  // Defaults captured by the execute closure below.
  var headless = browserConfig?.headless !== false;
  var timeoutMs = browserConfig?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  var agentId = options?.agentId || "default";
  var sandboxed = options?.sandboxed ?? false;
  var ssrfGuard = options?.ssrfGuard;
  var providerConfig = browserConfig;
  return {
    name: "browser",
    label: "Browser",
    description: "Automate a browser for web interaction. Supports navigation, screenshots, clicking, typing, scrolling, and JavaScript evaluation. Requires Playwright.",
    category: "browser",
    risk: "critical",
    parameters: {
      type: "object",
      properties: {
        action: {
          type: "string",
          description: "Action to perform: navigate, screenshot, click, type, scroll, evaluate, content, close.",
          enum: BROWSER_ACTIONS
        },
        url: { type: "string", description: "URL to navigate to (for navigate action)." },
        selector: { type: "string", description: "CSS selector for click/type actions." },
        text: { type: "string", description: "Text to type (for type action)." },
        script: { type: "string", description: "JavaScript to evaluate in page context." },
        direction: { type: "string", description: "Scroll direction: up or down." },
        pixels: { type: "number", description: "Pixels to scroll (default 500)." },
        headless: { type: "string", description: 'Browser mode: "true" (default) = headless Chromium. "false" = visible Chromium window (use for Google Meet, Google services, and anything that needs to persist login). Do NOT use "chrome" unless explicitly told to.' }
      },
      required: ["action"]
    },
    execute: async function(_toolCallId, args) {
      var params = args;
      var action = readStringParam(params, "action", { required: true });
      // The `headless` parameter is a string enum ("true"/"false"/"chrome"),
      // but a boolean false is also tolerated.
      var useChrome = params.headless === "chrome";
      var useHeadless = useChrome ? false : params.headless === "false" || params.headless === false ? false : headless;
      if (action === "close") {
        // Only headless/headed contexts are closed here; ":chrome" contexts
        // are deliberately left alone (they may hold meeting sessions).
        for (var mode of ["headless", "headed"]) {
          var key = `${agentId}:${mode}`;
          var ctx = agentContexts.get(key);
          if (ctx) {
            ctx.context.close().catch(() => {
            });
            agentContexts.delete(key);
          }
        }
        return textResult("Browser closed.");
      }
      try {
        // Stickiness: once a native-Chrome context exists for this agent,
        // keep routing actions to it rather than spawning a second browser.
        if (!useChrome && action !== "close") {
          var chromeKey = `${agentId}:chrome`;
          if (agentContexts.has(chromeKey) && browsers["chrome"]) {
            useChrome = true;
            useHeadless = false;
          }
        }
        var modeOverride = useChrome ? "chrome" : !useHeadless ? "headed" : void 0;
        var hasCloudProvider = providerConfig?.provider && providerConfig.provider !== "local";
        // Cloud provider is used only when no local mode override is in play.
        var { page } = hasCloudProvider && !modeOverride ? await ensureBrowserFromConfig(providerConfig, agentId) : await ensureBrowser(useHeadless, agentId, useChrome);
        switch (action) {
          case "navigate": {
            var url = readStringParam(params, "url", { required: true });
            // NOTE(review): file:// URLs intentionally bypass the SSRF guard
            // here, so local file reads via the browser are allowed — confirm
            // this is the intended policy. (The second startsWith check is
            // redundant: "file:///" already matches "file://".)
            if (ssrfGuard && !url.startsWith("file://") && !url.startsWith("file:///")) {
              try {
                await ssrfGuard.validateUrl(url);
              } catch (err) {
                return errorResult("Navigation blocked: " + err.message);
              }
            }
            // Google Meet URLs are rerouted to the dedicated meeting_join tool
            // when it is available in the current session.
            if (/meet\.google\.com\/[a-z]/.test(url)) {
              const tools = globalThis.__currentSessionTools;
              const meetJoin = tools?.find((t) => t.name === "meeting_join");
              if (meetJoin?.execute) {
                console.log(`[browser] Intercepted Meet URL \u2014 redirecting to meeting_join: ${url}`);
                return await meetJoin.execute("meeting_join", { url });
              }
              return errorResult(
                'Cannot navigate to Google Meet URLs. Use meeting_join(url: "' + url + '") instead. Call request_tools(sets: ["meeting_lifecycle", "meeting_voice"]) first if meeting_join is not available.'
              );
            }
            await page.goto(url, { timeout: timeoutMs, waitUntil: "domcontentloaded" });
            var title = await page.title();
            return jsonResult({ action: "navigate", url, title, status: "loaded" });
          }
          case "screenshot": {
            // Capture the viewport, persist it to a temp file, and return
            // both the file info and the inline base64 image.
            var buf = await page.screenshot({ type: "png", fullPage: false });
            var screenshotDir = path.join(os.tmpdir(), "agenticmail-screenshots");
            await fs.mkdir(screenshotDir, { recursive: true });
            var screenshotFile = `screenshot-${Date.now()}.png`;
            var screenshotPath = path.join(screenshotDir, screenshotFile);
            await fs.writeFile(screenshotPath, buf);
            return {
              content: [
                { type: "text", text: `Screenshot of: ${page.url()}
Saved to: ${screenshotPath}
Filename: ${screenshotFile}
Size: ${buf.length} bytes` },
                { type: "image", data: buf.toString("base64"), mimeType: "image/png" }
              ]
            };
          }
          case "click": {
            var selector = readStringParam(params, "selector", { required: true });
            await page.click(selector, { timeout: timeoutMs });
            return textResult("Clicked: " + selector);
          }
          case "type": {
            var selector = readStringParam(params, "selector", { required: true });
            var text = readStringParam(params, "text", { required: true });
            await page.fill(selector, text, { timeout: timeoutMs });
            return textResult("Typed into: " + selector);
          }
          case "scroll": {
            var direction = readStringParam(params, "direction") || "down";
            var pixels = readNumberParam(params, "pixels", { integer: true }) ?? 500;
            var delta = direction === "up" ? -pixels : pixels;
            await page.evaluate(function(d) {
              window.scrollBy(0, d);
            }, delta);
            return textResult("Scrolled " + direction + " " + pixels + "px");
          }
          case "evaluate": {
            // Arbitrary page-context JS; blocked entirely in sandboxed mode.
            if (sandboxed) {
              return errorResult("JavaScript evaluation is disabled in sandboxed mode.");
            }
            var script = readStringParam(params, "script", { required: true });
            var result = await page.evaluate(script);
            return jsonResult({ action: "evaluate", result });
          }
          case "content": {
            // Crude HTML -> text extraction: strip scripts/styles/tags, then
            // collapse whitespace; capped at 50 000 chars.
            var html = await page.content();
            var title = await page.title();
            var textContent = html.replace(/<script[\s\S]*?<\/script>/gi, "").replace(/<style[\s\S]*?<\/style>/gi, "").replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
            if (textContent.length > 5e4) textContent = textContent.slice(0, 5e4) + "... (truncated)";
            return jsonResult({ action: "content", title, url: page.url(), text: textContent });
          }
          default:
            return errorResult("Unknown browser action: " + action);
        }
      } catch (err) {
        // All failures surface as a tool error result, never a thrown exception.
        return errorResult("Browser error: " + (err.message || "unknown"));
      }
    }
  };
}
|
|
566
|
+
|
|
567
|
+
// src/engine/meeting-monitor.ts
/**
 * Watches a live Google Meet page (via an injected DOM poller/observer),
 * accumulates captions and chat messages, and forwards debounced updates
 * to the agent session through `sendMessage`.
 *
 * FIX: flush() previously read `pendingCaptions[0]?.timestamp`, but captions
 * are buffered as `{ speaker, text, ts }` — the hard-cap age was therefore
 * always 0 and the 30s forced flush never fired. Now reads `.ts`.
 */
var MeetingMonitor = class {
  page;
  agentId;
  sessionId;
  sendMessage;
  flushIntervalMs;
  onMeetingEnd;
  sendChatIndicator;
  flushTimer = null;
  running = false;
  // Not read by any method in this class — presumably kept for external
  // inspection/back-compat; verify before removing.
  _lastCaptionCount = 0;
  _lastChatCount = 0;
  consecutiveEmpty = 0;
  consecutiveSendFailures = 0;
  /** Pending captions accumulated across flush cycles (for debouncing) */
  pendingCaptions = [];
  /** Pending chat messages (chat is NOT debounced — always flush immediately) */
  pendingChat = [];
  /** Timestamp of last new caption arrival — used for silence gap detection */
  lastCaptionArrival = 0;
  /** Minimum silence gap in ms before flushing captions (wait for speaker to finish) */
  silenceGapMs = 2e3;
  /** Minimum total caption text length to flush (skip filler fragments) */
  minCaptionLength = 20;
  /**
   * @param config.page - Puppeteer/Playwright-style page for the Meet tab
   * @param config.agentId - used for log prefixes and voice-intelligence lookup
   * @param config.sessionId - agent session that receives monitor updates
   * @param config.sendMessage - async (sessionId, text) => void
   * @param config.flushIntervalMs - poll interval (default 2000ms)
   * @param config.onMeetingEnd - optional callback fired when the meeting ends
   * @param config.sendChatIndicator - optional async (page, text) typing indicator
   */
  constructor(config) {
    this.page = config.page;
    this.agentId = config.agentId;
    this.sessionId = config.sessionId;
    this.sendMessage = config.sendMessage;
    this.flushIntervalMs = config.flushIntervalMs || 2e3;
    this.onMeetingEnd = config.onMeetingEnd;
    this.sendChatIndicator = config.sendChatIndicator;
  }
  /**
   * Start monitoring. Injects DOM observers and begins flushing.
   * Idempotent: a second call while running is a no-op.
   */
  async start() {
    if (this.running) return;
    this.running = true;
    await this.injectObserver();
    this.flushTimer = setInterval(() => this.flush().catch((err) => {
      console.error(`[meeting-monitor:${this.agentId}] Flush error:`, err.message);
    }), this.flushIntervalMs);
    console.log(`[meeting-monitor:${this.agentId}] Started monitoring (flush every ${this.flushIntervalMs}ms)`);
  }
  /**
   * Stop monitoring and clean up. Also best-effort stops any active voice
   * intelligence playback for this agent (failures are ignored).
   */
  stop() {
    this.running = false;
    if (this.flushTimer) {
      clearInterval(this.flushTimer);
      this.flushTimer = null;
    }
    try {
      import("./meeting-voice-intelligence-RZZCAD6G.js").then(({ getActiveVoiceIntelligence }) => {
        getActiveVoiceIntelligence(this.agentId)?.stopAll();
      }).catch(() => {
      });
    } catch {
    }
    console.log(`[meeting-monitor:${this.agentId}] Stopped`);
  }
  /**
   * Inject MutationObserver-based caption/chat capture into the page.
   * Uses window.__meetingBuffer as the shared state. The callbacks below run
   * in the BROWSER context, not in Node.
   */
  async injectObserver() {
    await this.page.evaluate(() => {
      window.__meetingBuffer = {
        captions: [],
        chat: [],
        ended: false,
        lastCaptionText: ""
      };
      const buf = window.__meetingBuffer;
      // Poll the captions region every 2s; also detects meeting-end markers.
      const captionPoller = setInterval(() => {
        if (!location.href.includes("meet.google.com")) {
          buf.ended = true;
          clearInterval(captionPoller);
          return;
        }
        const bodyText = document.body.innerText || "";
        if (bodyText.includes("You've left the meeting") || bodyText.includes("Call ended") || bodyText.includes("Return to home screen")) {
          buf.ended = true;
          clearInterval(captionPoller);
          return;
        }
        const region = document.querySelector('[aria-label="Captions"]');
        if (!region) return;
        const entries = [];
        const children = region.querySelectorAll(":scope > div");
        for (const child of children) {
          const divs = child.querySelectorAll(":scope > div");
          if (divs.length >= 2) {
            entries.push({
              speaker: divs[0].textContent?.trim() || "",
              text: divs[1].textContent?.trim() || ""
            });
          } else if (child.textContent?.trim()) {
            entries.push({ speaker: "", text: child.textContent.trim() });
          }
        }
        if (entries.length === 0) return;
        // Dedupe: skip if the visible caption block hasn't changed since last poll.
        const currentText = entries.map((e) => `${e.speaker}: ${e.text}`).join(" | ");
        if (currentText === buf.lastCaptionText) return;
        buf.lastCaptionText = currentText;
        const ts = Date.now();
        for (const e of entries) {
          buf.captions.push({ speaker: e.speaker, text: e.text, ts });
        }
      }, 2e3);
      // Primary chat capture path: message elements tagged with data-message-text.
      const chatObserver = new MutationObserver(() => {
        const panel = document.querySelector('[aria-label="Side panel"]') || document.querySelector('[aria-label*="In-call messages"]');
        if (!panel) return;
        const msgEls = panel.querySelectorAll("[data-message-text]");
        const currentCount = msgEls.length;
        const prevCount = window.__lastChatCount || 0;
        if (currentCount > prevCount) {
          for (let i = prevCount; i < currentCount; i++) {
            const el = msgEls[i];
            const text = el.getAttribute("data-message-text") || el.textContent?.trim() || "";
            let sender = "";
            const parentMsg = el.closest('[class*="message"]') || el.parentElement?.parentElement;
            if (parentMsg) {
              const nameEl = parentMsg.querySelector('[class*="sender"], [class*="name"], [data-sender-id]');
              if (nameEl) sender = nameEl.textContent?.trim() || "";
            }
            if (text) {
              buf.chat.push({ sender, text, ts: Date.now() });
            }
          }
          window.__lastChatCount = currentCount;
        }
      });
      chatObserver.observe(document.body, { childList: true, subtree: true });
      // Secondary snapshot poller — currently only tracks the snapshot string;
      // it does not feed buf.chat (appears to be scaffolding for a fallback).
      let lastChatSnapshot = "";
      setInterval(() => {
        const panel = document.querySelector('[aria-label="Side panel"]') || document.querySelector('[aria-label*="In-call messages"]');
        if (!panel) return;
        const text = panel.innerText || "";
        const lines = text.split("\n").filter(
          (l) => l.trim().length > 0 && l.trim() !== "In-call messages" && !l.includes("Continuous chat") && !l.includes("Messages won't be saved") && !l.includes("No chat messages") && !l.includes("Send a message") && !l.includes("pin a message") && l.trim().length < 500
        );
        const snapshot = lines.join("\n");
        if (snapshot !== lastChatSnapshot && snapshot.length > 0) {
          lastChatSnapshot = snapshot;
        }
      }, 3e3);
    });
  }
  /**
   * Read buffered content from the page and decide whether to send to agent.
   *
   * KEY DESIGN: Captions are DEBOUNCED. We accumulate them and only flush when:
   * 1. There's a silence gap (no new captions for silenceGapMs, default 2s) — speaker finished talking
   * 2. Accumulated text is substantial enough (minCaptionLength, default 20 chars) — skip "um", "eh" fragments
   * 3. Hard cap of 30 seconds — flush regardless to prevent infinite buffering
   *
   * Chat messages are NOT debounced — they flush immediately (someone typed a message).
   */
  async flush() {
    if (!this.running) return;
    let bufferData;
    try {
      // Drain the in-page buffer atomically (copy then clear).
      bufferData = await this.page.evaluate(() => {
        const buf = window.__meetingBuffer;
        if (!buf) return { captions: [], chat: [], ended: false };
        const result = {
          captions: [...buf.captions],
          chat: [...buf.chat],
          ended: buf.ended
        };
        buf.captions = [];
        buf.chat = [];
        return result;
      });
    } catch (err) {
      // evaluate() throwing means the tab/page is gone — treat as meeting end.
      console.log(`[meeting-monitor:${this.agentId}] Page closed, stopping monitor`);
      this.stop();
      this.onMeetingEnd?.();
      return;
    }
    if (bufferData.ended) {
      try {
        const { getActiveVoiceIntelligence, removeVoiceIntelligence } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
        const voiceIntel = getActiveVoiceIntelligence(this.agentId);
        if (voiceIntel) voiceIntel.stopAll();
        removeVoiceIntelligence(this.agentId);
      } catch {
      }
      // Deliver any captions still pending before announcing the end.
      if (this.pendingCaptions.length > 0) {
        await this.sendUpdate(this.pendingCaptions, []);
        this.pendingCaptions = [];
      }
      console.log(`[meeting-monitor:${this.agentId}] Meeting ended`);
      try {
        await this.sendMessage(
          this.sessionId,
          `[Meeting Monitor] The meeting has ended. Please save any meeting notes and email a summary to your manager if appropriate.`
        );
      } catch {
      }
      this.stop();
      this.onMeetingEnd?.();
      return;
    }
    if (bufferData.captions.length > 0) {
      // Voice intelligence may ask us to drop captions (e.g. while the agent
      // itself is speaking, to avoid echoing its own words back).
      try {
        const { getActiveVoiceIntelligence } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
        const voiceIntel = getActiveVoiceIntelligence(this.agentId);
        if (voiceIntel?.shouldDiscardCaptions) {
          this.pendingCaptions = [];
          bufferData.captions = [];
          return;
        }
      } catch {
      }
    }
    if (bufferData.captions.length > 0) {
      this.pendingCaptions.push(...bufferData.captions);
      this.lastCaptionArrival = Date.now();
    }
    if (bufferData.chat.length > 0) {
      this.pendingChat.push(...bufferData.chat);
    }
    const now = Date.now();
    const timeSinceLastCaption = now - this.lastCaptionArrival;
    const hasPendingCaptions = this.pendingCaptions.length > 0;
    const hasPendingChat = this.pendingChat.length > 0;
    // Chat-only updates bypass the caption debounce entirely.
    if (hasPendingChat && !hasPendingCaptions) {
      const chat = [...this.pendingChat];
      this.pendingChat = [];
      await this.sendUpdate([], chat);
      return;
    }
    if (!hasPendingCaptions && !hasPendingChat) {
      this.consecutiveEmpty++;
      return;
    }
    const totalText = this.pendingCaptions.map((c) => c.text).join(" ");
    // BUGFIX: captions carry a `ts` field (see injectObserver), not `timestamp`.
    // Reading `.timestamp` made pendingAge always 0, so the hard cap never fired.
    const oldestCaption = this.pendingCaptions[0]?.ts ?? now;
    const pendingAge = now - oldestCaption;
    const silenceGapReached = timeSinceLastCaption >= this.silenceGapMs;
    const hardCapReached = pendingAge >= 3e4;
    const textSubstantial = totalText.length >= this.minCaptionLength;
    if ((silenceGapReached || hardCapReached) && textSubstantial) {
      const captions = [...this.pendingCaptions];
      const chat = [...this.pendingChat];
      this.pendingCaptions = [];
      this.pendingChat = [];
      this.consecutiveEmpty = 0;
      await this.sendUpdate(captions, chat);
    } else if (silenceGapReached && !textSubstantial) {
      // Speaker paused but said too little to matter — drop the fragment.
      console.log(`[meeting-monitor:${this.agentId}] Discarding short caption fragment: "${totalText.slice(0, 50)}"`);
      this.pendingCaptions = [];
    }
  }
  /**
   * Send accumulated captions and/or chat to the agent session.
   * Also plays a "thinking" hum (voice) or "..." chat indicator best-effort.
   * After 5 consecutive send failures the monitor stops itself.
   */
  async sendUpdate(captions, chat) {
    if (captions.length > 0) {
      try {
        const { getActiveVoiceIntelligence } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
        const voiceIntel = getActiveVoiceIntelligence(this.agentId);
        if (voiceIntel?.isReady || voiceIntel?.isVoiceDegraded) {
          voiceIntel.playHum().catch(() => {
          });
        } else if (this.sendChatIndicator) {
          this.sendChatIndicator(this.page, "...").catch(() => {
          });
        }
      } catch {
        if (this.sendChatIndicator && captions.length > 0) {
          this.sendChatIndicator(this.page, "...").catch(() => {
          });
        }
      }
    }
    const parts = ["[Meeting Monitor \u2014 Live Update]"];
    if (captions.length > 0) {
      parts.push("\n--- CAPTIONS (what people are saying) ---");
      const consolidated = this.consolidateCaptions(captions);
      for (const c of consolidated) {
        parts.push(c.speaker ? `${c.speaker}: ${c.text}` : c.text);
      }
    }
    if (chat.length > 0) {
      parts.push("\n--- CHAT MESSAGES ---");
      for (const m of chat) {
        parts.push(m.sender ? `${m.sender}: ${m.text}` : m.text);
      }
    }
    parts.push("\n--- END UPDATE ---");
    parts.push("If someone addressed you or asked a question, respond using meeting_speak to talk out loud. Otherwise, just note the content silently \u2014 do NOT respond to every caption update.");
    try {
      await this.sendMessage(this.sessionId, parts.join("\n"));
      this.consecutiveSendFailures = 0;
      console.log(`[meeting-monitor:${this.agentId}] Flushed ${captions.length} captions, ${chat.length} chat msgs`);
    } catch (err) {
      this.consecutiveSendFailures++;
      console.error(`[meeting-monitor:${this.agentId}] Failed to send to session (${this.consecutiveSendFailures}/5): ${err.message}`);
      if (this.consecutiveSendFailures >= 5) {
        console.error(`[meeting-monitor:${this.agentId}] Too many send failures \u2014 stopping monitor`);
        this.stop();
        this.onMeetingEnd?.();
      }
    }
  }
  /**
   * Consolidate consecutive captions from the same speaker into one entry.
   * Skips fragments already contained in the running text (Meet re-emits
   * growing caption lines as the speaker continues).
   * @param captions - array of { speaker, text, ... }
   * @returns array of { speaker, text }
   */
  consolidateCaptions(captions) {
    if (captions.length === 0) return [];
    const result = [];
    let current = { speaker: captions[0].speaker, text: captions[0].text };
    for (let i = 1; i < captions.length; i++) {
      const c = captions[i];
      if (c.speaker === current.speaker) {
        if (!current.text.includes(c.text)) {
          current.text += " " + c.text;
        }
      } else {
        result.push(current);
        current = { speaker: c.speaker, text: c.text };
      }
    }
    result.push(current);
    return result;
  }
};
|
|
900
|
+
// Per-agent registry of live MeetingMonitor instances.
var activeMonitors = /* @__PURE__ */ new Map();
/** Look up the monitor currently registered for an agent (or undefined). */
function getActiveMonitor(agentId) {
  return activeMonitors.get(agentId);
}
/**
 * Register a monitor for an agent. Any previously registered monitor for the
 * same agent is stopped first, then replaced.
 */
function registerMonitor(agentId, monitor) {
  activeMonitors.get(agentId)?.stop();
  activeMonitors.set(agentId, monitor);
}
/** Stop and unregister the agent's monitor, if one exists. */
function removeMonitor(agentId) {
  const current = activeMonitors.get(agentId);
  if (!current) return;
  current.stop();
  activeMonitors.delete(agentId);
}
|
|
916
|
+
|
|
917
|
+
// src/agent-tools/tools/google/meeting-voice.ts
|
|
918
|
+
import * as path2 from "path";
|
|
919
|
+
import * as os2 from "os";
|
|
920
|
+
import { promises as fs2 } from "fs";
|
|
921
|
+
var ELEVENLABS_BASE = "https://api.elevenlabs.io/v1";
|
|
922
|
+
/**
 * Singleton tracking, per agent, whether ElevenLabs voice output is usable.
 * Caches a status object (mode "voice" | "chat-only"), records speak
 * successes/failures, auto-degrades to chat after repeated failures, and can
 * render a system-prompt block describing the current state.
 */
var VoiceCapabilityManager = class _VoiceCapabilityManager {
  // agentId -> status object built by preflight(); mutated in place by
  // recordSuccess/recordFailure.
  statusByAgent = /* @__PURE__ */ new Map();
  static instance;
  // Not referenced by any method in this class — presumably read/written by
  // callers to avoid re-announcing the chat fallback; verify before removing.
  _announcedChatFallback = /* @__PURE__ */ new Set();
  static getInstance() {
    if (!this.instance) this.instance = new _VoiceCapabilityManager();
    return this.instance;
  }
  /** Full preflight check — call before/during meeting join */
  // Probes local audio setup, resolves the API key, and pings the ElevenLabs
  // /voices endpoint (5s timeout). Any issue forces mode "chat-only".
  // NOTE(review): "sox not installed" is reported as an issue on every
  // platform, but the Linux playback paths below use paplay/aplay — confirm
  // sox is genuinely required there.
  async preflight(agentId, getApiKey, voiceId, voiceName, audioDevice) {
    const setup = await checkAudioSetup();
    const apiKey = await getApiKey();
    const issues = [];
    if (!apiKey) issues.push("No ElevenLabs API key");
    if (!setup.hasBlackHole) issues.push("No virtual audio device");
    if (!setup.hasSox) issues.push("sox not installed");
    if (apiKey) {
      try {
        const res = await fetch(`${ELEVENLABS_BASE}/voices?page_size=1`, {
          headers: { "xi-api-key": apiKey },
          signal: AbortSignal.timeout(5e3)
        });
        if (!res.ok) {
          const body = await res.text().catch(() => "");
          issues.push(`ElevenLabs API error: ${res.status} \u2014 ${body.slice(0, 100)}`);
        }
      } catch (e) {
        issues.push(`ElevenLabs unreachable: ${e.message}`);
      }
    }
    // Carry speak history over from any previous status for this agent.
    const prev = this.statusByAgent.get(agentId);
    const resolvedVoiceId = voiceId || DEFAULT_VOICES["rachel"];
    // Platform-appropriate default virtual output device name.
    const defaultDevice = setup.platform === "darwin" ? "BlackHole 2ch" : setup.platform === "win32" ? "CABLE Input (VB-Audio Virtual Cable)" : "virtual";
    const status = {
      available: issues.length === 0,
      mode: issues.length === 0 ? "voice" : "chat-only",
      hasApiKey: !!apiKey,
      hasVirtualAudio: setup.hasBlackHole,
      hasSox: setup.hasSox,
      platform: setup.platform,
      voiceName: voiceName || "rachel",
      voiceId: resolvedVoiceId,
      audioDevice: audioDevice || defaultDevice,
      issues,
      lastCheck: Date.now(),
      lastSpeakSuccess: prev?.lastSpeakSuccess || null,
      lastSpeakFailure: prev?.lastSpeakFailure || null,
      consecutiveFailures: prev?.consecutiveFailures || 0,
      degraded: false
    };
    this.statusByAgent.set(agentId, status);
    return status;
  }
  /** Get cached status (or run preflight if stale / missing) */
  // Cache TTL is 5 minutes (3e5 ms).
  async getStatus(agentId, getApiKey, voiceId, voiceName, audioDevice) {
    const cached = this.statusByAgent.get(agentId);
    if (cached && Date.now() - cached.lastCheck < 3e5) return cached;
    return this.preflight(agentId, getApiKey, voiceId, voiceName, audioDevice);
  }
  /** Record a successful speak */
  // Resets the failure counter and restores "voice" mode (clears degradation).
  recordSuccess(agentId) {
    const s = this.statusByAgent.get(agentId);
    if (!s) return;
    s.lastSpeakSuccess = Date.now();
    s.consecutiveFailures = 0;
    s.degraded = false;
    s.mode = "voice";
  }
  /** Record a failed speak — auto-degrade after 3 consecutive failures */
  recordFailure(agentId) {
    const s = this.statusByAgent.get(agentId);
    if (!s) return;
    s.lastSpeakFailure = Date.now();
    s.consecutiveFailures++;
    if (s.consecutiveFailures >= 3) {
      s.degraded = true;
      s.mode = "chat-only";
      console.warn(`[voice:${agentId}] Degraded to chat-only after ${s.consecutiveFailures} consecutive failures`);
    }
  }
  /** Force re-check (e.g., after recovery attempt) */
  invalidate(agentId) {
    this.statusByAgent.delete(agentId);
  }
  /** Check if voice should be attempted (respects degradation) */
  // While degraded, allows a recovery attempt once 2 minutes (12e4 ms) have
  // passed since the last failure; otherwise chat-only stays chat-only.
  shouldUseVoice(agentId) {
    const s = this.statusByAgent.get(agentId);
    if (!s) return false;
    if (s.mode === "chat-only") {
      if (s.degraded && s.lastSpeakFailure && Date.now() - s.lastSpeakFailure > 12e4) {
        console.log(`[voice:${agentId}] Attempting voice recovery...`);
        return true;
      }
      return false;
    }
    return s.available;
  }
  /** Build a system prompt block describing voice status */
  // Returns one of four fixed prompt templates: unchecked, enabled,
  // degraded-with-fallback, or unavailable.
  buildPromptBlock(agentId) {
    const s = this.statusByAgent.get(agentId);
    if (!s) return `
## Voice Status
Voice not checked yet. Use meeting_speak to talk \u2014 it will fall back to chat if voice is unavailable.
`;
    if (s.available && !s.degraded) {
      return `
## Voice: ENABLED
You CAN speak in this meeting using your voice (${s.voiceName}).
- Use meeting_speak(text: "...") to talk \u2014 participants will HEAR you
- Use meeting_action(action: "chat", message: "...") for text chat
- PREFER voice for important points, questions, and responses
- Use chat for links, code snippets, or long lists
- Keep spoken messages concise (1-3 sentences) for natural conversation
- Wait for others to finish (check captions) before speaking`;
    }
    if (s.degraded) {
      return `
## Voice: DEGRADED \u2014 Using Chat Fallback
Voice was working but has failed ${s.consecutiveFailures} times. Automatically switched to chat.
- Use meeting_action(action: "chat", message: "...") to communicate
- meeting_speak will auto-retry voice periodically \u2014 if it works, voice is restored
- Issues: ${s.issues.join(", ") || "transient playback failures"}`;
    }
    return `
## Voice: UNAVAILABLE \u2014 Chat Only
Voice is not available for this meeting. Communicate via chat only.
- Use meeting_action(action: "chat", message: "...") for all communication
- Issues: ${s.issues.join(", ")}
- To enable voice: Dashboard \u2192 Settings \u2192 Integrations \u2192 ElevenLabs, then select a voice in agent profile`;
  }
};
// Module-level singleton used throughout this chunk.
var voiceCapability = VoiceCapabilityManager.getInstance();
|
|
1054
|
+
// Built-in friendly-name -> ElevenLabs voice-ID lookup table.
// "rachel" is the fallback voice used when no voiceId is configured.
var DEFAULT_VOICES = {
  rachel: "21m00Tcm4TlvDq8ikWAM",  // Female, warm
  drew: "29vD33N1CtxCmqQRPOHJ",    // Male, confident
  clyde: "2EiwWnXFnvU5JabPnv8n",   // Male, deep
  domi: "AZnzlk1XvdvUeBnXmlld",    // Female, strong
  dave: "CYw3kZ02Hs0563khs1Fj",    // Male, conversational
  fin: "D38z5RcWu1voky8WS1ja",     // Male, British
  sarah: "EXAVITQu4vr4xnSDxMaL",   // Female, soft
  antoni: "ErXwobaYiN019PkySvjV",  // Male, well-rounded
  elli: "MF3mGyEYCl7XYWbV9V6O",    // Female, youthful
  josh: "TxGEqnHWrfWFTfGW9XjX",    // Male, deep, narrative
  arnold: "VR6AewLTigWG4xSOukaG",  // Male, crisp
  adam: "pNInz6obpgDQGcFmaJgB",    // Male, deep
  sam: "yoZ06aMxZJJ28mfd3POQ"      // Male, raspy
};
|
|
1082
|
+
/**
 * Generate speech audio and return the complete MP3 payload as a Buffer.
 * Buffers the whole ElevenLabs TTS stream in memory (contrast with
 * streamSpeechToDevice, which plays while downloading).
 */
async function generateSpeech(apiKey, text, voiceId, options) {
  const response = await _fetchTTSStream(apiKey, text, voiceId, options);
  const reader = response.body.getReader();
  const pieces = [];
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    pieces.push(value);
  }
  return Buffer.concat(pieces);
}
|
|
1093
|
+
// agentId -> currently playing audio child process, so playback can be
// interrupted externally (also exposed on globalThis below).
var activeResponsePlayers = /* @__PURE__ */ new Map();
/**
 * Stream ElevenLabs TTS audio straight into a platform audio player process
 * so playback starts on the first chunk instead of after the full download.
 *
 * Pipeline: spawn player -> fetch TTS stream -> pipe chunks into player stdin
 * (respecting backpressure via 'drain') -> wait for the player to exit.
 *
 * @returns {{ audioSize: number, durationMs: number }} total bytes piped and
 *   wall-clock time from spawn to player exit.
 * @throws if the platform is unsupported, the stream pipe fails, playback
 *   times out (60s), or the player exits non-zero.
 */
async function streamSpeechToDevice(apiKey, text, voiceId, device, options, agentId) {
  const { spawn } = await import("child_process");
  const platform = process.platform;
  const startTime = Date.now();
  let playerArgs;
  let playerCmd;
  if (platform === "darwin") {
    // sox decodes the mp3 stream and writes to the named CoreAudio device.
    playerCmd = "sox";
    playerArgs = ["-t", "mp3", "-", "-t", "coreaudio", device];
  } else if (platform === "linux") {
    // NOTE(review): paplay --raw expects raw PCM, but the stream piped below
    // is MP3 (see _fetchTTSStream output_format) — this looks like it would
    // play noise on Linux; confirm, and consider decoding via sox first.
    playerCmd = "paplay";
    playerArgs = device ? ["--device=" + device, "--raw"] : ["--raw"];
  } else if (platform === "win32") {
    playerCmd = "sox";
    playerArgs = ["-t", "mp3", "-", "-t", "waveaudio", device];
  } else {
    throw new Error(`Unsupported platform: ${platform}`);
  }
  const player = spawn(playerCmd, playerArgs, {
    stdio: ["pipe", "ignore", "pipe"],
    // Kill the child if it runs longer than 60s.
    timeout: 6e4
  });
  if (agentId) {
    // Track the live player so other code can interrupt this agent's speech.
    activeResponsePlayers.set(agentId, player);
    globalThis.__activeResponsePlayers = activeResponsePlayers;
    player.on("close", () => {
      activeResponsePlayers.delete(agentId);
    });
  }
  const fetchStart = Date.now();
  const res = await _fetchTTSStream(apiKey, text, voiceId, options);
  console.log(`[voice] TTS API responded in ${Date.now() - fetchStart}ms`);
  const reader = res.body.getReader();
  let totalBytes = 0;
  let firstChunk = true;
  // Swallow EPIPE-style errors if the player dies while we're writing.
  player.stdin.on("error", () => {
  });
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (firstChunk) {
        console.log(`[voice] First audio chunk in ${Date.now() - fetchStart}ms (${value.length} bytes)`);
        firstChunk = false;
      }
      totalBytes += value.length;
      try {
        // Respect backpressure: pause until stdin drains when write() returns false.
        const canWrite = player.stdin.write(Buffer.from(value));
        if (!canWrite) {
          await new Promise((resolve) => player.stdin.once("drain", resolve));
        }
      } catch {
        // Player stdin closed mid-stream — stop piping, let close handling run.
        break;
      }
    }
    try {
      player.stdin.end();
    } catch {
    }
  } catch (e) {
    try {
      player.kill();
    } catch {
    }
    throw new Error(`Stream pipe failed: ${e.message}`);
  }
  // Wait for playback to finish; null exit code (killed) is treated as success.
  await new Promise((resolve, reject) => {
    const timeout = setTimeout(() => {
      player.kill();
      reject(new Error("Playback timed out"));
    }, 6e4);
    player.on("close", (code) => {
      clearTimeout(timeout);
      if (code === 0 || code === null) resolve();
      else reject(new Error(`Player exited with code ${code}`));
    });
    player.on("error", (err) => {
      clearTimeout(timeout);
      reject(err);
    });
  });
  return { audioSize: totalBytes, durationMs: Date.now() - startTime };
}
|
|
1177
|
+
/**
 * POST to the ElevenLabs streaming text-to-speech endpoint and return the raw
 * Response (callers consume response.body as a stream).
 *
 * @param options.model - model id (default "eleven_turbo_v2_5")
 * @param options.stability / options.similarity - voice settings overrides
 * @throws Error with status and response text when the API returns non-2xx.
 */
async function _fetchTTSStream(apiKey, text, voiceId, options) {
  const payload = {
    text,
    model_id: options?.model || "eleven_turbo_v2_5",
    voice_settings: {
      stability: options?.stability ?? 0.5,
      similarity_boost: options?.similarity ?? 0.75,
      style: 0,
      use_speaker_boost: true
    },
    // Lower bitrate = smaller chunks = faster first-byte
    output_format: "mp3_22050_32",
    // Maximum latency optimization (may reduce quality slightly)
    optimize_streaming_latency: 4
  };
  const response = await fetch(`${ELEVENLABS_BASE}/text-to-speech/${voiceId}/stream`, {
    method: "POST",
    headers: {
      "xi-api-key": apiKey,
      "Content-Type": "application/json",
      "Accept": "audio/mpeg"
    },
    body: JSON.stringify(payload)
  });
  if (response.ok) {
    return response;
  }
  const detail = await response.text().catch(() => "");
  throw new Error(`ElevenLabs API ${response.status}: ${detail}`);
}
|
|
1208
|
+
/**
 * Play a pre-rendered audio file through an output device, with per-platform
 * players and fallbacks (30s timeout on every command):
 *  - macOS:  sox -> coreaudio device, falling back to afplay
 *  - Linux:  paplay (with/without --device), falling back to aplay
 *  - Windows: sox -> waveaudio device, falling back to PowerShell SoundPlayer,
 *             then ffplay
 * @throws on unsupported platforms or when every fallback fails.
 */
async function playAudioToDevice(audioPath, device) {
  const { exec: execCb } = await import("child_process");
  const { promisify } = await import("util");
  const run = promisify(execCb);
  if (process.platform === "darwin") {
    if (device) {
      try {
        await run(`sox "${audioPath}" -t coreaudio "${device}"`, { timeout: 3e4 });
        return;
      } catch {
        console.warn(`[meeting-voice] sox failed for device "${device}", falling back to afplay`);
      }
    }
    await run(`afplay "${audioPath}"`, { timeout: 3e4 });
    return;
  }
  if (process.platform === "linux") {
    if (device) {
      try {
        await run(`paplay --device="${device}" "${audioPath}"`, { timeout: 3e4 });
      } catch {
        await run(`aplay -D "${device}" "${audioPath}"`, { timeout: 3e4 });
      }
      return;
    }
    try {
      await run(`paplay "${audioPath}"`, { timeout: 3e4 });
    } catch {
      await run(`aplay "${audioPath}"`, { timeout: 3e4 });
    }
    return;
  }
  if (process.platform === "win32") {
    if (device) {
      try {
        await run(`sox "${audioPath}" -t waveaudio "${device}"`, { timeout: 3e4 });
        return;
      } catch {
        console.warn(`[meeting-voice] sox device routing failed on Windows, using default playback`);
      }
    }
    try {
      await run(`powershell -Command "(New-Object System.Media.SoundPlayer '${audioPath}').PlaySync()"`, { timeout: 3e4 });
    } catch {
      await run(`ffplay -nodisp -autoexit "${audioPath}"`, { timeout: 3e4 });
    }
    return;
  }
  throw new Error(`Unsupported platform for audio playback: ${process.platform}`);
}
|
|
1254
|
+
/**
 * Probe the local machine for the audio prerequisites of meeting voice:
 * a virtual loopback output device ("BlackHole"/VB-Cable/pulse null sink)
 * and the sox binary. Every probe is best-effort — failures are swallowed
 * and simply leave the corresponding flag false.
 *
 * @returns {{ hasBlackHole: boolean, hasSox: boolean, devices: string[], platform: string }}
 */
async function checkAudioSetup() {
  const { exec: execCb } = await import("child_process");
  const { promisify } = await import("util");
  const exec = promisify(execCb);
  const platform = process.platform;
  let hasBlackHole = false;
  let hasSox = false;
  let devices = [];
  if (platform === "darwin") {
    // Pass 1: system_profiler device listing.
    try {
      const { stdout } = await exec("system_profiler SPAudioDataType 2>/dev/null");
      if (stdout.includes("BlackHole")) hasBlackHole = true;
      const lines = stdout.split("\n");
      for (const line of lines) {
        const match = line.match(/^\s+(BlackHole|Built-in|External|USB|Aggregate|DELL|Mac mini)/);
        if (match) devices.push(line.trim());
      }
    } catch {
    }
    // Pass 2: driver bundle on disk (device may exist but not be loaded).
    if (!hasBlackHole) {
      try {
        const { existsSync: existsSync2 } = await import("fs");
        if (existsSync2("/Library/Audio/Plug-Ins/HAL/BlackHole2ch.driver")) {
          hasBlackHole = true;
          devices.push("BlackHole 2ch (driver installed, may need coreaudiod restart)");
        }
      } catch {
      }
    }
    // Pass 3: SwitchAudioSource CLI (if installed).
    if (!hasBlackHole) {
      try {
        const { stdout } = await exec("SwitchAudioSource -a -t output 2>/dev/null");
        if (stdout.includes("BlackHole")) {
          hasBlackHole = true;
          devices.push("BlackHole 2ch");
        }
      } catch {
      }
    }
    // Pass 4: driver present but not loaded — try restarting coreaudiod
    // (requires passwordless sudo; otherwise just log guidance).
    // NOTE(review): this branch is unreachable when pass 2 already set
    // hasBlackHole from the same driver path — confirm the intended ordering.
    if (!hasBlackHole) {
      try {
        const { existsSync: existsSync2 } = await import("fs");
        if (existsSync2("/Library/Audio/Plug-Ins/HAL/BlackHole2ch.driver")) {
          console.log("[audio] BlackHole driver installed but not loaded \u2014 attempting coreaudiod restart...");
          try {
            await exec("sudo launchctl kickstart -kp system/com.apple.audio.coreaudiod 2>/dev/null", { timeout: 1e4 });
            await new Promise((r) => setTimeout(r, 3e3));
            try {
              const { stdout } = await exec("SwitchAudioSource -a -t output 2>/dev/null");
              if (stdout.includes("BlackHole")) {
                hasBlackHole = true;
                devices.push("BlackHole 2ch (loaded after coreaudiod restart)");
                console.log("[audio] \u2705 BlackHole now available after coreaudiod restart");
              }
            } catch {
            }
          } catch {
            console.warn("[audio] BlackHole driver exists but coreaudiod restart needs sudo. Run: sudo launchctl kickstart -kp system/com.apple.audio.coreaudiod");
          }
        }
      } catch {
      }
    }
    try {
      await exec("which sox");
      hasSox = true;
    } catch {
    }
  } else if (platform === "linux") {
    // Any virtual/null/pipewire sink counts as a loopback device.
    try {
      const { stdout } = await exec("pactl list short sinks 2>/dev/null");
      devices = stdout.split("\n").filter(Boolean);
      hasBlackHole = devices.some((d) => d.includes("virtual") || d.includes("null") || d.includes("pipewire"));
    } catch {
    }
    try {
      await exec("which sox");
      hasSox = true;
    } catch {
    }
  } else if (platform === "win32") {
    // Primary: AudioDeviceCmdlets PowerShell module; look for VB-Cable.
    try {
      const { stdout } = await exec('powershell -Command "Get-AudioDevice -List 2>$null | Select-Object -ExpandProperty Name"', { timeout: 1e4 });
      devices = stdout.split("\n").map((d) => d.trim()).filter(Boolean);
      hasBlackHole = devices.some((d) => /cable|virtual|vb-audio/i.test(d));
    } catch {
      // NOTE(review): this fallback sets hasSox (not hasBlackHole) when the
      // powershell device listing fails — looks like it may be misplaced;
      // the unconditional "where sox" check below covers sox anyway. Confirm.
      try {
        const { stdout } = await exec("sox --help 2>&1");
        if (stdout.includes("waveaudio")) hasSox = true;
      } catch {
      }
    }
    try {
      await exec("where sox");
      hasSox = true;
    } catch {
    }
  }
  return { hasBlackHole, hasSox, devices, platform };
}
|
|
1354
|
+
/**
 * Build the per-agent meeting voice tool set:
 *   - meeting_speak: stream ElevenLabs TTS into a virtual audio device so
 *     meeting participants hear the agent; falls back to meeting chat.
 *   - meeting_audio_setup: report whether this machine is ready for voice.
 *   - meeting_voices: list built-in (and optionally account) voices.
 *
 * @param {object} config - Voice configuration: elevenLabsApiKey or
 *   elevenLabsKeyResolver, voiceId, voiceName, audioDevice.
 * @param {object} [_options] - Optional { agentId }; defaults to "default".
 * @returns {Array<object>} Tool descriptors with async execute(_id, params).
 */
function createMeetingVoiceTools(config, _options) {
  const agentId = _options?.agentId || "default";
  // Key resolution order: explicit config value -> async resolver (errors
  // swallowed to null) -> ELEVENLABS_API_KEY environment variable.
  const getApiKey = async () => {
    if (config.elevenLabsApiKey) return config.elevenLabsApiKey;
    if (config.elevenLabsKeyResolver) {
      try {
        return await config.elevenLabsKeyResolver();
      } catch {
        return null;
      }
    }
    return process.env.ELEVENLABS_API_KEY || null;
  };
  return [
    // ─── Speak in Meeting ──────────────────────────────
    {
      name: "meeting_speak",
      description: `Speak in a meeting using your voice. Participants will HEAR you through the virtual microphone. Audio streams in real-time (near-zero latency). Auto-falls back to meeting chat if voice fails.

IMPORTANT: When meeting_speak succeeds (status: "spoken"), DO NOT also send the same message via chat. That would duplicate your message. Only use meeting chat for links, code, or data that's better as text.

Tips:
- Keep messages SHORT: 1-2 sentences per turn, like a real conversation
- Wait for others to finish (check captions) before speaking
- For long content, break it into multiple short meeting_speak calls`,
      category: "utility",
      parameters: {
        type: "object",
        properties: {
          text: { type: "string", description: "Text to speak in the meeting" },
          voice: { type: "string", description: "Voice name or ElevenLabs voice ID. Built-in voices: rachel (female, warm), drew (male, confident), sarah (female, soft), josh (male, deep), adam (male, deep), sam (male, raspy). Default: agent's configured voice." },
          model: { type: "string", description: 'ElevenLabs model: "eleven_turbo_v2_5" (fastest, default), "eleven_multilingual_v2" (best quality, supports 29 languages)' }
        },
        required: ["text"]
      },
      async execute(_id, params) {
        try {
          const text = params.text;
          if (!text || text.trim().length === 0) {
            return errorResult("No text to speak.");
          }
          // voiceCapability is a module-level health tracker (defined elsewhere
          // in this file); it records per-agent voice success/failure streaks.
          const vcm = voiceCapability;
          const status = await vcm.getStatus(agentId, getApiKey, config.voiceId, config.voiceName, config.audioDevice);
          // Chat fallback path: shut down voice intelligence (best-effort),
          // announce the mic problem once per agent, then post `text` to the
          // meeting chat. Returns a structured result either way.
          const fallbackToChat = async (reason) => {
            console.log(`[voice:${agentId}] Falling back to chat: ${reason}`);
            vcm.recordFailure(agentId);
            try {
              const { getActiveVoiceIntelligence } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
              const voiceIntel = getActiveVoiceIntelligence(agentId);
              if (voiceIntel) {
                voiceIntel.stopAll();
                voiceIntel.shutdown();
              }
            } catch {
            }
            try {
              const { ensureBrowser: ensureBrowser3, sendChatMessage: sendChatMessage2 } = await import("./meetings-TL77WLKW.js");
              const { page } = await ensureBrowser3(false, agentId, false);
              // Only announce the mic failure the first time for this agent.
              if (!vcm._announcedChatFallback?.has(agentId)) {
                if (!vcm._announcedChatFallback) vcm._announcedChatFallback = /* @__PURE__ */ new Set();
                vcm._announcedChatFallback.add(agentId);
                await sendChatMessage2(page, `Sorry, something is wrong with my microphone so I'll be responding via chat for now!`);
              }
              const chatResult = await sendChatMessage2(page, text);
              if (chatResult.sent) {
                return jsonResult({
                  action: "meeting_speak",
                  status: "sent_as_chat",
                  method: "chat_fallback",
                  text,
                  reason,
                  chatMethod: chatResult.method,
                  note: "Voice unavailable \u2014 message sent as meeting chat instead."
                });
              }
            } catch (chatErr) {
              console.error(`[voice:${agentId}] Chat fallback also failed: ${chatErr.message}`);
            }
            // Both voice and chat failed.
            return jsonResult({
              action: "meeting_speak",
              status: "failed",
              text,
              reason,
              hint: "Both voice and chat failed. Check meeting connection and audio setup."
            });
          };
          // Gate on the capability tracker before spending an API call.
          if (!vcm.shouldUseVoice(agentId)) {
            return fallbackToChat(status.degraded ? `Voice degraded (${status.consecutiveFailures} consecutive failures)` : `Voice unavailable: ${status.issues.join(", ")}`);
          }
          const apiKey = await getApiKey();
          if (!apiKey) {
            return fallbackToChat("No ElevenLabs API key");
          }
          // Voice selection: configured voice -> built-in name lookup ->
          // raw string treated as an ElevenLabs voice ID when long enough.
          let voiceId = config.voiceId || DEFAULT_VOICES["rachel"];
          if (params.voice) {
            const lower = params.voice.toLowerCase();
            if (DEFAULT_VOICES[lower]) {
              voiceId = DEFAULT_VOICES[lower];
            } else if (params.voice.length > 10) {
              voiceId = params.voice;
            }
          }
          // Best-effort: pause the listening pipeline so the agent does not
          // transcribe its own speech, and mark what it is about to say.
          try {
            const { getActiveVoiceIntelligence } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
            const voiceIntel = getActiveVoiceIntelligence(agentId);
            if (voiceIntel) {
              voiceIntel.stopAll();
              await new Promise((r) => setTimeout(r, 100));
              voiceIntel.markSpeaking(text);
            }
          } catch {
          }
          const device = config.audioDevice || status.audioDevice || "BlackHole 2ch";
          // Primary path: low-latency streaming synthesis straight to the
          // virtual device.
          try {
            const result = await streamSpeechToDevice(apiKey, text, voiceId, device, {
              model: params.model
            }, agentId);
            vcm.recordSuccess(agentId);
            try {
              const { getActiveVoiceIntelligence: getVI } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
              getVI(agentId)?.markDoneSpeaking();
            } catch {
            }
            return jsonResult({
              action: "meeting_speak",
              status: "spoken",
              method: "voice",
              text,
              voiceId,
              voiceName: config.voiceName || Object.entries(DEFAULT_VOICES).find(([, id]) => id === voiceId)?.[0] || "custom",
              audioSize: result.audioSize,
              durationMs: result.durationMs,
              streaming: true
            });
          } catch (streamErr) {
            // Secondary path: synthesize to a temp MP3 and play it to the
            // device; only after that also fails do we drop to chat.
            console.warn(`[voice:${agentId}] Streaming failed (${streamErr.message}), trying file-based playback...`);
            try {
              const audioBuffer = await generateSpeech(apiKey, text, voiceId, { model: params.model });
              const audioDir = path2.join(os2.tmpdir(), "agenticmail-voice");
              await fs2.mkdir(audioDir, { recursive: true });
              const audioFile = path2.join(audioDir, `speak-${Date.now()}.mp3`);
              await fs2.writeFile(audioFile, audioBuffer);
              await playAudioToDevice(audioFile, device);
              vcm.recordSuccess(agentId);
              return jsonResult({
                action: "meeting_speak",
                status: "spoken",
                method: "voice",
                text,
                voiceId,
                voiceName: config.voiceName || Object.entries(DEFAULT_VOICES).find(([, id]) => id === voiceId)?.[0] || "custom",
                audioFile,
                audioSize: audioBuffer.length,
                streaming: false
              });
            } catch (fileErr) {
              try {
                const { getActiveVoiceIntelligence: getVI } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
                getVI(agentId)?.markDoneSpeaking();
              } catch {
              }
              return fallbackToChat(`Voice failed: ${streamErr.message}, file fallback: ${fileErr.message}`);
            }
          }
        } catch (e) {
          return errorResult(e.message);
        }
      }
    },
    // ─── Check Audio Setup ─────────────────────────────
    {
      name: "meeting_audio_setup",
      description: "Check if the machine has the required audio setup for meeting voice (virtual audio device, sox, etc.). Run this before using meeting_speak to verify the setup.",
      category: "utility",
      parameters: {
        type: "object",
        properties: {},
        required: []
      },
      async execute(_id, _params) {
        try {
          // checkAudioSetup() probes the host for a virtual device + sox;
          // defined earlier in this file.
          const setup = await checkAudioSetup();
          const apiKey = await getApiKey();
          const issues = [];
          if (!apiKey) issues.push('ElevenLabs API key not configured. Add it in Dashboard \u2192 Settings \u2192 Integrations (key name: "elevenlabs"), or set ELEVENLABS_API_KEY env var.');
          if (!setup.hasBlackHole) {
            if (setup.platform === "darwin") issues.push("BlackHole virtual audio not found (install: brew install blackhole-2ch)");
            else if (setup.platform === "linux") issues.push("No virtual audio sink found (create one: pactl load-module module-null-sink sink_name=virtual)");
            else if (setup.platform === "win32") issues.push("No virtual audio cable found (install VB-CABLE from https://vb-audio.com/Cable/ or choco install vb-cable)");
          }
          if (!setup.hasSox) {
            if (setup.platform === "darwin") issues.push("sox not found (install: brew install sox)");
            else if (setup.platform === "linux") issues.push("sox not found (install: sudo apt install sox)");
            else if (setup.platform === "win32") issues.push("sox not found (install: choco install sox.portable or winget install sox.sox)");
          }
          const defaultDevice = setup.platform === "darwin" ? "BlackHole 2ch" : setup.platform === "win32" ? "CABLE Input (VB-Audio Virtual Cable)" : "virtual";
          return jsonResult({
            action: "meeting_audio_setup",
            ready: issues.length === 0,
            issues,
            platform: setup.platform,
            hasElevenLabsKey: !!apiKey,
            hasVirtualAudio: setup.hasBlackHole,
            hasSox: setup.hasSox,
            audioDevices: setup.devices,
            configuredVoice: config.voiceName || config.voiceId || "rachel (default)",
            configuredDevice: config.audioDevice || `${defaultDevice} (default)`,
            availableVoices: Object.keys(DEFAULT_VOICES),
            setupInstructions: issues.length > 0 ? [
              "macOS: brew install blackhole-2ch sox",
              "Linux: sudo apt install pulseaudio-utils sox && pactl load-module module-null-sink sink_name=virtual",
              "Windows: choco install sox.portable vb-cable (or winget install sox.sox + VB-CABLE from vb-audio.com)",
              "Then add ElevenLabs API key in Dashboard \u2192 Settings \u2192 Integrations",
              "Optionally select a voice in agent profile \u2192 Personal Details \u2192 Voice"
            ] : ["All good \u2014 meeting voice is ready to use."]
          });
        } catch (e) {
          return errorResult(e.message);
        }
      }
    },
    // ─── List Available Voices ──────────────────────────
    {
      name: "meeting_voices",
      description: "List available ElevenLabs voices for meeting speech. Shows built-in voices and optionally fetches your custom voices from ElevenLabs.",
      category: "utility",
      parameters: {
        type: "object",
        properties: {
          includeCustom: { type: "string", description: '"true" to fetch custom voices from your ElevenLabs account' }
        },
        required: []
      },
      async execute(_id, params) {
        try {
          const builtIn = Object.entries(DEFAULT_VOICES).map(([name, id]) => ({ name, id, source: "built-in" }));
          // Optionally merge the account's voices from the ElevenLabs API;
          // any fetch failure silently degrades to the built-in list.
          if (params.includeCustom === "true") {
            const apiKey = await getApiKey();
            if (!apiKey) return jsonResult({ voices: builtIn, note: "Add ElevenLabs key in Dashboard \u2192 Settings \u2192 Integrations to see custom voices" });
            try {
              const res = await fetch(`${ELEVENLABS_BASE}/voices`, {
                headers: { "xi-api-key": apiKey }
              });
              if (res.ok) {
                const data = await res.json();
                const custom = (data.voices || []).map((v) => ({
                  name: v.name,
                  id: v.voice_id,
                  category: v.category,
                  source: "elevenlabs"
                }));
                return jsonResult({ voices: [...builtIn, ...custom] });
              }
            } catch {
            }
          }
          return jsonResult({ voices: builtIn, currentVoice: config.voiceName || config.voiceId || "rachel" });
        } catch (e) {
          return errorResult(e.message);
        }
      }
    }
  ];
}
|
|
1618
|
+
|
|
1619
|
+
// src/agent-tools/tools/google/meetings.ts
|
|
1620
|
+
import * as path3 from "path";
|
|
1621
|
+
import * as os3 from "os";
|
|
1622
|
+
import { promises as fs3 } from "fs";
|
|
1623
|
+
// Local alias for the shared browser bootstrap (presumably re-bound to avoid
// a name collision with dynamically imported helpers — TODO confirm).
var ensureBrowser2 = ensureBrowser;
// Base URL for all Google Calendar REST v3 calls made by calendarApi().
var CALENDAR_BASE = "https://www.googleapis.com/calendar/v3";
|
|
1625
|
+
/**
 * Minimal Google Calendar v3 client.
 *
 * @param {string} token - OAuth bearer token.
 * @param {string} path4 - API path appended to CALENDAR_BASE.
 * @param {{ method?: string, query?: Record<string,string>, body?: object }} [opts]
 * @returns {Promise<object>} Parsed JSON response.
 * @throws {Error} "Calendar API <status>: <body>" on any non-2xx response.
 */
async function calendarApi(token, path4, opts) {
  const url = new URL(CALENDAR_BASE + path4);
  const query = opts?.query;
  if (query) {
    // Only attach truthy query values; empty strings are dropped.
    for (const [key, value] of Object.entries(query)) {
      if (value) url.searchParams.set(key, value);
    }
  }
  const init = {
    method: opts?.method || "GET",
    headers: { Authorization: `Bearer ${token}`, "Content-Type": "application/json" },
    body: opts?.body ? JSON.stringify(opts.body) : void 0
  };
  const res = await fetch(url.toString(), init);
  if (!res.ok) throw new Error(`Calendar API ${res.status}: ${await res.text()}`);
  return res.json();
}
|
|
1638
|
+
/**
 * Pull a video-meeting join link out of a Google Calendar event.
 * Checks structured conference data first, then the legacy hangoutLink,
 * then scans description/location text for Zoom, Teams and Meet URLs.
 *
 * @param {object} event - Calendar event resource.
 * @returns {{platform: string, url: string} | null}
 */
function extractMeetingLink(event) {
  // 1) Structured conference data is the most reliable source.
  const entryPoints = event.conferenceData?.entryPoints;
  if (entryPoints) {
    for (const ep of entryPoints) {
      if (ep.entryPointType === "video" && ep.uri) return { platform: "google_meet", url: ep.uri };
    }
  }
  // 2) Legacy Hangouts field.
  if (event.hangoutLink) return { platform: "google_meet", url: event.hangoutLink };
  // 3) Free-text scan, in fixed priority order: Zoom, Teams, generic Meet.
  const haystack = `${event.description || ""} ${event.location || ""}`;
  const patterns = [
    ["zoom", /https:\/\/[\w.-]*zoom\.us\/[jw]\/[\d?=&\w]+/i],
    ["teams", /https:\/\/teams\.microsoft\.com\/l\/meetup-join\/[^\s"<>]+/i],
    ["google_meet", /https:\/\/meet\.google\.com\/[a-z-]+/i]
  ];
  for (const [platform, re] of patterns) {
    const match = haystack.match(re);
    if (match) return { platform, url: match[0] };
  }
  return null;
}
|
|
1654
|
+
/**
 * Extract start/end Dates from a calendar event, handling both timed
 * (dateTime) and all-day (date) events.
 *
 * @param {object} event - Calendar event resource.
 * @returns {{start: Date, end: Date} | null} null when no start is present;
 *   a missing end defaults to start + 1 hour (36e5 ms).
 */
function parseEventTime(event) {
  const rawStart = event.start?.dateTime || event.start?.date;
  if (!rawStart) return null;
  const rawEnd = event.end?.dateTime || event.end?.date;
  const start = new Date(rawStart);
  const end = rawEnd ? new Date(rawEnd) : new Date(new Date(rawStart).getTime() + 36e5);
  return { start, end };
}
|
|
1663
|
+
/** Async sleep — resolves after `ms` milliseconds. */
function delay(ms) {
  return new Promise(function (resolve) {
    setTimeout(resolve, ms);
  });
}
|
|
1666
|
+
/**
 * Capture the page's visible viewport as a PNG, persist it under the OS
 * temp dir, and return both the file path and the base64 payload.
 *
 * @param {object} page - Browser page exposing screenshot().
 * @returns {Promise<{path: string, base64: string}>}
 */
async function saveScreenshot(page) {
  const imageBuffer = await page.screenshot({ type: "png", fullPage: false });
  const screenshotDir = path3.join(os3.tmpdir(), "agenticmail-screenshots");
  await fs3.mkdir(screenshotDir, { recursive: true });
  const fileName = `meeting-${Date.now()}.png`;
  const fullPath = path3.join(screenshotDir, fileName);
  await fs3.writeFile(fullPath, imageBuffer);
  return { path: fullPath, base64: imageBuffer.toString("base64") };
}
|
|
1675
|
+
/**
 * Try to click a control identified by any of the given accessible labels.
 * Strategy 1: CSS aria-label substring selector (caller-supplied timeout).
 * Strategy 2: role=button lookup by name regex (fixed 2s timeout).
 *
 * @param {object} page - Browser page.
 * @param {string[]} labels - Candidate labels, tried in order per strategy.
 * @param {number} [timeout=5000] - Timeout for the CSS-selector pass (ms).
 * @returns {Promise<boolean>} true on the first successful click.
 */
async function ariaClick(page, labels, timeout = 5e3) {
  const strategies = [
    (label) => page.click(`[aria-label*="${label}" i]`, { timeout }),
    (label) => page.getByRole("button", { name: new RegExp(label, "i") }).click({ timeout: 2e3 })
  ];
  for (const attempt of strategies) {
    for (const label of labels) {
      try {
        await attempt(label);
        return true;
      } catch {
        // Try the next label / strategy.
      }
    }
  }
  return false;
}
|
|
1692
|
+
/**
 * Scrape live captions from the Google Meet DOM.
 * Primary source: the accessible [aria-label="Captions"] region (per-speaker
 * entries). Fallback: the legacy ".a4cQT" container, returned as one merged
 * entry with UI chrome lines filtered out.
 *
 * @param {object} page - Browser page.
 * @returns {Promise<Array<{speaker: string, text: string}>>}
 */
async function readCaptionsFromDOM(page) {
  return page.evaluate(() => {
    // Preferred: structured captions region.
    const region = document.querySelector('[aria-label="Captions"]');
    if (region) {
      const entries = [];
      for (const child of region.querySelectorAll(":scope > div")) {
        const divs = child.querySelectorAll(":scope > div");
        if (divs.length >= 2) {
          // Two nested divs => [speaker, caption text].
          entries.push({ speaker: divs[0].textContent?.trim() || "", text: divs[1].textContent?.trim() || "" });
        } else if (child.textContent?.trim()) {
          entries.push({ speaker: "", text: child.textContent.trim() });
        }
      }
      if (entries.length > 0) return entries;
    }
    // Fallback: legacy caption container, filtered of UI chrome.
    const container = document.querySelector(".a4cQT");
    if (container) {
      const allText = container.innerText || "";
      const lines = allText.split("\n").filter(
        (l) => l.trim().length > 0 && !l.includes("BETA") && !l.includes("caption") && !l.includes("Font size") && !l.includes("Font color") && l.trim().length < 500
      );
      if (lines.length > 0) return [{ speaker: "", text: lines.join(" ") }];
    }
    return [];
  });
}
|
|
1719
|
+
/**
 * Post a message into the Google Meet in-call chat.
 * Long messages are split into <=450-char chunks and sent via recursive
 * calls with "(i/n) " prefixes. Short messages go through a ladder of input
 * strategies: open the chat panel if needed, then per candidate selector try
 * programmatic value insertion -> clipboard paste -> raw typing, and confirm
 * the send by checking the input emptied after Enter.
 *
 * @param {object} page - Browser page for the active meeting.
 * @param {string} message - Text to send.
 * @returns {Promise<{sent: boolean, method: string, error?: string}>}
 */
async function sendChatMessage(page, message) {
  const MAX_CHUNK = 450;
  if (message.length > MAX_CHUNK) {
    const chunks = splitMessageIntoChunks(message, MAX_CHUNK);
    let allSent = true;
    let lastMethod = "";
    for (let i = 0; i < chunks.length; i++) {
      // FIXME: the "(i/n) " prefix can push a 450-char chunk back over
      // MAX_CHUNK, triggering a nested re-chunk with doubled prefixes.
      const prefix = chunks.length > 1 ? `(${i + 1}/${chunks.length}) ` : "";
      const result = await sendChatMessage(page, prefix + chunks[i]);
      if (!result.sent) {
        return { sent: false, method: result.method, error: `Failed on chunk ${i + 1}/${chunks.length}: ${result.error}` };
      }
      lastMethod = result.method;
      // Small gap between chunks so Meet does not drop rapid sends.
      if (i < chunks.length - 1) await delay(800);
    }
    return { sent: allSent, method: `chunked (${chunks.length}x) via ${lastMethod}` };
  }
  // Ensure the chat side panel is open (best-effort; errors ignored).
  try {
    const chatOpen = await page.evaluate(() => {
      const panel = document.querySelector('[aria-label="Side panel"]');
      if (!panel) return false;
      const text = panel.innerText || "";
      return text.includes("In-call messages") || text.includes("Send a message");
    });
    if (!chatOpen) {
      const clicked = await ariaClick(page, ["Chat with everyone", "Open chat"], 3e3);
      if (!clicked) {
        // "d" is presumably a Meet keyboard shortcut for the chat panel —
        // TODO confirm against current Meet shortcuts.
        await page.keyboard.press("d");
      }
      await delay(1500);
    }
  } catch {
  }
  // Candidate selectors for the chat input, tried in order.
  const selectors = [
    'textarea[aria-label*="Send a message" i]',
    'input[aria-label*="Send a message" i]',
    '[aria-label*="Send a message" i][contenteditable]',
    'textarea[placeholder*="Send" i]',
    '[data-is-persistent="true"] textarea'
  ];
  for (const selector of selectors) {
    try {
      const el = page.locator(selector).first();
      const visible = await el.isVisible({ timeout: 2e3 }).catch(() => false);
      if (!visible) continue;
      await el.click({ timeout: 2e3 });
      await delay(200);
      // Clear any stale draft before inserting our message.
      await page.keyboard.press("Control+a");
      await delay(100);
      await page.keyboard.press("Backspace");
      await delay(100);
      // Strategy 1: set the input's value via the native prototype setter so
      // React sees the change (dispatching input/change), falling back to
      // execCommand for contenteditable targets.
      try {
        const inserted = await page.evaluate((text) => {
          const active = document.activeElement;
          if (active && "value" in active) {
            const setter = Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, "value")?.set || Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, "value")?.set;
            if (setter) {
              setter.call(active, text);
              active.dispatchEvent(new Event("input", { bubbles: true }));
              active.dispatchEvent(new Event("change", { bubbles: true }));
              return true;
            }
          }
          return document.execCommand("insertText", false, text);
        }, message);
        if (!inserted) throw new Error("insertText returned false");
        await delay(300);
      } catch {
        // Strategy 2: clipboard write + paste shortcut.
        try {
          await page.evaluate(async (text) => {
            await navigator.clipboard.writeText(text);
          }, message);
          const isMac = process.platform === "darwin";
          await page.keyboard.press(isMac ? "Meta+v" : "Control+v");
          await delay(300);
        } catch {
          // Strategy 3: raw typing, only worthwhile for short messages.
          if (message.length <= 100) {
            await page.keyboard.type(message, { delay: 20 });
            await delay(300);
          } else {
            continue;
          }
        }
      }
      await page.keyboard.press("Enter");
      await delay(500);
      // An emptied input indicates the message was actually dispatched.
      const inputEmpty = await el.inputValue().then((v) => v.trim() === "").catch(() => true);
      if (inputEmpty) {
        return { sent: true, method: `paste + selector: ${selector}` };
      }
    } catch {
    }
  }
  return { sent: false, method: "all strategies failed", error: 'Could not find or interact with the chat input. Try meeting_action(action: "screenshot") to see the current state.' };
}
|
|
1814
|
+
/**
 * Split text into chunks no longer than maxLen, preferring to break at a
 * natural separator (paragraph, newline, sentence end, comma) as long as the
 * break lands past 30% of maxLen; otherwise cut hard at maxLen.
 * Each emitted chunk is trimmed; empty trailing remainders are dropped.
 *
 * @param {string} text
 * @param {number} maxLen
 * @returns {string[]}
 */
function splitMessageIntoChunks(text, maxLen) {
  const SEPARATORS = ["\n\n", "\n", ". ", "! ", "? ", ", "];
  const chunks = [];
  let rest = text;
  while (rest.length > maxLen) {
    let cut = -1;
    for (const sep of SEPARATORS) {
      const at = rest.lastIndexOf(sep, maxLen);
      // First separator kind (in priority order) with a late-enough hit wins.
      if (at > maxLen * 0.3) {
        cut = at + sep.length;
        break;
      }
    }
    if (cut <= 0) cut = maxLen;
    chunks.push(rest.slice(0, cut).trim());
    rest = rest.slice(cut).trim();
  }
  if (rest) chunks.push(rest);
  return chunks;
}
|
|
1833
|
+
/**
 * Point the OS default audio *input* at the virtual device so the meeting
 * browser picks up the agent's synthesized speech as its microphone.
 * Platform-specific: SwitchAudioSource (macOS), pactl (Linux PulseAudio),
 * nircmd (Windows). Never throws — failures are reported in the result.
 *
 * @returns {Promise<{configured: boolean, device?: string, error?: string}>}
 */
async function configureAudioInput() {
  const { exec: execCb } = await import("child_process");
  const { promisify } = await import("util");
  const exec = promisify(execCb);
  const platform = process.platform;
  try {
    if (platform === "darwin") {
      // macOS: requires switchaudio-osx; select BlackHole as the input.
      const { stdout: inputs } = await exec("SwitchAudioSource -a -t input 2>/dev/null");
      if (inputs.includes("BlackHole")) {
        await exec('SwitchAudioSource -t input -s "BlackHole 2ch"');
        console.log("[audio] \u2705 Set system audio input to BlackHole 2ch");
        return { configured: true, device: "BlackHole 2ch" };
      }
      return { configured: false, error: "BlackHole not found as input device" };
    } else if (platform === "linux") {
      // Linux: pick the first PulseAudio source whose line mentions a
      // virtual/null sink monitor and make it the default source.
      try {
        const { stdout } = await exec("pactl list short sources 2>/dev/null");
        const virtualSource = stdout.split("\n").find((l) => /virtual|null/.test(l));
        if (virtualSource) {
          // NOTE(review): pactl "short" output is tab-separated; confirm this
          // delimiter actually isolates the source name at field [1].
          const sourceName = virtualSource.split(" ")[1];
          await exec(`pactl set-default-source ${sourceName}`);
          console.log(`[audio] \u2705 Set PulseAudio default source to ${sourceName}`);
          return { configured: true, device: sourceName };
        }
      } catch {
      }
      return { configured: false, error: "No virtual audio source found" };
    } else if (platform === "win32") {
      // Windows: nircmd role 2 = default recording device.
      try {
        await exec('nircmd setdefaultsounddevice "CABLE Output" 2');
        console.log("[audio] \u2705 Set default recording device to VB-CABLE");
        return { configured: true, device: "CABLE Output (VB-Audio)" };
      } catch {
        return { configured: false, error: "Could not set VB-CABLE as default input" };
      }
    }
    return { configured: false, error: `Unsupported platform: ${platform}` };
  } catch (e) {
    return { configured: false, error: e.message };
  }
}
|
|
1874
|
+
/**
 * Navigate the page to a Google Meet URL and click through the join flow.
 * Configures the virtual audio input first (non-fatal if it fails), dismisses
 * up to three popups, clicks a join button, then classifies the resulting
 * page state and captures a screenshot either way.
 *
 * @param {object} page - Browser page.
 * @param {string} url - Google Meet link.
 * @returns {Promise<{joined: boolean, state?: string, url?: string, error?: string,
 *   screenshot: string, screenshotBase64: string}>} "waiting_room" counts as joined.
 */
async function joinGoogleMeet(page, url) {
  // Route the agent's TTS output into the meeting mic before joining.
  const audioConfig = await configureAudioInput();
  if (audioConfig.configured) {
    console.log(`[meeting-join] Audio input configured: ${audioConfig.device}`);
  } else {
    console.warn(`[meeting-join] Audio input not configured: ${audioConfig.error}`);
  }
  await page.goto(url, { timeout: 6e4, waitUntil: "domcontentloaded" });
  await delay(2e3);
  // Dismiss stacked pre-join popups (at most three).
  for (let i = 0; i < 3; i++) {
    const dismissed = await ariaClick(page, ["Close"], 1500);
    if (!dismissed) break;
    await delay(300);
  }
  const joined = await ariaClick(page, ["Ask to join", "Join now", "Join"], 1e4);
  if (!joined) {
    const screenshot2 = await saveScreenshot(page);
    return { joined: false, error: "Could not find Join button.", screenshot: screenshot2.path, screenshotBase64: screenshot2.base64 };
  }
  await delay(3e3);
  // Classify the post-click state from visible page text.
  const state = await page.evaluate(() => {
    const text = document.body.innerText || "";
    if (text.includes("Please wait until a meeting host")) return "waiting_room";
    if (text.includes("You have joined the call") || text.includes("Leave call")) return "in_call";
    if (text.includes("Ask to join")) return "pre_join";
    return "unknown";
  });
  const screenshot = await saveScreenshot(page);
  return {
    joined: state === "in_call" || state === "waiting_room",
    state,
    url: page.url(),
    screenshot: screenshot.path,
    screenshotBase64: screenshot.base64
  };
}
|
|
1910
|
+
function createMeetingTools(config, _options) {
|
|
1911
|
+
const tp = config.tokenProvider;
|
|
1912
|
+
const agentId = _options?.agentId || "default";
|
|
1913
|
+
const runtimeRef = _options?.runtimeRef;
|
|
1914
|
+
return [
|
|
1915
|
+
// ─── Upcoming Meetings ─────────────────────────────
|
|
1916
|
+
{
|
|
1917
|
+
name: "meetings_upcoming",
|
|
1918
|
+
description: "List upcoming meetings with join links, times, and attendees from Google Calendar.",
|
|
1919
|
+
category: "utility",
|
|
1920
|
+
parameters: {
|
|
1921
|
+
type: "object",
|
|
1922
|
+
properties: {
|
|
1923
|
+
hours: { type: "number", description: "Look ahead this many hours (default: 24)" },
|
|
1924
|
+
calendarId: { type: "string", description: 'Calendar ID (default: "primary")' },
|
|
1925
|
+
includeDeclined: { type: "string", description: '"true" to include declined meetings' }
|
|
1926
|
+
},
|
|
1927
|
+
required: []
|
|
1928
|
+
},
|
|
1929
|
+
async execute(_id, params) {
|
|
1930
|
+
try {
|
|
1931
|
+
const token = await tp.getAccessToken();
|
|
1932
|
+
const hours = params.hours || 24;
|
|
1933
|
+
const now = /* @__PURE__ */ new Date();
|
|
1934
|
+
const later = new Date(now.getTime() + hours * 36e5);
|
|
1935
|
+
const calendarId = params.calendarId || "primary";
|
|
1936
|
+
const data = await calendarApi(token, `/calendars/${encodeURIComponent(calendarId)}/events`, {
|
|
1937
|
+
query: {
|
|
1938
|
+
timeMin: now.toISOString(),
|
|
1939
|
+
timeMax: later.toISOString(),
|
|
1940
|
+
singleEvents: "true",
|
|
1941
|
+
orderBy: "startTime",
|
|
1942
|
+
maxResults: "50"
|
|
1943
|
+
}
|
|
1944
|
+
});
|
|
1945
|
+
const meetings = (data.items || []).map((event) => {
|
|
1946
|
+
const times = parseEventTime(event);
|
|
1947
|
+
const meetingLink = extractMeetingLink(event);
|
|
1948
|
+
const myStatus = (event.attendees || []).find((a) => a.self)?.responseStatus;
|
|
1949
|
+
if (params.includeDeclined !== "true" && myStatus === "declined") return null;
|
|
1950
|
+
return {
|
|
1951
|
+
id: event.id,
|
|
1952
|
+
title: event.summary,
|
|
1953
|
+
start: times?.start?.toISOString(),
|
|
1954
|
+
end: times?.end?.toISOString(),
|
|
1955
|
+
startsIn: times ? Math.round((times.start.getTime() - now.getTime()) / 6e4) + " minutes" : null,
|
|
1956
|
+
isNow: times ? now >= times.start && now <= times.end : false,
|
|
1957
|
+
meetingLink: meetingLink?.url || null,
|
|
1958
|
+
platform: meetingLink?.platform || null,
|
|
1959
|
+
organizer: event.organizer?.email,
|
|
1960
|
+
attendees: (event.attendees || []).map((a) => ({
|
|
1961
|
+
email: a.email,
|
|
1962
|
+
name: a.displayName,
|
|
1963
|
+
status: a.responseStatus,
|
|
1964
|
+
self: a.self || false
|
|
1965
|
+
})),
|
|
1966
|
+
myStatus,
|
|
1967
|
+
description: event.description?.slice(0, 500),
|
|
1968
|
+
location: event.location
|
|
1969
|
+
};
|
|
1970
|
+
}).filter(Boolean);
|
|
1971
|
+
return jsonResult({
|
|
1972
|
+
meetings,
|
|
1973
|
+
total: meetings.length,
|
|
1974
|
+
withMeetingLinks: meetings.filter((m) => m.meetingLink).length,
|
|
1975
|
+
happeningNow: meetings.filter((m) => m.isNow).length,
|
|
1976
|
+
nextMeeting: meetings[0] || null
|
|
1977
|
+
});
|
|
1978
|
+
} catch (e) {
|
|
1979
|
+
return errorResult(e.message);
|
|
1980
|
+
}
|
|
1981
|
+
}
|
|
1982
|
+
},
|
|
1983
|
+
// ─── Join Meeting ──────────────────────────────────
{
  name: "meeting_join",
  description: `Join a Google Meet video meeting. After joining:
- Captions are auto-enabled
- A MeetingMonitor starts streaming captions and chat to you in real-time
- You will receive "[Meeting Monitor \u2014 Live Update]" messages with new captions/chat
- When someone talks to you, respond using meeting_action(action: "chat", message: "...")
- You do NOT need to manually poll for captions \u2014 they come to you automatically
- The monitor will tell you when the meeting ends`,
  category: "utility",
  parameters: {
    type: "object",
    properties: {
      url: { type: "string", description: "Meeting URL (Google Meet link)" },
      eventId: { type: "string", description: "Google Calendar event ID \u2014 will auto-extract the meeting link" }
    },
    required: []
  },
  // Resolves a meeting URL (directly, or from a calendar event via
  // extractMeetingLink), drives the shared browser into the call, attempts to
  // enable captions and open chat, then attaches a MeetingMonitor and runs a
  // best-effort voice (ElevenLabs) preflight.
  async execute(_id, params) {
    try {
      const token = await tp.getAccessToken();
      let meetingUrl = params.url;
      // No explicit URL: look the link up on the referenced calendar event.
      if (!meetingUrl && params.eventId) {
        const event = await calendarApi(token, `/calendars/primary/events/${params.eventId}`);
        const link = extractMeetingLink(event);
        if (!link) return errorResult("No meeting link found in calendar event: " + (event.summary || params.eventId));
        meetingUrl = link.url;
      }
      if (!meetingUrl) return errorResult("No meeting URL provided. Pass url or eventId.");
      const { page } = await ensureBrowser2(false, agentId, false);
      const result = await joinGoogleMeet(page, meetingUrl);
      if (result.joined) {
        if (result.state === "in_call" || result.state === "unknown") {
          // Best-effort keyboard shortcuts; any failure is swallowed.
          // "c" toggles captions in this flow (see toggle_captions in meeting_action).
          try {
            await page.keyboard.press("c");
            await delay(1e3);
          } catch {
          }
          // NOTE(review): "d" is pressed before sending the intro chat; the exact
          // Meet shortcut semantics are not visible from this file — confirm.
          try {
            await page.keyboard.press("d");
            await delay(1500);
            const voiceConfig = _options?.voiceConfig || {};
            // Without a configured voice, introduce ourselves via chat instead.
            if (!voiceConfig.voiceId) {
              const chatResult = await sendChatMessage(page, `Hi, I'm joining to take notes. I'll communicate via chat.`);
              console.log(`[meeting-join:${agentId}] Intro chat: ${chatResult.sent ? "sent" : "failed"} (${chatResult.method})`);
            }
          } catch {
          }
        }
        if (runtimeRef?.sendMessage && runtimeRef?.getCurrentSessionId) {
          const sessionId = runtimeRef.getCurrentSessionId();
          if (sessionId) {
            // Keep the session alive for the duration of the call; released by
            // the monitor's onMeetingEnd or by meeting_action(action: "leave").
            if (runtimeRef.setKeepAlive) {
              runtimeRef.setKeepAlive(sessionId, true);
            }
            // Creates, registers and starts the caption/chat monitor for this call.
            const startMonitor = async () => {
              const monitor = new MeetingMonitor({
                page,
                agentId,
                sessionId,
                sendMessage: runtimeRef.sendMessage,
                flushIntervalMs: 2500,
                sendChatIndicator: async (p, text) => {
                  try {
                    await sendChatMessage(p, text);
                  } catch {
                  }
                },
                onMeetingEnd: () => {
                  console.log(`[meeting-join:${agentId}] Monitor detected meeting end`);
                  removeMonitor(agentId);
                  if (runtimeRef.setKeepAlive) {
                    runtimeRef.setKeepAlive(sessionId, false);
                  }
                }
              });
              registerMonitor(agentId, monitor);
              await monitor.start();
              console.log(`[meeting-join:${agentId}] \u2705 MeetingMonitor started for session ${sessionId}, keep-alive ON`);
            };
            if (result.state === "in_call" || result.state === "unknown") {
              await startMonitor();
            }
            if (result.state === "waiting_room") {
              console.log(`[meeting-join:${agentId}] In waiting room \u2014 monitor will start after admission`);
              // Poll every 5s until the host admits us (or the meeting ends);
              // state is inferred from known Meet UI strings on the page.
              const admissionWatcher = setInterval(async () => {
                try {
                  const currentState = await page.evaluate(() => {
                    const text = document.body.innerText || "";
                    if (text.includes("Please wait until a meeting host")) return "waiting_room";
                    if (text.includes("Leave call")) return "in_call";
                    if (text.includes("You've left the meeting") || text.includes("Call ended")) return "ended";
                    return "unknown";
                  });
                  if (currentState === "in_call" || currentState === "unknown") {
                    clearInterval(admissionWatcher);
                    console.log(`[meeting-join:${agentId}] \u2705 Admitted to meeting!`);
                    // Same best-effort caption/chat setup as the direct-join path above.
                    try {
                      await page.keyboard.press("c");
                      await delay(1e3);
                    } catch {
                    }
                    try {
                      await page.keyboard.press("d");
                      await delay(1500);
                      const voiceConfig = _options?.voiceConfig || {};
                      if (!voiceConfig.voiceId) {
                        await sendChatMessage(page, `Hi, I'm joining to take notes. I'll communicate via chat.`);
                      }
                    } catch {
                    }
                    try {
                      await startMonitor();
                    } catch (e) {
                      console.error(`[meeting-join:${agentId}] Failed to start monitor after admission: ${e.message}`);
                    }
                    try {
                      await runtimeRef.sendMessage(
                        sessionId,
                        `[Meeting Monitor] You have been admitted to the meeting! Captions are now enabled. Chat panel is open. You will start receiving live updates.`
                      );
                    } catch {
                    }
                  } else if (currentState === "ended") {
                    clearInterval(admissionWatcher);
                    console.log(`[meeting-join:${agentId}] Meeting ended while in waiting room`);
                    if (runtimeRef.setKeepAlive) runtimeRef.setKeepAlive(sessionId, false);
                  }
                } catch {
                  // Page evaluation failed (tab likely closed) — stop polling.
                  clearInterval(admissionWatcher);
                }
              }, 5e3);
            }
          }
        } else {
          console.warn(`[meeting-join:${agentId}] No runtimeRef \u2014 MeetingMonitor NOT started`);
        }
      }
      // Voice preflight: determine whether spoken (TTS) output is available.
      let voiceStatus = null;
      try {
        const voiceConfig = _options?.voiceConfig || {};
        const elevenLabsKeyResolver = _options?.elevenLabsKeyResolver;
        // Env var takes precedence; the injected resolver is best-effort.
        const getApiKey = async () => {
          if (process.env.ELEVENLABS_API_KEY) return process.env.ELEVENLABS_API_KEY;
          if (elevenLabsKeyResolver) try {
            return await elevenLabsKeyResolver();
          } catch {
            return null;
          }
          return null;
        };
        voiceStatus = await voiceCapability.preflight(
          agentId,
          getApiKey,
          voiceConfig.voiceId,
          voiceConfig.voiceName,
          voiceConfig.audioDevice
        );
        console.log(`[meeting-join:${agentId}] Voice: ${voiceStatus.mode} (${voiceStatus.available ? "ready" : voiceStatus.issues.join(", ")})`);
        if (voiceStatus?.available) {
          const apiKey = await getApiKey();
          console.log(`[meeting-join:${agentId}] Voice Intelligence: apiKey=${apiKey ? "present" : "MISSING"}`);
          if (apiKey) {
            // Lazily loaded so non-voice joins never pay for this module.
            const { createMeetingVoiceIntelligence } = await import("./meeting-voice-intelligence-RZZCAD6G.js");
            console.log(`[meeting-join:${agentId}] Creating Voice Intelligence instance...`);
            const voiceIntel = createMeetingVoiceIntelligence({
              agentId,
              agentName: _options?.agentName || "AI Assistant",
              agentAliases: _options?.agentAliases || [],
              voiceId: voiceConfig.voiceId || voiceStatus.voiceId,
              voiceName: voiceConfig.voiceName || voiceStatus.voiceName,
              audioDevice: voiceConfig.audioDevice || voiceStatus.audioDevice,
              apiKey,
              modelSpeed: "fast"
              // Assume fast model for meetings (Sonnet)
            });
            // Fire-and-forget: pre-generate audio clips in the background;
            // failures are logged, never fatal to the join.
            voiceIntel.initialize().then((result2) => {
              console.log(`[meeting-join:${agentId}] Voice Intelligence: ${result2.generated} audio clips ready (${result2.durationMs}ms)`);
            }).catch((e) => {
              console.warn(`[meeting-join:${agentId}] Voice Intelligence init failed: ${e.message}`);
            });
          }
        }
      } catch (e) {
        console.warn(`[meeting-join:${agentId}] Voice preflight failed: ${e.message}`);
      }
      const _voiceBlock = voiceStatus ? voiceCapability.buildPromptBlock(agentId) : "\n## Voice: NOT CHECKED\nUse meeting_speak to talk \u2014 it auto-falls back to chat if voice fails.\n";
      const hasVoice = voiceStatus?.available === true;
      const monitorActive = !!getActiveMonitor(agentId);
      const communicationInstructions = hasVoice ? 'You have a VOICE in this meeting. Use meeting_speak(text: "...") to talk out loud \u2014 participants will hear you. Use meeting_action(action: "chat", message: "...") for links/code/long text. meeting_speak auto-falls back to chat if voice fails.' : 'You do NOT have a microphone. Communicate via meeting chat: meeting_action(action: "chat", message: "...").';
      // Summary payload returned to the model. The raw screenshot is stripped
      // here and, when present, attached separately as an image part below.
      const joinResult = {
        ...result,
        screenshotBase64: void 0,
        monitorActive,
        voiceAvailable: hasVoice,
        voiceMode: voiceStatus?.mode || "chat-only",
        voiceName: voiceStatus?.voiceName || null,
        instructions: result.joined ? monitorActive ? `You are in the meeting. A MeetingMonitor is streaming captions and chat to you automatically. ${communicationInstructions} You do NOT need to call read_captions \u2014 updates come to you.` : `You are in the meeting. Captions are enabled. ${communicationInstructions} Call meeting_action(action: "read_captions") periodically to see what people are saying.` : "Join failed. Check screenshot.",
        voiceStatusDetail: voiceStatus ? {
          mode: voiceStatus.mode,
          voice: voiceStatus.voiceName,
          issues: voiceStatus.issues
        } : null
      };
      if (result.screenshotBase64) {
        return {
          content: [
            { type: "text", text: JSON.stringify(joinResult, null, 2) },
            { type: "image", data: result.screenshotBase64, mimeType: "image/png" }
          ]
        };
      }
      return jsonResult(joinResult);
    } catch (e) {
      return errorResult(e.message);
    }
  }
},
|
|
2202
|
+
// ─── Meeting Actions ───────────────────────────────
{
  name: "meeting_action",
  description: `Perform actions during an active Google Meet meeting:
- screenshot: Take a screenshot of the current meeting state
- read_captions: Read live captions (Note: if MeetingMonitor is active, captions come to you automatically \u2014 use this only for on-demand reads)
- read_chat: Read in-call chat messages
- chat: Send a message in the meeting chat (requires "message" param). Uses robust multi-strategy input.
- participants: List meeting participants
- toggle_captions: Turn captions on/off
- share_screen: Share your entire screen with meeting participants
- share_tab: Share a specific browser tab (optional "url" param to open a URL in a new tab and share it)
- stop_sharing: Stop any active screen/tab sharing
- leave: Leave the meeting and stop the monitor`,
  category: "utility",
  parameters: {
    type: "object",
    properties: {
      action: { type: "string", description: "Action to perform" },
      message: { type: "string", description: 'Chat message to send (for "chat" action)' },
      url: { type: "string", description: 'URL to open and share (for "share_tab" action)' }
    },
    required: ["action"]
  },
  // Dispatches one named in-meeting action against the agent's shared browser
  // page. Requires an existing session already on a meet.google.com page.
  async execute(_id, params) {
    try {
      const action = params.action;
      let page;
      // Reuse (never create) the browser session from meeting_join.
      try {
        const result = await ensureBrowser2(false, agentId, false);
        page = result.page;
      } catch (err) {
        return errorResult("No active browser session. Join a meeting first with meeting_join.");
      }
      const pageUrl = page.url() || "";
      if (!pageUrl.includes("meet.google.com")) {
        return errorResult(`Not on a Google Meet page (current URL: ${pageUrl}). Join a meeting first.`);
      }
      switch (action) {
        case "screenshot": {
          const screenshot = await saveScreenshot(page);
          return {
            content: [
              { type: "text", text: `Meeting screenshot taken.
URL: ${pageUrl}
Saved: ${screenshot.path}` },
              { type: "image", data: screenshot.base64, mimeType: "image/png" }
            ]
          };
        }
        case "read_captions": {
          // The "Turn off captions" control is only present while captions are on.
          const captionsOn = await page.evaluate(() => !!document.querySelector('[aria-label*="Turn off captions"]'));
          if (!captionsOn) {
            // "c" is the Meet shortcut used here to toggle captions on.
            await page.keyboard.press("c");
            await delay(1500);
          }
          const captions = await readCaptionsFromDOM(page);
          const monitor = getActiveMonitor(agentId);
          // NOTE(review): captionsEnabled is reported as true even if the "c"
          // keypress above did not actually enable captions — confirm.
          return jsonResult({
            action: "read_captions",
            captions,
            count: captions.length,
            captionsEnabled: true,
            monitorActive: !!monitor,
            note: monitor ? "MeetingMonitor is active \u2014 captions are being streamed to you automatically. You only need this tool for on-demand reads." : "No monitor active. Call this periodically to stay updated."
          });
        }
        case "toggle_captions": {
          await page.keyboard.press("c");
          await delay(500);
          // Re-check the DOM to report the resulting state, not the intent.
          const isOn = await page.evaluate(() => !!document.querySelector('[aria-label*="Turn off captions"]'));
          return jsonResult({ action: "toggle_captions", captionsOn: isOn });
        }
        case "read_chat": {
          // Open the chat side panel first (best-effort aria-label click).
          await ariaClick(page, ["Chat with everyone", "Open chat"], 3e3);
          await delay(1e3);
          const messages = await page.evaluate(() => {
            const msgs = [];
            const panel = document.querySelector('[aria-label="Side panel"]') || document.querySelector('[aria-label*="In-call messages"]');
            if (!panel) return msgs;
            // Primary strategy: structured message elements.
            const msgEls = panel.querySelectorAll("[data-message-text]");
            for (const el of msgEls) {
              const text = el.getAttribute("data-message-text") || el.textContent?.trim() || "";
              let sender = "";
              const parent = el.closest('[class*="message"]') || el.parentElement?.parentElement;
              if (parent) {
                const nameEl = parent.querySelector('[class*="sender"], [class*="name"]');
                if (nameEl) sender = nameEl.textContent?.trim() || "";
              }
              if (text) msgs.push({ sender, text });
            }
            // Fallback strategy: scrape panel text, filtering known UI chrome lines.
            if (msgs.length === 0) {
              const allText = panel.innerText || "";
              const lines = allText.split("\n").filter(
                (l) => l.trim().length > 0 && l.trim() !== "In-call messages" && !l.includes("Continuous chat") && !l.includes("Messages won't be saved") && !l.includes("No chat messages") && !l.includes("Send a message") && !l.includes("pin a message") && l.trim().length < 500
              );
              for (const line of lines) msgs.push({ sender: "", text: line.trim() });
            }
            return msgs;
          });
          return jsonResult({ action: "read_chat", messages, count: messages.length });
        }
        case "chat": {
          const message = params.message;
          if (!message) return errorResult('No message provided. Set the "message" parameter.');
          const result = await sendChatMessage(page, message);
          if (result.sent) {
            return jsonResult({ action: "chat", status: "sent", message, method: result.method });
          } else {
            return errorResult(`Failed to send chat message: ${result.error}. Method: ${result.method}`);
          }
        }
        case "participants": {
          // Open the People panel, scrape names, then close the panel again.
          await ariaClick(page, ["People"], 3e3);
          await delay(1e3);
          const participants = await page.evaluate(() => {
            const names = [];
            const panel = document.querySelector('[aria-label="Side panel"]');
            if (!panel) return names;
            const items = panel.querySelectorAll('[class*="participant"], [data-participant-id], [role="listitem"]');
            for (const item of items) {
              const name = item.textContent?.trim();
              // Length guard filters out container elements whose textContent
              // concatenates many children.
              if (name && name.length < 100) names.push(name);
            }
            // Fallback: read names directly off the video tiles.
            if (names.length === 0) {
              const tiles = document.querySelectorAll("[data-self-name], [data-participant-id]");
              for (const t of tiles) {
                const n = t.getAttribute("data-self-name") || t.textContent?.trim();
                if (n && n.length < 100) names.push(n);
              }
            }
            return [...new Set(names)];
          });
          await ariaClick(page, ["Close"], 1500);
          return jsonResult({ action: "participants", participants, count: participants.length });
        }
        case "share_screen": {
          try {
            const clicked = await ariaClick(page, ["Present now", "Share screen", "Present"], 5e3);
            if (!clicked) return errorResult('Could not find "Present now" button. Are you in an active meeting?');
            await delay(1500);
            const entireScreen = await ariaClick(page, ["Your entire screen", "Entire screen", "A window"], 5e3);
            // Fallback: scan the sharing menu items by text content.
            if (!entireScreen) {
              const firstOption = await page.evaluate(() => {
                const options = document.querySelectorAll('[role="menuitem"], [role="option"], [data-is-tooltip-wrapper]');
                for (const opt of options) {
                  const text = opt.textContent?.toLowerCase() || "";
                  if (text.includes("entire screen") || text.includes("your screen") || text.includes("a window")) {
                    opt.click();
                    return true;
                  }
                }
                return false;
              });
              if (!firstOption) return errorResult("Could not select screen sharing option. The sharing menu may have changed.");
            }
            await delay(2e3);
            // Confirm sharing actually started by looking for the "presenting" UI.
            const isSharing = await page.evaluate(() => {
              const indicators = document.querySelectorAll('[aria-label*="Stop presenting"], [aria-label*="stop sharing"], [aria-label*="You are presenting"]');
              return indicators.length > 0;
            });
            return jsonResult({
              action: "share_screen",
              status: isSharing ? "sharing" : "initiated",
              type: "entire_screen",
              note: isSharing ? 'Screen sharing is active. Use meeting_action(action: "stop_sharing") to stop.' : "Screen sharing was initiated. The OS picker may require manual confirmation if auto-select did not work."
            });
          } catch (e) {
            return errorResult(`Failed to share screen: ${e.message}`);
          }
        }
        case "share_tab": {
          // params.targetUrl is accepted as a legacy alias for params.url.
          const targetUrl = params.url || params.targetUrl;
          try {
            // Optionally open the URL in a fresh tab, then return focus to Meet
            // so the share picker lists the new tab.
            if (targetUrl) {
              const newPage = await page.context().newPage();
              await newPage.goto(targetUrl, { waitUntil: "domcontentloaded", timeout: 15e3 });
              await delay(1e3);
              await page.bringToFront();
              await delay(500);
            }
            const clicked = await ariaClick(page, ["Present now", "Share screen", "Present"], 5e3);
            if (!clicked) return errorResult('Could not find "Present now" button.');
            await delay(1500);
            const tabOption = await ariaClick(page, ["A tab", "Chrome tab", "Browser tab"], 5e3);
            // Fallback: pick any sharing-menu entry mentioning "tab".
            if (!tabOption) {
              const found = await page.evaluate(() => {
                const items = document.querySelectorAll('[role="menuitem"], [role="option"], [data-is-tooltip-wrapper]');
                for (const item of items) {
                  const text = item.textContent?.toLowerCase() || "";
                  if (text.includes("tab")) {
                    item.click();
                    return true;
                  }
                }
                return false;
              });
              if (!found) return errorResult('Could not find "A tab" option in sharing menu.');
            }
            await delay(2e3);
            const isSharing = await page.evaluate(() => {
              const indicators = document.querySelectorAll('[aria-label*="Stop presenting"], [aria-label*="stop sharing"], [aria-label*="You are presenting"]');
              return indicators.length > 0;
            });
            return jsonResult({
              action: "share_tab",
              status: isSharing ? "sharing" : "initiated",
              type: "browser_tab",
              targetUrl: targetUrl || null,
              note: isSharing ? 'Tab sharing is active. Use meeting_action(action: "stop_sharing") to stop.' : "Tab sharing initiated. You may need to select the tab in the browser picker."
            });
          } catch (e) {
            return errorResult(`Failed to share tab: ${e.message}`);
          }
        }
        case "stop_sharing": {
          try {
            const stopped = await ariaClick(page, ["Stop presenting", "Stop sharing", "Stop presentation"], 3e3);
            if (stopped) {
              await delay(500);
              return jsonResult({ action: "stop_sharing", status: "stopped" });
            }
            // Fallback: scan every button for stop+present/share wording.
            const clickedStop = await page.evaluate(() => {
              const buttons = document.querySelectorAll("button");
              for (const btn of buttons) {
                const text = btn.textContent?.toLowerCase() || "";
                const label = btn.getAttribute("aria-label")?.toLowerCase() || "";
                if (text.includes("stop") && (text.includes("present") || text.includes("shar")) || label.includes("stop") && (label.includes("present") || label.includes("shar"))) {
                  btn.click();
                  return true;
                }
              }
              return false;
            });
            if (clickedStop) {
              await delay(500);
              return jsonResult({ action: "stop_sharing", status: "stopped" });
            }
            return jsonResult({ action: "stop_sharing", status: "no_active_share", note: "No active screen share was detected." });
          } catch (e) {
            return errorResult(`Failed to stop sharing: ${e.message}`);
          }
        }
        case "leave": {
          // Tear down the monitor and release keep-alive BEFORE leaving the call,
          // so the session can wind down even if the click below fails.
          removeMonitor(agentId);
          if (runtimeRef?.setKeepAlive && runtimeRef?.getCurrentSessionId) {
            const sid = runtimeRef.getCurrentSessionId();
            if (sid) runtimeRef.setKeepAlive(sid, false);
          }
          await ariaClick(page, ["Leave call"], 3e3);
          return jsonResult({ action: "leave", status: "left_meeting", monitorStopped: true, keepAliveReleased: true });
        }
        default:
          return errorResult(`Unknown action: "${action}". Supported: screenshot, read_captions, read_chat, chat, participants, toggle_captions, share_screen, share_tab, stop_sharing, leave`);
      }
    } catch (e) {
      return errorResult(e.message);
    }
  }
},
|
|
2462
|
+
// ─── Detect Meeting Invites in Email ───────────────
|
|
2463
|
+
{
|
|
2464
|
+
name: "meetings_scan_inbox",
|
|
2465
|
+
description: "Scan recent emails for meeting invitations and extract meeting links.",
|
|
2466
|
+
category: "utility",
|
|
2467
|
+
parameters: {
|
|
2468
|
+
type: "object",
|
|
2469
|
+
properties: {
|
|
2470
|
+
hours: { type: "number", description: "Scan emails from last N hours (default: 24)" },
|
|
2471
|
+
maxResults: { type: "number", description: "Max emails to scan (default: 30)" }
|
|
2472
|
+
},
|
|
2473
|
+
required: []
|
|
2474
|
+
},
|
|
2475
|
+
async execute(_id, params) {
|
|
2476
|
+
try {
|
|
2477
|
+
const token = await tp.getAccessToken();
|
|
2478
|
+
const hours = params.hours || 24;
|
|
2479
|
+
const after = new Date(Date.now() - hours * 36e5);
|
|
2480
|
+
const dateStr = `${after.getFullYear()}/${String(after.getMonth() + 1).padStart(2, "0")}/${String(after.getDate()).padStart(2, "0")}`;
|
|
2481
|
+
const GMAIL_BASE = "https://gmail.googleapis.com/gmail/v1/users/me";
|
|
2482
|
+
const query = `after:${dateStr} (meet.google.com OR zoom.us OR teams.microsoft.com OR "meeting invitation" OR filename:ics)`;
|
|
2483
|
+
const searchUrl = new URL(`${GMAIL_BASE}/messages`);
|
|
2484
|
+
searchUrl.searchParams.set("q", query);
|
|
2485
|
+
searchUrl.searchParams.set("maxResults", String(params.maxResults || 30));
|
|
2486
|
+
const searchRes = await fetch(searchUrl.toString(), { headers: { Authorization: `Bearer ${token}` } });
|
|
2487
|
+
if (!searchRes.ok) throw new Error(`Gmail search failed: ${searchRes.status}`);
|
|
2488
|
+
const searchData = await searchRes.json();
|
|
2489
|
+
if (!searchData.messages?.length) {
|
|
2490
|
+
return jsonResult({ meetings: [], count: 0, message: "No meeting invites found in the last " + hours + " hours" });
|
|
2491
|
+
}
|
|
2492
|
+
const meetings = [];
|
|
2493
|
+
for (const msg of searchData.messages.slice(0, 20)) {
|
|
2494
|
+
try {
|
|
2495
|
+
let walkParts2 = function(part) {
|
|
2496
|
+
if (part.body?.data) bodyText += Buffer.from(part.body.data.replace(/-/g, "+").replace(/_/g, "/"), "base64").toString("utf-8") + " ";
|
|
2497
|
+
if (part.parts) part.parts.forEach(walkParts2);
|
|
2498
|
+
};
|
|
2499
|
+
var walkParts = walkParts2;
|
|
2500
|
+
const msgRes = await fetch(`${GMAIL_BASE}/messages/${msg.id}?format=full`, { headers: { Authorization: `Bearer ${token}` } });
|
|
2501
|
+
if (!msgRes.ok) continue;
|
|
2502
|
+
const msgData = await msgRes.json();
|
|
2503
|
+
const headers = msgData.payload?.headers || [];
|
|
2504
|
+
const subject = headers.find((h) => h.name?.toLowerCase() === "subject")?.value || "";
|
|
2505
|
+
const from = headers.find((h) => h.name?.toLowerCase() === "from")?.value || "";
|
|
2506
|
+
const date = headers.find((h) => h.name?.toLowerCase() === "date")?.value || "";
|
|
2507
|
+
let bodyText = "";
|
|
2508
|
+
walkParts2(msgData.payload);
|
|
2509
|
+
const meetLinks = [];
|
|
2510
|
+
const patterns = [
|
|
2511
|
+
{ regex: /https:\/\/meet\.google\.com\/[a-z-]+/gi, platform: "google_meet" },
|
|
2512
|
+
{ regex: /https:\/\/[\w.-]*zoom\.us\/[jw]\/[\d?=&\w]+/gi, platform: "zoom" },
|
|
2513
|
+
{ regex: /https:\/\/teams\.microsoft\.com\/l\/meetup-join\/[^\s"<>]+/gi, platform: "teams" }
|
|
2514
|
+
];
|
|
2515
|
+
for (const p of patterns) {
|
|
2516
|
+
const matches = bodyText.match(p.regex);
|
|
2517
|
+
if (matches) for (const url of [...new Set(matches)]) meetLinks.push({ platform: p.platform, url });
|
|
2518
|
+
}
|
|
2519
|
+
if (meetLinks.length > 0) meetings.push({ messageId: msg.id, subject, from, date, meetingLinks: meetLinks, snippet: msgData.snippet?.slice(0, 200) });
|
|
2520
|
+
} catch {
|
|
2521
|
+
}
|
|
2522
|
+
}
|
|
2523
|
+
return jsonResult({ meetings, count: meetings.length, scannedEmails: searchData.messages.length });
|
|
2524
|
+
} catch (e) {
|
|
2525
|
+
return errorResult(e.message);
|
|
2526
|
+
}
|
|
2527
|
+
}
|
|
2528
|
+
},
|
|
2529
|
+
// ─── RSVP to Meeting ───────────────────────────────
|
|
2530
|
+
{
|
|
2531
|
+
name: "meeting_rsvp",
|
|
2532
|
+
description: "Accept or decline a Google Calendar meeting invitation.",
|
|
2533
|
+
category: "utility",
|
|
2534
|
+
parameters: {
|
|
2535
|
+
type: "object",
|
|
2536
|
+
properties: {
|
|
2537
|
+
eventId: { type: "string", description: "Calendar event ID (required)" },
|
|
2538
|
+
response: { type: "string", description: '"accepted", "declined", or "tentative" (required)' },
|
|
2539
|
+
calendarId: { type: "string", description: 'Calendar ID (default: "primary")' }
|
|
2540
|
+
},
|
|
2541
|
+
required: ["eventId", "response"]
|
|
2542
|
+
},
|
|
2543
|
+
async execute(_id, params) {
|
|
2544
|
+
try {
|
|
2545
|
+
const token = await tp.getAccessToken();
|
|
2546
|
+
const calendarId = params.calendarId || "primary";
|
|
2547
|
+
const email = tp.getEmail();
|
|
2548
|
+
const event = await calendarApi(token, `/calendars/${encodeURIComponent(calendarId)}/events/${params.eventId}`);
|
|
2549
|
+
const attendees = (event.attendees || []).map((a) => {
|
|
2550
|
+
if (a.self || a.email === email) return { ...a, responseStatus: params.response };
|
|
2551
|
+
return a;
|
|
2552
|
+
});
|
|
2553
|
+
if (!attendees.find((a) => a.self || a.email === email)) {
|
|
2554
|
+
attendees.push({ email, responseStatus: params.response });
|
|
2555
|
+
}
|
|
2556
|
+
const updated = await calendarApi(token, `/calendars/${encodeURIComponent(calendarId)}/events/${params.eventId}`, {
|
|
2557
|
+
method: "PATCH",
|
|
2558
|
+
query: { sendUpdates: "all" },
|
|
2559
|
+
body: { attendees }
|
|
2560
|
+
});
|
|
2561
|
+
const link = extractMeetingLink(updated);
|
|
2562
|
+
return jsonResult({
|
|
2563
|
+
rsvp: params.response,
|
|
2564
|
+
eventId: params.eventId,
|
|
2565
|
+
title: updated.summary,
|
|
2566
|
+
start: updated.start?.dateTime || updated.start?.date,
|
|
2567
|
+
meetingLink: link?.url
|
|
2568
|
+
});
|
|
2569
|
+
} catch (e) {
|
|
2570
|
+
return errorResult(e.message);
|
|
2571
|
+
}
|
|
2572
|
+
}
|
|
2573
|
+
}
|
|
2574
|
+
];
|
|
2575
|
+
}
|
|
2576
|
+
|
|
2577
|
+
export {
|
|
2578
|
+
createBrowserTool,
|
|
2579
|
+
createMeetingVoiceTools,
|
|
2580
|
+
ensureBrowser2 as ensureBrowser,
|
|
2581
|
+
sendChatMessage,
|
|
2582
|
+
createMeetingTools
|
|
2583
|
+
};
|