@bool01master/gemini-web-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,678 @@
1
+ import fs from "node:fs/promises";
2
+ import os from "node:os";
3
+ import path from "node:path";
4
+
5
+ import { chromium } from "playwright-core";
6
+
7
// Entry point of the Gemini web app; every session navigates here.
const GEMINI_URL = "https://gemini.google.com/";

// Chrome executable to launch. GEMINI_WEB_CHROME_PATH always wins; otherwise
// fall back to the conventional install location for the current platform
// (the macOS default is unchanged from earlier versions).
const DEFAULT_CHROME_PATH =
  process.env.GEMINI_WEB_CHROME_PATH ||
  (process.platform === "win32"
    ? "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
    : process.platform === "darwin"
      ? "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
      : "/usr/bin/google-chrome");

// Chrome user-data directory, resolved against the current working directory.
// GEMINI_WEB_PROFILE_DIR is accepted as an alternative env var name.
const DEFAULT_USER_DATA_DIR = path.resolve(
  process.env.GEMINI_WEB_USER_DATA_DIR ||
    process.env.GEMINI_WEB_PROFILE_DIR ||
    ".chrome-profile",
);

// Value passed to Chrome's --profile-directory switch.
const DEFAULT_PROFILE_NAME = process.env.GEMINI_WEB_PROFILE_NAME || "Default";

// Directory that receives one sub-directory of artifacts per prompt run.
const DEFAULT_OUTPUT_DIR = path.resolve(
  process.env.GEMINI_WEB_OUTPUT_DIR || "artifacts",
);

// Proxy server: the package-specific variable first, then the conventional
// proxy variables in both upper and lower case. Empty string means "no proxy".
const DEFAULT_PROXY_SERVER =
  process.env.GEMINI_WEB_PROXY_SERVER ||
  process.env.ALL_PROXY ||
  process.env.all_proxy ||
  process.env.HTTPS_PROXY ||
  process.env.https_proxy ||
  process.env.HTTP_PROXY ||
  process.env.http_proxy ||
  "";

// Optional remote-debugging port of an already running Chrome.
const DEFAULT_REMOTE_DEBUGGING_PORT =
  process.env.GEMINI_WEB_REMOTE_DEBUGGING_PORT || "";

// CDP endpoint: an explicit URL wins; otherwise derived from the port on
// localhost. Empty string means "launch Chrome instead of attaching".
const DEFAULT_CDP_URL =
  process.env.GEMINI_WEB_CDP_URL ||
  (DEFAULT_REMOTE_DEBUGGING_PORT
    ? `http://127.0.0.1:${DEFAULT_REMOTE_DEBUGGING_PORT}`
    : "");

// Viewport applied to launched browser contexts.
const VIEWPORT = { width: 1440, height: 1100 };

// Selectors tried, in order, when looking for the prompt input box.
const COMPOSER_SELECTORS = [
  'textarea',
  '[role="textbox"]',
  '[contenteditable="true"]',
  'div[contenteditable="true"]',
];

// Accessible-name patterns (English and Chinese) for the buttons we look for.
const SEND_BUTTON_NAMES = [/^send$/i, /send message/i, /发送/, /提交/];
const LOGIN_BUTTON_NAMES = [/sign in/i, /login/i, /登录/, /登入/];
const NEW_CHAT_BUTTON_NAMES = [/new chat/i, /new conversation/i, /新对话/, /新聊天/];
const STOP_BUTTON_NAMES = [/stop/i, /停止/, /cancel/i, /取消/];
47
+
48
/**
 * Returns a promise that fulfills (with no value) after roughly `ms`
 * milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
51
+
52
/**
 * Tidies whitespace in scraped text: non-breaking spaces become plain spaces,
 * spaces/tabs directly before a newline are dropped, runs of three or more
 * newlines collapse to exactly two, and the result is trimmed.
 *
 * @param {string} text - Raw text.
 * @returns {string} The normalized text.
 */
function normalizeWhitespace(text) {
  const withPlainSpaces = text.split("\u00a0").join(" ");
  const withoutTrailingBlanks = withPlainSpaces.replace(/[ \t]+\n/g, "\n");
  const withCollapsedGaps = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  return withCollapsedGaps.trim();
}
59
+
60
/**
 * Builds a short, filesystem-friendly slug from free text.
 *
 * The text is lower-cased, every run of characters outside `a-z0-9` becomes a
 * single hyphen, and the result is capped at 48 characters. Hyphens are
 * trimmed both before AND after truncation, so a cut that lands on a separator
 * never leaves a dangling `-` at the end of the slug.
 *
 * @param {string} text - Source text (for example the user's prompt).
 * @returns {string} The slug, or `"prompt"` when nothing usable remains
 *   (empty input, or input with no ASCII letters or digits).
 */
function slugify(text) {
  const slug = String(text ?? "")
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 48)
    // Truncation may have cut right after a separator; drop it.
    .replace(/-+$/, "");

  return slug || "prompt";
}
67
+
68
/**
 * Formats the current local time as `YYYYMMDD-HHMMSS`.
 *
 * @returns {string} Timestamp string built from the local-time getters of
 *   `Date`; every field except the year is zero-padded to two digits.
 */
function makeTimestamp() {
  const current = new Date();
  const twoDigits = (value) => String(value).padStart(2, "0");

  const datePart =
    `${current.getFullYear()}` +
    twoDigits(current.getMonth() + 1) +
    twoDigits(current.getDate());
  const timePart =
    twoDigits(current.getHours()) +
    twoDigits(current.getMinutes()) +
    twoDigits(current.getSeconds());

  return `${datePart}-${timePart}`;
}
81
+
82
/**
 * Decodes the payload of a data URL into a Buffer.
 *
 * Everything after the FIRST comma is the payload, so payloads that contain
 * commas themselves are not truncated. A `data:` URL whose header lacks the
 * `;base64` marker is percent-decoded byte by byte; any other input is
 * treated as base64.
 *
 * @param {string} dataUrl - Data URL such as `data:image/png;base64,....`.
 * @returns {Buffer} The decoded bytes.
 * @throws {TypeError} If `dataUrl` is not a string or has no `,` separator.
 */
function dataUrlToBuffer(dataUrl) {
  if (typeof dataUrl !== "string") {
    throw new TypeError("Expected the data URL to be a string.");
  }

  const separatorIndex = dataUrl.indexOf(",");
  if (separatorIndex === -1) {
    throw new TypeError("Malformed data URL: missing ',' separator.");
  }

  const header = dataUrl.slice(0, separatorIndex);
  const payload = dataUrl.slice(separatorIndex + 1);
  const isPercentEncoded =
    /^data:/i.test(header) && !/;\s*base64\s*$/i.test(header);

  if (!isPercentEncoded) {
    return Buffer.from(payload, "base64");
  }

  // Byte-wise percent-decoding: "%XX" becomes one byte, anything else is
  // copied through unchanged, so malformed escapes never throw.
  const raw = Buffer.from(payload, "utf8");
  const decoded = [];
  for (let index = 0; index < raw.length; index += 1) {
    const hex =
      raw[index] === 0x25 ? raw.toString("latin1", index + 1, index + 3) : "";
    if (/^[0-9a-f]{2}$/i.test(hex)) {
      decoded.push(Number.parseInt(hex, 16));
      index += 2;
    } else {
      decoded.push(raw[index]);
    }
  }

  return Buffer.from(decoded);
}
86
+
87
/**
 * Creates `dir` together with any missing parent directories; an already
 * existing directory is not an error.
 *
 * @param {string} dir - Directory path to create.
 * @returns {Promise<void>}
 */
async function ensureDir(dir) {
  const mkdirOptions = { recursive: true };
  await fs.mkdir(dir, mkdirOptions);
}
90
+
91
/**
 * Finds, among all elements matching `selectors`, the visible and
 * sufficiently large one whose bottom edge is lowest on the page.
 *
 * Only the first `maxPerSelector` matches of each selector are inspected.
 * Failures of `isVisible()` / `boundingBox()` count as "not a candidate".
 * When several candidates share the same bottom edge, the one encountered
 * first (selector order, then match order) is returned.
 *
 * @param {object} page - Object exposing a Playwright-style `locator(selector)`.
 * @param {string[]} selectors - Selectors to probe, in order.
 * @param {object} [options]
 * @param {number} [options.minWidth=120] - Minimum bounding-box width.
 * @param {number} [options.minHeight=24] - Minimum bounding-box height.
 * @param {number} [options.maxPerSelector=10] - Matches inspected per selector.
 * @returns {Promise<object|null>} The winning locator, or `null` if none qualifies.
 */
async function firstVisibleLocator(page, selectors, options = {}) {
  const widthFloor = options.minWidth ?? 120;
  const heightFloor = options.minHeight ?? 24;
  const perSelectorCap = options.maxPerSelector ?? 10;

  let winner = null;
  let winnerBottom = 0;

  for (const selector of selectors) {
    const matches = page.locator(selector);
    const total = Math.min(await matches.count(), perSelectorCap);

    for (let position = 0; position < total; position += 1) {
      const candidate = matches.nth(position);

      const shown = await candidate.isVisible().catch(() => false);
      if (!shown) {
        continue;
      }

      const bounds = await candidate.boundingBox().catch(() => null);
      if (!bounds || bounds.width < widthFloor || bounds.height < heightFloor) {
        continue;
      }

      // Strictly-greater keeps the earliest candidate on ties.
      const bottomEdge = bounds.y + bounds.height;
      if (winner === null || bottomEdge > winnerBottom) {
        winner = candidate;
        winnerBottom = bottomEdge;
      }
    }
  }

  return winner;
}
124
+
125
/**
 * Returns the first visible button whose accessible name matches one of
 * `namePatterns`, trying the patterns in the given order.
 *
 * For each pattern only the first matching button is checked; a failing
 * visibility check is treated as "not visible".
 *
 * @param {object} page - Object exposing a Playwright-style `getByRole`.
 * @param {Array<RegExp|string>} namePatterns - Accessible-name patterns.
 * @returns {Promise<object|null>} The matching locator, or `null`.
 */
async function firstVisibleButton(page, namePatterns) {
  for (const name of namePatterns) {
    const candidate = page.getByRole("button", { name }).first();
    const shown = await candidate.isVisible().catch(() => false);

    if (!shown) {
      continue;
    }

    return candidate;
  }

  return null;
}
136
+
137
/**
 * Reports whether `targetPath` is accessible to the current process.
 *
 * @param {string} targetPath - File or directory path to probe.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when
 *   it fails for any reason.
 */
async function pathExists(targetPath) {
  return fs.access(targetPath).then(
    () => true,
    () => false,
  );
}
145
+
146
/**
 * Drives the Gemini web UI through a Chrome instance controlled by Playwright.
 *
 * Two connection modes exist: attaching to an already running Chrome over CDP
 * (when a CDP URL is configured) or launching Chrome with a persistent profile
 * directory. The client keeps a single "current page" and exposes helpers to
 * open a session, run a prompt, save artifacts and close the session.
 */
export class GeminiWebClient {
  /**
   * @param {object} [options]
   * @param {string} [options.chromePath] - Chrome executable to launch.
   * @param {string} [options.userDataDir] - Chrome user-data directory.
   * @param {string} [options.profileDir] - Alternative name for `userDataDir`.
   * @param {string} [options.profileName] - Chrome profile directory name.
   * @param {string} [options.outputDir] - Root directory for run artifacts.
   * @param {string} [options.proxyServer] - Proxy server for launched browsers.
   * @param {string} [options.cdpUrl] - CDP endpoint to attach to instead of launching.
   */
  constructor(options = {}) {
    this.chromePath = options.chromePath || DEFAULT_CHROME_PATH;
    this.userDataDir = path.resolve(
      options.userDataDir || options.profileDir || DEFAULT_USER_DATA_DIR,
    );
    this.profileName = options.profileName || DEFAULT_PROFILE_NAME;
    this.outputDir = path.resolve(options.outputDir || DEFAULT_OUTPUT_DIR);
    this.proxyServer = options.proxyServer || DEFAULT_PROXY_SERVER;
    this.cdpUrl = options.cdpUrl || DEFAULT_CDP_URL;
    this.browser = null;
    this.context = null;
    this.page = null;
    this.headless = null;
    this.connectionMode = "launch";
  }

  /**
   * Modifier key used for keyboard shortcuts (select-all, modifier+Enter).
   *
   * Chosen from the platform this process runs on: "Meta" on macOS, "Control"
   * elsewhere. NOTE(review): when attached over CDP to a browser on another
   * machine, the browser's platform may differ from this one.
   *
   * @returns {"Meta"|"Control"}
   */
  shortcutModifier() {
    return process.platform === "darwin" ? "Meta" : "Control";
  }

  /**
   * Opens (or reuses) the browser session and reports the login status.
   *
   * @param {object} [options]
   * @param {boolean} [options.headless=false] - Launch without a visible window.
   * @returns {Promise<object>} Same shape as {@link GeminiWebClient#getLoginStatus}.
   */
  async openSession(options = {}) {
    const headless = options.headless ?? false;
    await this.ensurePage({ headless });

    return this.getLoginStatus();
  }

  /**
   * Inspects the current page and reports whether Gemini looks ready.
   *
   * `ready` is true when a prompt composer is found on the page. If no
   * session is open yet, one is opened in headed mode first.
   *
   * @returns {Promise<object>} Status snapshot with page and configuration info.
   */
  async getLoginStatus() {
    if (!this.context) {
      await this.ensurePage({ headless: false });
    }

    const composer = await this.findComposer();
    const page = await this.ensurePageReference();
    const loginButton = await firstVisibleButton(page, LOGIN_BUTTON_NAMES);

    return {
      ready: Boolean(composer),
      url: page.url(),
      title: await page.title(),
      headless: this.headless,
      profileDir: this.userDataDir,
      userDataDir: this.userDataDir,
      profileName: this.profileName,
      outputDir: this.outputDir,
      loginButtonVisible: Boolean(loginButton),
      chromePath: this.chromePath,
      proxyServer: this.proxyServer || null,
      cdpUrl: this.cdpUrl || null,
      connectionMode: this.connectionMode,
    };
  }

  /**
   * Types a prompt into Gemini, waits for the answer and stores artifacts.
   *
   * Artifacts (optional full-page screenshot, extracted images and a
   * `result.json`) are written into a fresh run directory below `outputDir`.
   *
   * @param {object} options
   * @param {string} options.prompt - Prompt text; must not be blank.
   * @param {boolean} [options.headless=false]
   * @param {number} [options.waitTimeoutMs=120000] - Max wait for the answer to settle.
   * @param {number} [options.maxImages=4] - Max number of images to extract.
   * @param {boolean} [options.newChat=true] - Try to start a new chat first.
   * @param {boolean} [options.takeScreenshot=true]
   * @returns {Promise<object>} The result object that is also written to `result.json`.
   * @throws {Error} If the prompt is blank, the session is not logged in, the
   *   composer cannot be found, or Gemini does not appear to start responding.
   */
  async runPrompt(options) {
    const prompt = options.prompt?.trim();
    if (!prompt) {
      throw new Error("`prompt` cannot be empty.");
    }

    const headless = options.headless ?? false;
    const waitTimeoutMs = options.waitTimeoutMs ?? 120000;
    const maxImages = options.maxImages ?? 4;
    const newChat = options.newChat ?? true;
    const takeScreenshot = options.takeScreenshot ?? true;

    await this.ensurePage({ headless });
    await this.navigateHome();

    const status = await this.getLoginStatus();
    if (!status.ready) {
      throw new Error(
        [
          "Gemini session is not logged in yet.",
          `Chrome user data dir: ${this.userDataDir}`,
          `Chrome profile name: ${this.profileName}`,
          "Run `gemini_open_session`, sign in in the opened window, then retry.",
        ].join(" "),
      );
    }

    if (newChat) {
      await this.tryOpenNewChat();
    }

    const page = await this.ensurePageReference();
    const composer = await this.findComposer();
    if (!composer) {
      throw new Error(
        "Unable to find the Gemini prompt input box. The page layout may have changed.",
      );
    }

    // Baseline used later to detect that the page reacted to the prompt.
    const beforeState = await this.capturePageState();

    await composer.click({ force: true }).catch(async () => composer.click());
    // Best-effort clearing of leftover composer text. The select-all shortcut
    // is Cmd+A on macOS and Ctrl+A elsewhere.
    await page.keyboard.press(`${this.shortcutModifier()}+A`).catch(() => {});
    await page.keyboard.press("Backspace").catch(() => {});
    await composer.pressSequentially(prompt, { delay: 14 });

    const started = await this.sendPromptAndWaitForStart(beforeState);
    if (!started) {
      throw new Error(
        "The prompt was typed, but Gemini did not appear to start responding.",
      );
    }

    const finalState = await this.waitForResponseCompletion(
      beforeState,
      waitTimeoutMs,
    );
    const runDir = await this.createRunDir(prompt);
    const screenshotPath = takeScreenshot
      ? path.join(runDir, "page.png")
      : null;

    if (screenshotPath) {
      await page.screenshot({ path: screenshotPath, fullPage: true });
    }

    const imagePaths = await this.extractVisibleImages({ runDir, maxImages });
    const result = {
      prompt,
      ready: true,
      title: await page.title(),
      url: page.url(),
      text: finalState.text,
      textLength: finalState.textLength,
      imageCountOnPage: finalState.imageCount,
      imagePaths,
      screenshotPath,
      runDir,
      profileDir: this.userDataDir,
      userDataDir: this.userDataDir,
      profileName: this.profileName,
      outputDir: this.outputDir,
      headless: this.headless,
      hostname: os.hostname(),
      proxyServer: this.proxyServer || null,
      cdpUrl: this.cdpUrl || null,
      connectionMode: this.connectionMode,
    };

    await fs.writeFile(
      path.join(runDir, "result.json"),
      JSON.stringify(result, null, 2),
      "utf8",
    );

    return result;
  }

  /**
   * Closes the browser (or just the persistent context) and resets the client.
   *
   * Internal state is reset even if closing fails, so a later call can open a
   * fresh session instead of reusing dead handles; the close error itself is
   * still propagated to the caller.
   *
   * @returns {Promise<object>} Summary with `closed: true` and configuration info.
   */
  async closeSession() {
    try {
      if (this.context) {
        if (this.browser) {
          await this.browser.close();
        } else {
          await this.context.close();
        }
      }
    } finally {
      this.browser = null;
      this.context = null;
      this.page = null;
      this.headless = null;
      this.connectionMode = "launch";
    }

    return {
      closed: true,
      profileDir: this.userDataDir,
      userDataDir: this.userDataDir,
      profileName: this.profileName,
      outputDir: this.outputDir,
      proxyServer: this.proxyServer || null,
      cdpUrl: this.cdpUrl || null,
      connectionMode: this.connectionMode,
    };
  }

  /**
   * Makes sure a browser context and a current page exist.
   *
   * Reuses the existing context when there is one. Otherwise attaches over
   * CDP when `cdpUrl` is set, or launches Chrome with the persistent profile
   * and navigates to Gemini.
   *
   * @param {object} [options]
   * @param {boolean} [options.headless=false] - Only honoured when launching.
   * @returns {Promise<object>} The current page.
   * @throws {Error} If no CDP context is available or Chrome is not found.
   */
  async ensurePage(options = {}) {
    const headless = options.headless ?? false;

    if (this.context) {
      await this.ensurePageReference();
      if (!headless) {
        await this.page.bringToFront().catch(() => {});
      }
      return this.page;
    }

    if (this.cdpUrl) {
      const browser = await chromium.connectOverCDP(this.cdpUrl);
      const context = browser.contexts()[0];
      if (!context) {
        // Do not leak the connection or leave half-initialised state behind.
        await browser.close().catch(() => {});
        throw new Error(
          `No default browser context was available at ${this.cdpUrl}.`,
        );
      }

      this.browser = browser;
      this.context = context;
      this.connectionMode = "cdp";

      // Follow newly opened pages so the client always targets the latest one.
      this.context.on("page", (page) => {
        this.page = page;
      });

      // Prefer an existing Gemini tab, then any tab, then a new one.
      this.page =
        this.context
          .pages()
          .find((page) => /gemini\.google\.com/i.test(page.url())) ||
        this.context.pages()[0] ||
        (await this.context.newPage());
      this.page.setDefaultTimeout(30000);
      this.page.setDefaultNavigationTimeout(120000);
      this.headless = false;
      return this.page;
    }

    if (!(await pathExists(this.chromePath))) {
      throw new Error(
        `Chrome executable not found at ${this.chromePath}. Set GEMINI_WEB_CHROME_PATH if needed.`,
      );
    }

    await ensureDir(this.userDataDir);
    await ensureDir(this.outputDir);

    this.context = await chromium.launchPersistentContext(this.userDataDir, {
      headless,
      executablePath: this.chromePath,
      proxy: this.proxyServer ? { server: this.proxyServer } : undefined,
      viewport: VIEWPORT,
      acceptDownloads: true,
      args: [
        "--no-first-run",
        "--disable-default-browser-check",
        `--profile-directory=${this.profileName}`,
      ],
    });
    this.connectionMode = "launch";
    this.headless = headless;

    // Follow newly opened pages so the client always targets the latest one.
    this.context.on("page", (page) => {
      this.page = page;
    });

    this.page = this.context.pages()[0] || (await this.context.newPage());
    this.page.setDefaultTimeout(30000);
    this.page.setDefaultNavigationTimeout(120000);

    await this.navigateHome();
    if (!headless) {
      await this.page.bringToFront().catch(() => {});
    }

    return this.page;
  }

  /**
   * Returns a usable page, replacing a missing or closed one with another
   * open page of the context (or a brand-new page).
   *
   * @returns {Promise<object>} The current page.
   * @throws {Error} If no browser session is open.
   */
  async ensurePageReference() {
    if (!this.context) {
      throw new Error("Browser session is not open.");
    }

    if (!this.page || this.page.isClosed()) {
      this.page =
        this.context.pages().find((page) => !page.isClosed()) ||
        (await this.context.newPage());
      this.page.setDefaultTimeout(30000);
      this.page.setDefaultNavigationTimeout(120000);
    }

    return this.page;
  }

  /**
   * Navigates the current page to the Gemini home URL.
   *
   * Waits only for the navigation to commit, then pauses for a fixed 2.5 s
   * before returning.
   *
   * @returns {Promise<void>}
   */
  async navigateHome() {
    const page = await this.ensurePageReference();
    await page.goto(GEMINI_URL, {
      waitUntil: "commit",
      timeout: 120000,
    });
    await sleep(2500);
  }

  /**
   * Locates the prompt input box on the current page.
   *
   * @returns {Promise<object|null>} Locator of the composer, or `null`.
   */
  async findComposer() {
    const page = await this.ensurePageReference();
    return firstVisibleLocator(page, COMPOSER_SELECTORS, {
      minWidth: 160,
      minHeight: 24,
      maxPerSelector: 12,
    });
  }

  /**
   * Clicks the "new chat" button when one is visible (best effort).
   *
   * @returns {Promise<boolean>} `false` when no such button was found,
   *   `true` otherwise (even if the click itself failed).
   */
  async tryOpenNewChat() {
    const page = await this.ensurePageReference();
    const button = await firstVisibleButton(page, NEW_CHAT_BUTTON_NAMES);

    if (!button) {
      return false;
    }

    await button.click().catch(() => {});
    await sleep(1500);
    return true;
  }

  /**
   * Submits the typed prompt and waits until Gemini appears to respond.
   *
   * Tries, in order: the send button (or Enter when no button is visible), a
   * second look for the send button if Enter was used, and finally
   * modifier+Enter. Each attempt waits up to 8 s for a reaction.
   *
   * @param {object} beforeState - Page state captured before typing.
   * @returns {Promise<boolean>} Whether a response start was detected.
   */
  async sendPromptAndWaitForStart(beforeState) {
    const page = await this.ensurePageReference();

    const sendButton = await firstVisibleButton(page, SEND_BUTTON_NAMES);
    if (sendButton) {
      await sendButton.click().catch(() => {});
    } else {
      await page.keyboard.press("Enter").catch(() => {});
    }

    if (await this.waitForResponseStart(beforeState, 8000)) {
      return true;
    }

    if (!sendButton) {
      const secondTryButton = await firstVisibleButton(page, SEND_BUTTON_NAMES);
      if (secondTryButton) {
        await secondTryButton.click().catch(() => {});
        if (await this.waitForResponseStart(beforeState, 8000)) {
          return true;
        }
      }
    }

    // Last resort: Cmd+Enter on macOS, Ctrl+Enter elsewhere.
    await page.keyboard
      .press(`${this.shortcutModifier()}+Enter`)
      .catch(() => {});
    return this.waitForResponseStart(beforeState, 8000);
  }

  /**
   * Polls the page about once per second until it differs from `beforeState`.
   *
   * A change is: more than 20 extra characters of text, more visible images,
   * or a visible stop/cancel button.
   *
   * @param {object} beforeState - Baseline page state.
   * @param {number} timeoutMs - Maximum time to poll.
   * @returns {Promise<boolean>} `true` on change, `false` on timeout.
   */
  async waitForResponseStart(beforeState, timeoutMs) {
    const startedAt = Date.now();

    while (Date.now() - startedAt < timeoutMs) {
      const state = await this.capturePageState();
      const changed =
        state.textLength > beforeState.textLength + 20 ||
        state.imageCount > beforeState.imageCount ||
        state.stopVisible;

      if (changed) {
        return true;
      }

      await sleep(1000);
    }

    return false;
  }

  /**
   * Waits until the response has stopped changing.
   *
   * The page is sampled every 1.2 s. Once the tail of the text, the image
   * count and the stop-button flag have been identical for four consecutive
   * follow-up samples (counted as 4800 ms) and no stop button is visible, the
   * state is returned. On timeout the latest state is returned instead.
   *
   * @param {object} beforeState - Baseline page state.
   * @param {number} timeoutMs - Maximum time to wait.
   * @returns {Promise<object>} The final page state.
   */
  async waitForResponseCompletion(beforeState, timeoutMs) {
    const startedAt = Date.now();
    let stableSignature = "";
    let stableMs = 0;

    while (Date.now() - startedAt < timeoutMs) {
      const state = await this.capturePageState();
      const changed =
        state.textLength > beforeState.textLength + 20 ||
        state.imageCount > beforeState.imageCount;

      if (!changed) {
        await sleep(1200);
        continue;
      }

      const signature = JSON.stringify({
        textTail: state.text.slice(-1200),
        imageCount: state.imageCount,
        stopVisible: state.stopVisible,
      });

      if (signature === stableSignature) {
        stableMs += 1200;
      } else {
        stableSignature = signature;
        stableMs = 0;
      }

      if (stableMs >= 4800 && !state.stopVisible) {
        return state;
      }

      await sleep(1200);
    }

    return this.capturePageState();
  }

  /**
   * Captures a snapshot of the page from inside the browser.
   *
   * @returns {Promise<{text: string, textLength: number, imageCount: number,
   *   visibleButtons: string[], stopVisible: boolean}>} `text` holds at most
   *   the last 20000 characters of the main content, `textLength` its full
   *   length, `imageCount` the number of `img`/`canvas` elements at least
   *   100x40 px that are not hidden via CSS, `visibleButtons` the labels of
   *   the last 40 labelled buttons, and `stopVisible` whether any of those
   *   labels matches a stop pattern.
   */
  async capturePageState() {
    const page = await this.ensurePageReference();

    // The callback runs in the page, so it cannot use helpers from this
    // module; RegExp objects are passed across as their source strings.
    return page.evaluate(
      ({ stopPatterns }) => {
        const root =
          document.querySelector("main, [role='main']") || document.body;

        const isVisible = (element) => {
          const rect = element.getBoundingClientRect();
          const style = window.getComputedStyle(element);
          return (
            rect.width >= 100 &&
            rect.height >= 40 &&
            style.display !== "none" &&
            style.visibility !== "hidden" &&
            style.opacity !== "0"
          );
        };

        const text = normalize(root.innerText || document.body.innerText || "");
        const imageCount = Array.from(document.querySelectorAll("img, canvas")).filter(
          (element) => isVisible(element),
        ).length;
        const visibleButtons = Array.from(document.querySelectorAll("button"))
          .map(
            (button) =>
              (button.innerText || button.getAttribute("aria-label") || "").trim(),
          )
          .filter(Boolean)
          .slice(-40);
        const stopVisible = visibleButtons.some((label) =>
          stopPatterns.some((pattern) => new RegExp(pattern, "i").test(label)),
        );

        return {
          text: text.slice(-20000),
          textLength: text.length,
          imageCount,
          visibleButtons,
          stopVisible,
        };

        function normalize(value) {
          return value
            .replace(/\u00a0/g, " ")
            .replace(/[ \t]+\n/g, "\n")
            .replace(/\n{3,}/g, "\n\n")
            .trim();
        }
      },
      {
        stopPatterns: STOP_BUTTON_NAMES.map((pattern) => pattern.source),
      },
    );
  }

  /**
   * Creates the artifact directory `<outputDir>/<timestamp>-<prompt slug>`.
   *
   * @param {string} prompt - Prompt used to derive the slug.
   * @returns {Promise<string>} Path of the created directory.
   */
  async createRunDir(prompt) {
    const runDir = path.join(
      this.outputDir,
      `${makeTimestamp()}-${slugify(prompt)}`,
    );
    await ensureDir(runDir);
    return runDir;
  }

  /**
   * Saves up to `maxImages` large visible images from the page as PNG files.
   *
   * Candidates are `img`/`canvas` elements of at least 180x180 px, ranked by
   * how low they sit on the page and then by area. Images that cannot be read
   * (the in-page error is recorded but not surfaced) are skipped.
   *
   * @param {object} params
   * @param {string} params.runDir - Directory receiving `image-N.png` files.
   * @param {number} params.maxImages - Maximum number of candidates.
   * @returns {Promise<string[]>} Paths of the files written.
   */
  async extractVisibleImages({ runDir, maxImages }) {
    const page = await this.ensurePageReference();
    const images = await page.evaluate(async (limit) => {
      const isVisible = (element) => {
        const rect = element.getBoundingClientRect();
        const style = window.getComputedStyle(element);
        return (
          rect.width >= 180 &&
          rect.height >= 180 &&
          style.display !== "none" &&
          style.visibility !== "hidden" &&
          style.opacity !== "0"
        );
      };

      // Position dominates; area only breaks near-ties.
      const score = (element) => {
        const rect = element.getBoundingClientRect();
        return rect.bottom * 100000 + rect.width * rect.height;
      };

      const nodes = Array.from(document.querySelectorAll("img, canvas"))
        .filter((element) => isVisible(element))
        .sort((left, right) => score(right) - score(left))
        .slice(0, limit);

      const results = [];

      for (const element of nodes) {
        const rect = element.getBoundingClientRect();
        let dataUrl = null;
        let error = null;

        try {
          if (element instanceof HTMLCanvasElement) {
            dataUrl = element.toDataURL("image/png");
          } else if (element instanceof HTMLImageElement) {
            if (element.src.startsWith("data:image/")) {
              dataUrl = element.src;
            } else {
              // Redraw onto a canvas to obtain the pixels; any failure here
              // is caught below and recorded in `error`.
              const canvas = document.createElement("canvas");
              const width = element.naturalWidth || Math.round(rect.width);
              const height = element.naturalHeight || Math.round(rect.height);
              canvas.width = width;
              canvas.height = height;
              const context = canvas.getContext("2d");
              context.drawImage(element, 0, 0, width, height);
              dataUrl = canvas.toDataURL("image/png");
            }
          }
        } catch (caughtError) {
          error = String(caughtError);
        }

        results.push({
          dataUrl,
          width: Math.round(rect.width),
          height: Math.round(rect.height),
          tagName: element.tagName.toLowerCase(),
          alt: element.alt || "",
          error,
        });
      }

      return results;
    }, maxImages);

    const savedPaths = [];

    for (const [index, image] of images.entries()) {
      if (!image.dataUrl) {
        continue;
      }

      // File numbers follow the candidate ranking, so skipped images leave gaps.
      const targetPath = path.join(runDir, `image-${index + 1}.png`);
      await fs.writeFile(targetPath, dataUrlToBuffer(image.dataUrl));
      savedPaths.push(targetPath);
    }

    return savedPaths;
  }
}