chrome-ai-bridge 1.0.5 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -256,7 +256,7 @@ export const askChatGPTWeb = defineTool({
256
256
  response.appendResponseLine(`既存のプロジェクトチャットを使用: ${latestSession.url}`);
257
257
  }
258
258
  else {
259
- response.appendResponseLine('既存チャットが見つかりませんでした。新規作成します。');
259
+ response.appendResponseLine('📝 新規チャットを作成します');
260
260
  isNewChat = true;
261
261
  }
262
262
  }
@@ -0,0 +1,370 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Google LLC
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import fs from 'node:fs';
7
+ import path from 'node:path';
8
+ import os from 'node:os';
9
+ import { Jimp } from 'jimp';
10
+ import z from 'zod';
11
+ import { GEMINI_CONFIG } from '../config.js';
12
+ import { getLoginStatus, waitForLoginStatus, LoginStatus, } from '../login-helper.js';
13
+ import { ToolCategories } from './categories.js';
14
+ import { defineTool } from './ToolDefinition.js';
15
/**
 * Default number of pixels trimmed from every edge of a generated image when
 * removing the watermark. It is the fallback for `cropWatermark`'s `margin`
 * argument and for the tool's optional `cropMargin` parameter; nothing in this
 * module adjusts it automatically.
 */
const DEFAULT_CROP_MARGIN = 80;
19
/**
 * Navigate `page` to `url`, retrying transient network failures.
 *
 * Only errors whose message contains `ERR_ABORTED` or `net::ERR_` are retried;
 * every other error (for example a navigation timeout) is rethrown at once.
 * The pause before the next attempt grows linearly: `retryDelayMs * attempt`.
 *
 * @param {object} page - Object exposing an async `goto(url, options)` method.
 * @param {string} url - Destination URL.
 * @param {object} [options]
 * @param {string} [options.waitUntil='networkidle2'] - Forwarded to `page.goto`.
 * @param {number} [options.maxRetries=3] - Maximum number of attempts. Values
 *   below 1 (or values that are not numbers) are treated as a single attempt.
 * @param {number} [options.retryDelayMs=1000] - Base back-off in milliseconds.
 * @returns {Promise<void>} Resolves as soon as one attempt succeeds.
 * @throws {Error} The error of the last attempt; non-Error rejections are
 *   wrapped in an `Error` carrying their string form.
 */
async function navigateWithRetry(page, url, options = {}) {
  // Defaults are applied per field so that a partial options object such as
  // `{ maxRetries: 5 }` still gets the documented `waitUntil` value.
  const {
    waitUntil = 'networkidle2',
    maxRetries = 3,
    retryDelayMs = 1000,
  } = options ?? {};
  // Always try at least once; otherwise the loop body would never run and
  // there would be no error to report.
  const attempts = Math.max(1, Math.floor(Number(maxRetries)) || 1);

  for (let attempt = 1; ; attempt++) {
    try {
      await page.goto(url, { waitUntil, timeout: 30000 });
      return;
    } catch (error) {
      const failure = error instanceof Error ? error : new Error(String(error));
      const isRetryable =
        failure.message.includes('ERR_ABORTED') ||
        failure.message.includes('net::ERR_');
      if (!isRetryable || attempt >= attempts) {
        throw failure;
      }
      await new Promise(resolve => setTimeout(resolve, retryDelayMs * attempt));
    }
  }
}
42
/**
 * Return a tab that is already showing Gemini, or open a new one.
 *
 * The pages snapshot is refreshed first. The first page whose URL has the
 * host `gemini.google.com` is brought to the front and returned; if there is
 * none, a new page is created via `context.newPage()`.
 *
 * @param {object} context - Tool context exposing `createPagesSnapshot()`,
 *   `getPages()` and `newPage()`.
 * @returns {Promise<object>} The page to drive.
 */
async function getOrCreateGeminiPage(context) {
  // Compare the parsed hostname rather than searching the raw URL: a substring
  // test would also match unrelated tabs that merely mention the host in a
  // query string or redirect parameter.
  const isGeminiTab = (rawUrl) => {
    try {
      return new URL(rawUrl).hostname === 'gemini.google.com';
    } catch {
      // Empty or otherwise unparsable URLs are not Gemini tabs.
      return false;
    }
  };

  await context.createPagesSnapshot();
  for (const page of context.getPages()) {
    if (isGeminiTab(page.url())) {
      await page.bringToFront();
      return page;
    }
  }
  return await context.newPage();
}
58
/**
 * Append fixed composition instructions to an image prompt.
 *
 * The added text asks for a centered subject with padding, a solid background
 * and nothing important near the edges (the bottom-right corner in
 * particular), so that trimming the borders afterwards loses no content.
 *
 * @param {string} prompt - The caller's image prompt.
 * @returns {string} The prompt followed by a blank line and the requirements.
 */
function enhancePromptForCropping(prompt) {
  // Two leading empty entries yield the blank line separating the prompt from
  // the requirements once everything is joined with newlines.
  const requirementLines = [
    '',
    '',
    'Composition requirements:',
    '- Center the main subject with generous padding on all sides (at least 15% margin from edges)',
    '- Use a clean, solid background color',
    '- Ensure no important elements touch the image edges, especially the bottom-right corner',
  ];
  return prompt + requirementLines.join('\n');
}
71
/**
 * Trim the same number of pixels from all four edges of an image and write
 * the result to `outputPath`.
 *
 * @param {string} inputPath - Image file to read.
 * @param {string} outputPath - Destination for the cropped image.
 * @param {number} [margin=DEFAULT_CROP_MARGIN] - Pixels to remove from each
 *   edge. Must be a finite number >= 0; it is rounded to whole pixels.
 * @returns {Promise<{width: number, height: number}>} Size of the cropped image.
 * @throws {RangeError} If `margin` is not a finite, non-negative number.
 * @throws {Error} If the image is too small for the requested margin, or if
 *   reading/writing the image fails.
 */
async function cropWatermark(inputPath, outputPath, margin = DEFAULT_CROP_MARGIN) {
  // Validate before any I/O: a negative or NaN margin would slip past the
  // size guard below and hand invalid geometry to the crop call.
  if (typeof margin !== 'number' || !Number.isFinite(margin) || margin < 0) {
    throw new RangeError(`Invalid crop margin: ${String(margin)}`);
  }
  // Crop rectangles are expressed in whole pixels.
  const inset = Math.round(margin);

  const image = await Jimp.read(inputPath);
  const { width, height } = image;
  const newWidth = width - inset * 2;
  const newHeight = height - inset * 2;
  if (newWidth <= 0 || newHeight <= 0) {
    throw new Error(`Image too small to crop: ${width}x${height} with margin ${margin}`);
  }
  image.crop({ x: inset, y: inset, w: newWidth, h: newHeight });
  await image.write(outputPath);
  return { width: newWidth, height: newHeight };
}
87
/**
 * Poll a directory until a new (or rewritten) image file appears and return
 * its path.
 *
 * A baseline of the existing `.png`/`.jpg`/`.jpeg` files and their mtimes is
 * recorded when the call starts. On every poll, an image counts as a
 * candidate when it is missing from the baseline or its mtime is newer than
 * the recorded one. A candidate is returned once its size is non-zero and
 * unchanged across two `stat` calls `settleMs` apart.
 *
 * Names ending in `.crdownload` or `.tmp` can never match the image pattern,
 * so they need no separate exclusion.
 *
 * @param {string} downloadDir - Directory to watch. It may not exist yet;
 *   read errors are ignored and polling continues.
 * @param {number} [timeoutMs=60000] - Give up after this many milliseconds.
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs=1000] - Delay between directory scans.
 * @param {number} [options.settleMs=500] - Delay between the two size checks.
 * @returns {Promise<string>} Full path of the detected image file.
 * @throws {Error} `Download timeout after <timeoutMs>ms` when nothing arrives.
 */
async function waitForDownload(downloadDir, timeoutMs = 60000, { pollIntervalMs = 1000, settleMs = 500 } = {}) {
  const imagePattern = /\.(png|jpg|jpeg)$/i;
  const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
  const startTime = Date.now();

  // Baseline: file name -> mtime (ms) of images present before the download.
  const baseline = new Map();
  try {
    const existing = await fs.promises.readdir(downloadDir);
    for (const name of existing) {
      if (!imagePattern.test(name)) {
        continue;
      }
      try {
        const info = await fs.promises.stat(path.join(downloadDir, name));
        baseline.set(name, info.mtime.getTime());
      } catch {
        // A single unreadable entry (e.g. removed meanwhile) must not abort
        // the snapshot; otherwise the remaining old files would look new.
      }
    }
  } catch {
    // Directory might not exist yet; keep polling below.
  }

  while (Date.now() - startTime < timeoutMs) {
    await sleep(pollIntervalMs);
    let names;
    try {
      names = await fs.promises.readdir(downloadDir);
    } catch {
      continue;
    }
    for (const name of names) {
      if (!imagePattern.test(name)) {
        continue;
      }
      const filePath = path.join(downloadDir, name);
      try {
        const first = await fs.promises.stat(filePath);
        const knownMtime = baseline.get(name);
        if (knownMtime !== undefined && first.mtime.getTime() <= knownMtime) {
          continue; // Unchanged since the baseline.
        }
        // Confirm the file is complete: non-empty and no longer growing.
        await sleep(settleMs);
        const second = await fs.promises.stat(filePath);
        if (second.size > 0 && second.size === first.size) {
          return filePath;
        }
      } catch {
        // File vanished or is unreadable; try the next entry.
      }
    }
  }
  throw new Error(`Download timeout after ${timeoutMs}ms`);
}
140
/**
 * MCP tool `ask_gemini_image`: drives the Gemini web UI in the browser to
 * generate an image, downloads it and stores it at `outputPath`.
 *
 * Handler flow:
 *  1. Reuse or open a Gemini tab and navigate to the app page.
 *  2. Make sure the user is logged in (waits up to 120 s for a manual login).
 *  3. Type the prompt (extended with composition hints) and submit it.
 *  4. Poll the page for up to 3 minutes until an image or download control shows up.
 *  5. Click the download button and wait for a new image file in `~/Downloads`.
 *  6. Crop the watermark margin (or copy as-is when `skipCrop` is set) to
 *     `outputPath`, then delete the downloaded original.
 *
 * Progress and failures are reported through `response.appendResponseLine`;
 * the handler does not throw for failures inside the main flow.
 */
export const askGeminiImage = defineTool({
  name: 'ask_gemini_image',
  description: 'Generate image using Gemini (Nano Banana / 3 Preview) via browser. ' +
    'Automatically crops watermark from edges. ' +
    'Rate limit: ~2 images/day for free users.',
  annotations: {
    category: ToolCategories.NAVIGATION_AUTOMATION,
    readOnlyHint: false,
  },
  schema: {
    prompt: z
      .string()
      .describe('Image generation prompt. Use natural language descriptions. ' +
        'Structure: [Subject + Adjectives] doing [Action] in [Location/Context]. ' +
        '[Composition/Camera Angle]. [Lighting/Atmosphere]. [Style/Media]. ' +
        'HEX color codes like "#9F2B68" are supported.'),
    outputPath: z
      .string()
      .describe('Output file path for the generated image. ' +
        'Will be cropped to remove watermark. Example: /tmp/generated-image.png'),
    cropMargin: z
      .number()
      .optional()
      .describe(`Pixels to crop from each edge to remove watermark. Default: ${DEFAULT_CROP_MARGIN}`),
    skipCrop: z
      .boolean()
      .optional()
      .describe('Skip watermark cropping (keep original image). Default: false'),
  },
  handler: async (request, response, context) => {
    const {
      prompt,
      outputPath,
      cropMargin = DEFAULT_CROP_MARGIN,
      skipCrop = false,
    } = request.params;
    const page = await getOrCreateGeminiPage(context);
    try {
      response.appendResponseLine('Geminiに接続中...');
      // Navigate to the Gemini app page.
      await navigateWithRetry(page, GEMINI_CONFIG.BASE_URL + 'app', {
        waitUntil: 'networkidle2',
      });
      // Wait until either the account button or the prompt box is rendered.
      // A timeout here is not fatal; the login check below decides.
      try {
        await Promise.race([
          page.waitForSelector('button[aria-label*="Account"], button[aria-label*="アカウント"]', { timeout: 10000 }),
          page.waitForSelector('[role="textbox"]', { timeout: 10000 }),
        ]);
      } catch {
        response.appendResponseLine('⚠️ UI安定化待機タイムアウト(続行)');
      }
      // Check login; give the user up to 120 s to sign in manually.
      const loginStatus = await getLoginStatus(page, 'gemini');
      if (loginStatus === LoginStatus.NEEDS_LOGIN) {
        response.appendResponseLine('\n❌ Geminiへのログインが必要です');
        response.appendResponseLine('📱 ブラウザでGoogleアカウントにログインしてください');
        const finalStatus = await waitForLoginStatus(page, 'gemini', 120000, msg => response.appendResponseLine(msg));
        if (finalStatus !== LoginStatus.LOGGED_IN) {
          response.appendResponseLine('❌ ログインタイムアウト');
          return;
        }
      }
      response.appendResponseLine('✅ ログイン確認完了');
      // Add composition hints so that cropping the edges loses no content.
      const enhancedPrompt = enhancePromptForCropping(prompt);
      response.appendResponseLine('プロンプトを送信中...');
      // Replace the textbox content with the prompt and fire an input event
      // so the page notices the change.
      const questionSent = await page.evaluate(promptText => {
        const clearElement = (el) => {
          while (el.firstChild) {
            el.removeChild(el.firstChild);
          }
        };
        const textbox = document.querySelector('[role="textbox"]');
        if (textbox) {
          textbox.focus();
          clearElement(textbox);
          textbox.textContent = promptText;
          textbox.dispatchEvent(new Event('input', { bubbles: true }));
          return true;
        }
        return false;
      }, enhancedPrompt);
      if (!questionSent) {
        response.appendResponseLine('❌ 入力欄が見つかりません');
        return;
      }
      await new Promise(resolve => setTimeout(resolve, 500));
      // Click the send button (matched by Japanese/English text or aria-label).
      const sent = await page.evaluate(() => {
        const buttons = Array.from(document.querySelectorAll('button'));
        const sendButton = buttons.find(b => b.textContent?.includes('プロンプトを送信') ||
          b.textContent?.includes('送信') ||
          b.getAttribute('aria-label')?.includes('送信') ||
          b.getAttribute('aria-label')?.includes('Send'));
        if (sendButton && !sendButton.disabled) {
          sendButton.click();
          return true;
        }
        return false;
      });
      if (!sent) {
        // Fall back to submitting with the keyboard.
        await page.keyboard.press('Enter');
        response.appendResponseLine('⚠️ 送信ボタンが見つかりません (Enterキーを試行)');
      }
      response.appendResponseLine('🎨 画像生成中... (1-2分かかることがあります)');
      // Poll every 2 s, for up to 3 minutes, for a generated image or a
      // download control.
      const startTime = Date.now();
      const maxWaitTime = 180000; // 3 minutes
      let imageFound = false;
      let idleWarningShown = false;
      while (Date.now() - startTime < maxWaitTime) {
        await new Promise(resolve => setTimeout(resolve, 2000));
        const status = await page.evaluate(() => {
          // Generated image candidates.
          const images = document.querySelectorAll('img[src*="blob:"], img[src*="generated"]');
          // Download button or menu entry.
          const downloadButtons = Array.from(document.querySelectorAll('button, [role="menuitem"]'));
          const hasDownload = downloadButtons.some(b => b.textContent?.includes('ダウンロード') ||
            b.textContent?.includes('Download') ||
            b.getAttribute('aria-label')?.includes('download') ||
            b.getAttribute('aria-label')?.includes('ダウンロード'));
          // Still generating?
          const isGenerating = document.body.innerText.includes('生成中') ||
            document.body.innerText.includes('Generating') ||
            document.querySelector('[role="progressbar"]') !== null;
          return {
            imageCount: images.length,
            hasDownload,
            isGenerating,
          };
        });
        if (status.imageCount > 0 || status.hasDownload) {
          imageFound = true;
          response.appendResponseLine(`✅ 画像生成完了 (${Math.floor((Date.now() - startTime) / 1000)}秒)`);
          break;
        }
        if (!status.isGenerating && Date.now() - startTime > 30000 && !idleWarningShown) {
          // No indicator and no image after 30 s: generation may have failed.
          // Report this once instead of on every poll.
          idleWarningShown = true;
          response.appendResponseLine('⚠️ 生成中インジケータが消えました...');
        }
      }
      if (!imageFound) {
        response.appendResponseLine('❌ 画像生成タイムアウト (3分)');
        return;
      }
      response.appendResponseLine('📥 画像をダウンロード中...');
      // Start watching the Downloads folder BEFORE clicking. The watcher
      // records the existing files when it starts; if it were started after
      // the click, a download that finished quickly would be part of that
      // baseline and never be reported.
      const userDownloadsDir = path.join(os.homedir(), 'Downloads');
      const pendingDownload = waitForDownload(userDownloadsDir, 60000); // 60 seconds
      // The result is awaited further down. This no-op handler only prevents
      // an unhandled rejection if we leave early; the watcher then simply
      // runs until its own timeout.
      pendingDownload.catch(() => { });
      // Click the download button; Gemini labels it
      // "フルサイズの画像をダウンロード" / "フルサイズでダウンロード".
      const downloadClicked = await page.evaluate(() => {
        const buttons = Array.from(document.querySelectorAll('button'));
        const downloadBtn = buttons.find(b => {
          const text = b.textContent || '';
          const ariaLabel = b.getAttribute('aria-label') || '';
          const description = b.getAttribute('aria-describedby')
            ? document.getElementById(b.getAttribute('aria-describedby'))?.textContent || ''
            : '';
          return (text.includes('フルサイズ') ||
            text.includes('ダウンロード') ||
            ariaLabel.includes('ダウンロード') ||
            ariaLabel.includes('download') ||
            description.includes('フルサイズ') ||
            description.includes('ダウンロード'));
        });
        if (downloadBtn) {
          downloadBtn.click();
          return true;
        }
        return false;
      });
      if (!downloadClicked) {
        response.appendResponseLine('⚠️ ダウンロードボタンが見つかりません');
        response.appendResponseLine('ヒント: ブラウザで画像を右クリックして保存してください');
        return;
      }
      response.appendResponseLine('⏳ ダウンロード処理を待機中...');
      let downloadedPath;
      try {
        downloadedPath = await pendingDownload;
        response.appendResponseLine(`✅ ダウンロード完了: ${path.basename(downloadedPath)}`);
      } catch {
        response.appendResponseLine('❌ ダウンロード待機タイムアウト (60秒)');
        response.appendResponseLine('ヒント: ブラウザで画像を右クリックして「画像を保存」してください');
        return;
      }
      // Ensure the output directory exists.
      const outputDir = path.dirname(outputPath);
      await fs.promises.mkdir(outputDir, { recursive: true });
      // Crop the watermark, or copy the file unchanged.
      if (skipCrop) {
        await fs.promises.copyFile(downloadedPath, outputPath);
        response.appendResponseLine(`📄 画像保存(クロップなし): ${outputPath}`);
      } else {
        response.appendResponseLine(`✂️ ウォーターマークをクロップ中 (margin: ${cropMargin}px)...`);
        try {
          const { width, height } = await cropWatermark(downloadedPath, outputPath, cropMargin);
          response.appendResponseLine(`✅ クロップ完了: ${width}x${height}px → ${outputPath}`);
        } catch (error) {
          // Cropping is best-effort: fall back to saving the original.
          const msg = error instanceof Error ? error.message : String(error);
          response.appendResponseLine(`⚠️ クロップ失敗: ${msg}`);
          response.appendResponseLine('元の画像をそのまま保存します...');
          await fs.promises.copyFile(downloadedPath, outputPath);
        }
      }
      // Remove the downloaded original from the Downloads folder, unless the
      // caller chose that very file as the output (deleting it would discard
      // the result).
      if (path.resolve(downloadedPath) !== path.resolve(outputPath)) {
        try {
          await fs.promises.unlink(downloadedPath);
        } catch {
          // Ignore cleanup errors.
        }
      }
      response.appendResponseLine('\n🎉 画像生成完了!');
      response.appendResponseLine(`📁 出力: ${outputPath}`);
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      if (msg.includes('Target closed') || msg.includes('Session closed')) {
        // The browser connection is gone; only a server restart recovers it.
        response.appendResponseLine('❌ ブラウザ接続が切れました');
        response.appendResponseLine('→ MCPサーバーを再起動してください');
      } else {
        response.appendResponseLine(`❌ エラー: ${msg}`);
      }
    }
  },
});
@@ -4,12 +4,14 @@
4
4
  * SPDX-License-Identifier: Apache-2.0
5
5
  */
6
6
  import * as chatgptWebTools from './chatgpt-web.js';
7
+ import * as geminiImageTools from './gemini-image.js';
7
8
  import * as geminiWebTools from './gemini-web.js';
8
9
  /**
9
10
  * All optional (web-llm) tools as an array.
10
11
  */
11
12
  export const optionalTools = [
12
13
  ...Object.values(chatgptWebTools),
14
+ ...Object.values(geminiImageTools),
13
15
  ...Object.values(geminiWebTools),
14
16
  ];
15
17
  /**
@@ -61,9 +63,9 @@ export function getOptionalToolCount() {
61
63
  * Metadata about optional tools for documentation.
62
64
  */
63
65
  export const WEB_LLM_TOOLS_INFO = {
64
- disclaimer: 'Web-LLM tools (ask_chatgpt_web, ask_gemini_web) are experimental and best-effort. ' +
66
+ disclaimer: 'Web-LLM tools (ask_chatgpt_web, ask_gemini_web, ask_gemini_image) are experimental and best-effort. ' +
65
67
  'They depend on specific website UIs and may break when those UIs change. ' +
66
68
  'For production use, consider using official APIs instead.',
67
69
  disableEnvVar: 'MCP_DISABLE_WEB_LLM',
68
- tools: ['ask_chatgpt_web', 'ask_gemini_web'],
70
+ tools: ['ask_chatgpt_web', 'ask_gemini_image', 'ask_gemini_web'],
69
71
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chrome-ai-bridge",
3
- "version": "1.0.5",
3
+ "version": "1.0.7",
4
4
  "description": "MCP server bridging Chrome browser and AI assistants (ChatGPT, Gemini). Browser automation + AI consultation.",
5
5
  "type": "module",
6
6
  "bin": "./scripts/cli.mjs",
@@ -59,6 +59,7 @@
59
59
  "@modelcontextprotocol/sdk": "1.18.1",
60
60
  "archiver": "^7.0.1",
61
61
  "debug": "4.4.3",
62
+ "jimp": "^1.6.0",
62
63
  "puppeteer": "^24.31.0",
63
64
  "yargs": "18.0.0"
64
65
  },