@blackbox_ai/blackbox-cli-core 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/dist/src/config/config.d.ts +3 -0
  2. package/dist/src/config/config.js +14 -0
  3. package/dist/src/config/config.js.map +1 -1
  4. package/dist/src/core/openaiContentGenerator/converter.js +8 -0
  5. package/dist/src/core/openaiContentGenerator/converter.js.map +1 -1
  6. package/dist/src/core/openaiContentGenerator/pipeline.d.ts +3 -0
  7. package/dist/src/core/openaiContentGenerator/pipeline.js +38 -1
  8. package/dist/src/core/openaiContentGenerator/pipeline.js.map +1 -1
  9. package/dist/src/core/prompts.js +11 -1
  10. package/dist/src/core/prompts.js.map +1 -1
  11. package/dist/src/generated/git-commit.d.ts +2 -2
  12. package/dist/src/generated/git-commit.js +2 -2
  13. package/dist/src/services/gitService.js +1 -1
  14. package/dist/src/services/gitService.js.map +1 -1
  15. package/dist/src/services/gitService.test.js +1 -1
  16. package/dist/src/services/gitService.test.js.map +1 -1
  17. package/dist/src/services/loopDetectionService.js +2 -2
  18. package/dist/src/services/loopDetectionService.js.map +1 -1
  19. package/dist/src/tools/browser_use.d.ts +106 -0
  20. package/dist/src/tools/browser_use.js +870 -0
  21. package/dist/src/tools/browser_use.js.map +1 -0
  22. package/dist/src/tools/browser_use.test.d.ts +6 -0
  23. package/dist/src/tools/browser_use.test.js +141 -0
  24. package/dist/src/tools/browser_use.test.js.map +1 -0
  25. package/dist/src/tools/shell.js +27 -3
  26. package/dist/src/tools/shell.js.map +1 -1
  27. package/dist/src/tools/tool-error.d.ts +7 -1
  28. package/dist/src/tools/tool-error.js +7 -0
  29. package/dist/src/tools/tool-error.js.map +1 -1
  30. package/dist/src/tools/tool-names.d.ts +8 -0
  31. package/dist/src/tools/tool-names.js +8 -0
  32. package/dist/src/tools/tool-names.js.map +1 -1
  33. package/dist/tsconfig.tsbuildinfo +1 -1
  34. package/package.json +2 -2
@@ -0,0 +1,870 @@
1
+ /**
2
+ * @license
3
+ * Copyright 2025 Google LLC
4
+ * SPDX-License-Identifier: Apache-2.0
5
+ */
6
+ import { chromium } from 'playwright';
7
+ import { ToolNames } from './tool-names.js';
8
+ import { ToolErrorType } from './tool-error.js';
9
+ import { BaseDeclarativeTool, BaseToolInvocation, Kind, } from './tools.js';
10
// Lead-in text that the screenshot-returning tool invocations in this file
// place at the top of the report they send back to the model.
const screenshotPrompt = [
    'Here are the action result, console logs and screenshot after the action execution.',
    'Carefully review and decide the next steps to complete the task successfully.',
].join('\n');
12
/**
 * Singleton browser session manager.
 *
 * Owns at most one headless Chromium browser and one page with a fixed
 * 900x600 viewport. Every page interaction goes through `doAction`, which
 * collects console output and a screenshot alongside the action's own log.
 */
export class ServerBrowserSession {
    browser;
    page;
    currentMousePosition;
    static instance;
    constructor() { }
    /**
     * Returns the shared session, creating it on first use.
     * @returns {ServerBrowserSession}
     */
    static getInstance() {
        if (!ServerBrowserSession.instance) {
            ServerBrowserSession.instance = new ServerBrowserSession();
        }
        return ServerBrowserSession.instance;
    }
    /**
     * Starts a fresh headless browser and page, closing any browser that is
     * already open first.
     * @returns {Promise<{execution_success: boolean, logs: string, execution_logs: string}>}
     *   Never rejects for launch problems; failures are reported in the result.
     */
    async launchBrowser() {
        console.log('Launching browser...');
        if (this.browser) {
            await this.closeBrowser();
        }
        try {
            this.browser = await chromium.launch({
                args: [
                    '--user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/537.36',
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-dev-shm-usage',
                    '--disable-gpu',
                    '--disable-web-security',
                    '--disable-extensions',
                    '--force-device-scale-factor=1',
                    '--disable-background-timer-throttling',
                    '--disable-backgrounding-occluded-windows',
                    '--disable-renderer-backgrounding',
                ],
                headless: true,
            });
            const context = await this.browser.newContext({
                viewport: { width: 900, height: 600 },
                deviceScaleFactor: 1,
                screen: { width: 900, height: 600 },
                ignoreHTTPSErrors: true,
                bypassCSP: true,
            });
            this.page = await context.newPage();
            // Set a default background to ensure screenshots work
            await this.page.addStyleTag({
                content: `
html, body {
background-color: white !important;
min-height: 100vh;
}
`,
            });
            console.log('Browser launched successfully with 900x600 viewport');
            return {
                execution_success: true,
                logs: 'Browser session started successfully with 900x600 viewport',
                execution_logs: 'Browser launched and ready for interaction at 900x600 resolution',
            };
        }
        catch (error) {
            const errorMessage = `Failed to launch browser: ${error instanceof Error ? error.message : String(error)}`;
            console.error(`[Error] Exception during Starting browser - ${errorMessage}`);
            // The browser process may already be running if a later setup step
            // failed; release it so a failed launch does not leak a process or
            // leave a half-initialised session behind.
            try {
                await this.browser?.close();
            }
            catch (closeError) {
                console.warn(`Error closing browser after failed launch: ${closeError instanceof Error ? closeError.message : String(closeError)}`);
            }
            this.browser = undefined;
            this.page = undefined;
            this.currentMousePosition = undefined;
            return {
                execution_success: false,
                logs: errorMessage,
                execution_logs: errorMessage,
            };
        }
    }
    /**
     * Closes the browser if one is open and clears the session state.
     * @returns {Promise<{execution_success: boolean, logs: string, execution_logs: string}>}
     *   Reports success when there was nothing to close.
     */
    async closeBrowser() {
        if (!this.browser && !this.page) {
            return {
                execution_success: true,
                logs: 'Browser was already closed',
                execution_logs: 'No browser to close',
            };
        }
        console.log('Closing browser...');
        try {
            await this.browser?.close();
            return {
                execution_success: true,
                logs: 'Browser session closed successfully',
                execution_logs: 'Browser closed successfully',
            };
        }
        catch (error) {
            const errorMessage = `Error closing browser: ${error instanceof Error ? error.message : String(error)}`;
            console.warn(errorMessage);
            return {
                execution_success: false,
                logs: errorMessage,
                execution_logs: errorMessage,
            };
        }
        finally {
            // Drop the handles even when close() throws: keeping them would make
            // navigateToUrl() skip its relaunch and keep using a broken session.
            this.browser = undefined;
            this.page = undefined;
            this.currentMousePosition = undefined;
        }
    }
    /**
     * Polls the page HTML every 500 ms until its length has been unchanged
     * for three consecutive checks, or until `timeout` has elapsed.
     * @param {number} [timeout=5000] Maximum time to poll, in milliseconds.
     * @returns {Promise<void>}
     */
    async waitTillHTMLStable(timeout = 5000) {
        if (!this.page)
            return;
        const checkDurationMs = 500;
        const maxChecks = timeout / checkDurationMs;
        let lastHTMLSize = 0;
        let checkCounts = 1;
        let countStableSizeIterations = 0;
        const minStableSizeIterations = 3;
        while (checkCounts <= maxChecks) {
            try {
                const html = await this.page.content();
                const currentHTMLSize = html.length;
                console.log(`last: ${lastHTMLSize} <> curr: ${currentHTMLSize}`);
                // A size of 0 means "no previous sample", so the first check never counts as stable.
                if (lastHTMLSize !== 0 && currentHTMLSize === lastHTMLSize) {
                    countStableSizeIterations++;
                }
                else {
                    countStableSizeIterations = 0;
                }
                if (countStableSizeIterations >= minStableSizeIterations) {
                    console.log('Page rendered fully...');
                    break;
                }
                lastHTMLSize = currentHTMLSize;
                await new Promise((resolve) => setTimeout(resolve, checkDurationMs));
                checkCounts++;
            }
            catch (error) {
                console.warn('Error checking HTML stability:', error);
                break;
            }
        }
    }
    /**
     * Runs `action` against the current page and gathers everything the tools
     * report back: the action's log, console messages emitted meanwhile, and
     * a 900x600 PNG screenshot as a data URI.
     * @param {() => Promise<string>} action Callback performing the interaction;
     *   its resolved value is appended to the execution log. A rejection is
     *   recorded and reported as `execution_success: false`, not rethrown.
     * @returns {Promise<object>} Result with `execution_success`, `screenshot`,
     *   `console_logs`, `execution_logs`, `currentUrl`, `currentMousePosition`
     *   and `logs` (same value as `console_logs`).
     * @throws {Error} When no page is open.
     */
    async doAction(action) {
        const page = this.page;
        if (!page) {
            throw new Error('Browser is not launched. This may occur if the browser was automatically closed.');
        }
        let executionSuccess = true;
        let screenshot;
        const logs = [];
        let executionLog = '';
        let lastLogTs = Date.now();
        const consoleListener = (msg) => {
            try {
                if (msg.type() === 'log') {
                    logs.push(msg.text());
                }
                else {
                    logs.push(`[${msg.type()}] ${msg.text()}`);
                }
                lastLogTs = Date.now();
            }
            catch (error) {
                logs.push(`[Console Error] ${error instanceof Error ? error.message : String(error)}`);
            }
        };
        page.on('console', consoleListener);
        try {
            try {
                const result = await action();
                executionLog += `\n ${result}`;
            }
            catch (err) {
                executionLog += `\n [Error] ${err instanceof Error ? err.message : String(err)}`;
                executionSuccess = false;
            }
            // Pass a getter rather than the number itself so that messages
            // arriving during the wait actually push the quiet period back.
            await this.waitForConsoleInactivity(() => lastLogTs);
            try {
                // Ensure page is ready for screenshot
                await page.waitForLoadState('domcontentloaded', { timeout: 5000 });
                // Take high-quality screenshot with exact 900x600 dimensions to match viewport
                const screenshotBytes = await page.screenshot({
                    type: 'png',
                    fullPage: false, // Only capture viewport
                    clip: { x: 0, y: 0, width: 900, height: 600 }, // Exact viewport dimensions
                    omitBackground: false,
                });
                if (screenshotBytes && screenshotBytes.length > 0) {
                    const screenshotBase64 = screenshotBytes.toString('base64');
                    screenshot = `data:image/png;base64,${screenshotBase64}`;
                    console.log(`Screenshot captured: 900x600px, ${screenshotBase64.length} chars, data URI length: ${screenshot.length}`);
                    executionLog += `\nScreenshot captured at 900x600 resolution (1:1 scale with viewport)`;
                }
                else {
                    console.error('Screenshot capture returned empty buffer');
                    executionLog += `\n[Error] Screenshot capture returned empty buffer`;
                }
            }
            catch (error) {
                console.error('Screenshot capture failed:', error);
                executionLog += `\n[Error] Error taking screenshot of the current state of page! ${error instanceof Error ? error.message : String(error)}`;
                // Try alternative screenshot method as fallback
                try {
                    console.log('Attempting fallback screenshot method...');
                    const fallbackBytes = await page.screenshot({
                        type: 'png',
                        fullPage: false,
                    });
                    if (fallbackBytes && fallbackBytes.length > 0) {
                        const fallbackBase64 = fallbackBytes.toString('base64');
                        screenshot = `data:image/png;base64,${fallbackBase64}`;
                        console.log(`Fallback screenshot captured: ${fallbackBase64.length} chars`);
                        executionLog += `\nFallback screenshot captured successfully`;
                    }
                }
                catch (fallbackError) {
                    console.error('Fallback screenshot also failed:', fallbackError);
                    executionLog += `\n[Error] Fallback screenshot also failed: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`;
                }
            }
        }
        finally {
            // Always detach the listener so repeated actions do not stack handlers.
            try {
                page.off('console', consoleListener);
            }
            catch (error) {
                console.log(`Error removing console listener: ${error instanceof Error ? error.message : String(error)}`);
            }
        }
        if (executionSuccess) {
            executionLog += '\n Action executed Successfully!';
        }
        return {
            execution_success: executionSuccess,
            screenshot,
            console_logs: logs.join('\n'),
            execution_logs: executionLog,
            currentUrl: page.url(),
            currentMousePosition: this.currentMousePosition,
            // Also provide the old format for backward compatibility
            logs: logs.join('\n'),
        };
    }
    /**
     * Waits until at least 500 ms have passed since the last console message,
     * or until `timeout` has elapsed, checking every 100 ms.
     * @param {number | (() => number)} lastLogTs Timestamp (ms) of the last
     *   console message, or a function returning the current value. Only the
     *   function form can observe messages that arrive while waiting.
     * @param {number} [timeout=3000] Upper bound on the wait, in milliseconds.
     * @returns {Promise<void>}
     */
    async waitForConsoleInactivity(lastLogTs, timeout = 3000) {
        const readLastLogTs = typeof lastLogTs === 'function' ? lastLogTs : () => lastLogTs;
        const startTime = Date.now();
        while (Date.now() - readLastLogTs() < 500 &&
            Date.now() - startTime < timeout) {
            await new Promise((resolve) => setTimeout(resolve, 100));
        }
    }
    /**
     * Navigates the page to `url`, launching the browser first if needed.
     * @param {string} url Address to open.
     * @returns {Promise<object>} The launch failure result if the browser could
     *   not be started, otherwise the `doAction` result for the navigation.
     */
    async navigateToUrl(url) {
        if (!this.page || !this.browser) {
            const launchResult = await this.launchBrowser();
            if (!launchResult.execution_success) {
                return launchResult;
            }
        }
        return this.doAction(async () => {
            if (!this.page)
                throw new Error('Page not available');
            let executionLog = '';
            console.log(`Navigating to URL: ${url}`);
            try {
                const response = await this.page.goto(url, {
                    timeout: 30000,
                    waitUntil: 'domcontentloaded',
                });
                if (!response) {
                    throw new Error(`Navigation failed or no response received for URL: ${url}`);
                }
                const status = response.status();
                executionLog += `\nNavigated to URL: ${url} (Status: ${status})`;
                if (status >= 400) {
                    executionLog += `\nWarning: HTTP status ${status} - page may have errors`;
                }
                // The document has already loaded at this point. Pages that keep
                // connections open may never go idle, so a timeout here is not a
                // navigation failure (click() treats the same timeout the same way).
                try {
                    await this.page.waitForLoadState('networkidle', { timeout: 10000 });
                }
                catch (idleError) {
                    console.log('Network idle wait timed out after navigation; continuing');
                    executionLog += '\nNetwork did not become idle within 10s; continuing with the current page state';
                }
                await this.waitTillHTMLStable();
                console.log(`Page navigation completed successfully for: ${url}`);
            }
            catch (error) {
                const errorMsg = error instanceof Error ? error.message : String(error);
                console.error(`Navigation error for ${url}:`, errorMsg);
                // doAction records the error message in the execution log.
                throw error;
            }
            return executionLog;
        });
    }
    /**
     * Moves the mouse to a viewport coordinate and clicks there.
     * @param {string} coordinate Position in "x,y" format, 0-900 by 0-600.
     * @returns {Promise<object>} The `doAction` result for the click.
     * @throws {Error} When the coordinate is not numeric or is out of bounds,
     *   or (from `doAction`) when no page is open.
     */
    async click(coordinate) {
        // Number('') is 0, so blank components are mapped to NaN explicitly;
        // otherwise "," would silently click at the top-left corner.
        const [x, y] = coordinate
            .split(',')
            .map((part) => (part.trim() === '' ? Number.NaN : Number(part)));
        // Validate coordinates are within viewport bounds
        if (Number.isNaN(x) || Number.isNaN(y)) {
            throw new Error(`Invalid coordinates: ${coordinate}. Must be in "x,y" format with valid numbers.`);
        }
        if (x < 0 || x > 900 || y < 0 || y > 600) {
            throw new Error(`Coordinates (${x}, ${y}) are outside viewport bounds (0-900, 0-600).`);
        }
        return this.doAction(async () => {
            if (!this.page)
                throw new Error('Page not available');
            const page = this.page;
            let hasNetworkActivity = false;
            let executionLog = '';
            console.log(`Clicking at coordinates: (${x}, ${y}) within 900x600 viewport`);
            const requestListener = () => {
                hasNetworkActivity = true;
            };
            try {
                page.on('request', requestListener);
                // Move mouse to position first, then click
                await page.mouse.move(x, y);
                await page.mouse.click(x, y);
                this.currentMousePosition = coordinate;
                executionLog += `\nClick Action Performed at exact coordinates (${x}, ${y})`;
                executionLog += `\nViewport: 900x600 pixels, Click position: ${((x / 900) * 100).toFixed(1)}% from left, ${((y / 600) * 100).toFixed(1)}% from top`;
                // Wait a moment for potential page changes
                await new Promise((resolve) => setTimeout(resolve, 500));
                if (hasNetworkActivity) {
                    try {
                        console.log('Network activity detected, waiting for page to stabilize...');
                        await page.waitForLoadState('networkidle', { timeout: 7000 });
                        await this.waitTillHTMLStable();
                        executionLog += '\nPage updated after click';
                    }
                    catch (error) {
                        // Navigation timeout is common and not necessarily an error
                        console.log('Navigation wait timeout (expected for non-navigating clicks)');
                        executionLog += '\nClick completed (no page navigation)';
                    }
                }
                else {
                    executionLog += '\nClick completed (no network activity)';
                }
                console.log('Click action completed successfully');
            }
            catch (error) {
                const errorMsg = error instanceof Error ? error.message : String(error);
                console.error('Click action failed:', errorMsg);
                throw error;
            }
            finally {
                page.off('request', requestListener);
            }
            return executionLog;
        });
    }
    /**
     * Types `text` on the keyboard into whatever currently has focus.
     * @param {string} text Characters to type.
     * @returns {Promise<object>} The `doAction` result.
     */
    async type(text) {
        return this.doAction(async () => {
            if (!this.page)
                throw new Error('Page not available');
            await this.page.keyboard.type(text);
            return 'Type action performed!';
        });
    }
    /**
     * Scrolls the window down by 400 pixels.
     * @returns {Promise<object>} The `doAction` result.
     */
    async scrollDown() {
        return this.doAction(async () => {
            if (!this.page)
                throw new Error('Page not available');
            await this.page.evaluate("window.scrollBy({top: 400, behavior: 'auto'})");
            // Short pause before doAction takes its screenshot.
            await new Promise((resolve) => setTimeout(resolve, 300));
            return 'Scroll down action performed!';
        });
    }
    /**
     * Scrolls the window up by 600 pixels.
     * @returns {Promise<object>} The `doAction` result.
     */
    async scrollUp() {
        return this.doAction(async () => {
            if (!this.page)
                throw new Error('Page not available');
            await this.page.evaluate("window.scrollBy({top: -600, behavior: 'auto'})");
            // Short pause before doAction takes its screenshot.
            await new Promise((resolve) => setTimeout(resolve, 300));
            return 'Scroll up action performed!';
        });
    }
}
383
/**
 * Invocation that starts the shared browser session and reports the outcome.
 */
class BrowserLaunchToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Fixed description of this invocation. */
    getDescription() {
        return 'Launching browser with 900x600 viewport';
    }
    /**
     * Launches the browser through the shared session.
     * @returns {Promise<object>} `llmContent` and `returnDisplay`, plus an
     *   `error` entry when the launch did not succeed.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const outcome = await session.launchBrowser();
        const { execution_success: succeeded, execution_logs: details, logs: summary } = outcome;
        console.log('[BrowserLaunchTool] Browser launch completed', {
            success: succeeded,
        });
        if (succeeded) {
            return {
                llmContent: details || 'Browser launched successfully',
                returnDisplay: summary || 'Browser launched successfully',
            };
        }
        const failureText = summary || 'Failed to launch browser';
        return {
            llmContent: details || 'Failed to launch browser',
            returnDisplay: failureText,
            error: {
                message: failureText,
                type: ToolErrorType.BROWSER_LAUNCH_ERROR,
            },
        };
    }
}
412
/**
 * Tool declaration for launching the browser. Takes no parameters.
 */
export class BrowserLaunchTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_LAUNCH;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        // NOTE(review): the description mentions Puppeteer, but this file imports
        // `chromium` from 'playwright' — confirm which wording is intended.
        const description = 'Launches a Puppeteer-controlled browser instance with a 900x600 viewport. This must always be the first browser action before any other browser operations.';
        const parameterSchema = {
            type: 'object',
            properties: {},
            required: [],
        };
        super(BrowserLaunchTool.Name, 'BrowserLaunch', description, Kind.Execute, parameterSchema, false);
        this.config = config;
    }
    /** Builds the invocation that performs the launch. */
    createInvocation(params) {
        return new BrowserLaunchToolInvocation(params);
    }
}
428
/**
 * Invocation that navigates the shared browser session to `params.url` and
 * returns a text report, with the screenshot attached on success.
 */
class BrowserNavigateToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Description naming the target URL. */
    getDescription() {
        return `Navigating to ${this.params.url}`;
    }
    /**
     * Performs the navigation.
     * @returns {Promise<object>} On failure: the text report plus an `error`
     *   entry. On success: the report followed by the PNG as inline data when
     *   a screenshot was captured.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const { url } = this.params;
        const outcome = await session.navigateToUrl(url);
        console.log('[BrowserNavigateTool] Navigation completed', {
            url,
            success: outcome.execution_success,
            currentUrl: outcome.currentUrl,
        });
        const reportLines = [
            `${screenshotPrompt}`,
            `URL: ${url}`,
            `Status: ${outcome.execution_success ? 'Success' : 'Failed'}`,
            `Current URL: ${outcome.currentUrl || 'N/A'}`,
            `Console Logs: ${outcome.console_logs || '(none)'}`,
            `Execution Details: ${outcome.execution_logs || ''}`,
        ];
        const report = reportLines.join('\n');
        if (!outcome.execution_success) {
            const failureText = outcome.logs || 'Navigation failed';
            return {
                llmContent: report,
                returnDisplay: failureText,
                error: {
                    message: failureText,
                    type: ToolErrorType.BROWSER_NAVIGATE_ERROR,
                },
            };
        }
        // The screenshot is a data URI; only the part after the comma is the base64 payload.
        const parts = outcome.screenshot
            ? [
                report,
                {
                    inlineData: {
                        mimeType: 'image/png',
                        data: outcome.screenshot.split(',')[1],
                    },
                },
            ]
            : [report];
        return {
            llmContent: parts,
            returnDisplay: outcome.logs || 'Navigation successful',
        };
    }
}
477
/**
 * Tool declaration for navigating the browser to a URL.
 */
export class BrowserNavigateTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_NAVIGATE;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        const description = 'Navigates the browser to a specified URL. The browser must be launched first. Returns a screenshot of the page after navigation.';
        const parameterSchema = {
            type: 'object',
            properties: {
                url: {
                    type: 'string',
                    description: 'The URL to navigate to (e.g., http://localhost:3000, https://example.com, file:///path/to/file.html)',
                },
            },
            required: ['url'],
        };
        super(BrowserNavigateTool.Name, 'BrowserNavigate', description, Kind.Execute, parameterSchema, false);
        this.config = config;
    }
    /**
     * Checks the `url` parameter.
     * @param {{url: string}} params Tool parameters.
     * @returns {string | null} An error message, or null when accepted.
     */
    validateToolParamValues(params) {
        const { url } = params;
        if (!url || url.trim() === '') {
            return 'URL parameter must be non-empty';
        }
        // NOTE(review): anything `new URL` can parse is accepted regardless of
        // scheme, and unparseable text is still accepted when it starts with
        // "http" or "file://" — looser than the error message below suggests.
        let parses = true;
        try {
            new URL(url);
        }
        catch (error) {
            parses = false;
        }
        if (parses) {
            return null;
        }
        const looksLikeKnownScheme = url.startsWith('file://') || url.startsWith('http');
        if (!looksLikeKnownScheme) {
            return `Invalid URL format: ${url}. Must be a valid URL (http://, https://, or file://)`;
        }
        return null;
    }
    /** Builds the invocation that performs the navigation. */
    createInvocation(params) {
        return new BrowserNavigateToolInvocation(params);
    }
}
514
/**
 * Invocation that clicks at `params.coordinate` in the shared browser session
 * and returns a text report, with the screenshot attached on success.
 */
class BrowserClickToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Description naming the click coordinate. */
    getDescription() {
        return `Clicking at coordinates ${this.params.coordinate}`;
    }
    /**
     * Performs the click.
     * @returns {Promise<object>} On failure: the text report plus an `error`
     *   entry. On success: the report followed by the PNG as inline data when
     *   a screenshot was captured.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const { coordinate } = this.params;
        const outcome = await session.click(coordinate);
        const [x, y] = coordinate.split(',').map((part) => Number(part));
        console.log('[BrowserClickTool] Click completed', {
            coordinate,
            success: outcome.execution_success,
            currentUrl: outcome.currentUrl,
        });
        const reportLines = [
            `${screenshotPrompt}`,
            `Coordinate: ${coordinate} (${x}, ${y})`,
            `Status: ${outcome.execution_success ? 'Success' : 'Failed'}`,
            `Current URL: ${outcome.currentUrl || 'N/A'}`,
            `Mouse Position: ${outcome.currentMousePosition || 'N/A'}`,
            `Console Logs: ${outcome.console_logs || '(none)'}`,
            `Execution Details: ${outcome.execution_logs || ''}`,
        ];
        const report = reportLines.join('\n');
        if (!outcome.execution_success) {
            const failureText = outcome.logs || 'Click failed';
            return {
                llmContent: report,
                returnDisplay: failureText,
                error: {
                    message: failureText,
                    type: ToolErrorType.BROWSER_CLICK_ERROR,
                },
            };
        }
        // The screenshot is a data URI; only the part after the comma is the base64 payload.
        const parts = outcome.screenshot
            ? [
                report,
                {
                    inlineData: {
                        mimeType: 'image/png',
                        data: outcome.screenshot.split(',')[1],
                    },
                },
            ]
            : [report];
        return {
            llmContent: parts,
            returnDisplay: outcome.logs || 'Click successful',
        };
    }
}
565
/**
 * Tool declaration for clicking at an x,y coordinate in the 900x600 viewport.
 */
export class BrowserClickTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_CLICK;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        super(BrowserClickTool.Name, 'BrowserClick', 'Clicks at a specific x,y coordinate in the browser. The browser window has a resolution of 900x600 pixels. Always click in the center of an element based on coordinates derived from a screenshot. Returns a screenshot after the click.', Kind.Execute, {
            type: 'object',
            properties: {
                coordinate: {
                    type: 'string',
                    description: 'The X and Y coordinates for the click action in "x,y" format (e.g., "450,300"). Coordinates must be within 0-900 for x and 0-600 for y.',
                },
            },
            required: ['coordinate'],
        }, false);
        this.config = config;
    }
    /**
     * Checks that `coordinate` is two numbers in "x,y" form inside the viewport.
     * @param {{coordinate: string}} params Tool parameters.
     * @returns {string | null} An error message, or null when accepted.
     */
    validateToolParamValues(params) {
        if (!params.coordinate || params.coordinate.trim() === '') {
            return 'Coordinate parameter must be non-empty';
        }
        const parts = params.coordinate.split(',');
        if (parts.length !== 2) {
            return 'Coordinate must be in "x,y" format';
        }
        // Number('') and Number('  ') are 0, so a blank component such as in
        // "," or "450," must be rejected before conversion; otherwise it would
        // be treated as a click at position 0.
        if (parts.some((part) => part.trim() === '')) {
            return 'Coordinate values must be valid numbers';
        }
        const [x, y] = parts.map(Number);
        if (Number.isNaN(x) || Number.isNaN(y)) {
            return 'Coordinate values must be valid numbers';
        }
        if (x < 0 || x > 900 || y < 0 || y > 600) {
            return `Coordinates (${x}, ${y}) are outside viewport bounds (0-900, 0-600)`;
        }
        return null;
    }
    /** Builds the invocation that performs the click. */
    createInvocation(params) {
        return new BrowserClickToolInvocation(params);
    }
}
603
/**
 * Invocation that types `params.text` in the shared browser session and
 * returns a text report, with the screenshot attached on success.
 */
class BrowserTypeToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Description quoting at most the first 50 characters of the text. */
    getDescription() {
        const { text } = this.params;
        let preview = text;
        if (text.length > 50) {
            preview = text.substring(0, 50) + '...';
        }
        return `Typing text: "${preview}"`;
    }
    /**
     * Performs the typing.
     * @returns {Promise<object>} On failure: the text report plus an `error`
     *   entry. On success: the report followed by the PNG as inline data when
     *   a screenshot was captured.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const { text } = this.params;
        const outcome = await session.type(text);
        console.log('[BrowserTypeTool] Type completed', {
            textLength: text.length,
            success: outcome.execution_success,
            currentUrl: outcome.currentUrl,
        });
        const reportLines = [
            `${screenshotPrompt}`,
            `Text: ${text}`,
            `Status: ${outcome.execution_success ? 'Success' : 'Failed'}`,
            `Current URL: ${outcome.currentUrl || 'N/A'}`,
            `Console Logs: ${outcome.console_logs || '(none)'}`,
            `Execution Details: ${outcome.execution_logs || ''}`,
        ];
        const report = reportLines.join('\n');
        if (!outcome.execution_success) {
            const failureText = outcome.logs || 'Type action failed';
            return {
                llmContent: report,
                returnDisplay: failureText,
                error: {
                    message: failureText,
                    type: ToolErrorType.BROWSER_TYPE_ERROR,
                },
            };
        }
        // The screenshot is a data URI; only the part after the comma is the base64 payload.
        const parts = outcome.screenshot
            ? [
                report,
                {
                    inlineData: {
                        mimeType: 'image/png',
                        data: outcome.screenshot.split(',')[1],
                    },
                },
            ]
            : [report];
        return {
            llmContent: parts,
            returnDisplay: outcome.logs || 'Text typed successfully',
        };
    }
}
655
/**
 * Tool declaration for typing text on the keyboard.
 */
export class BrowserTypeTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_TYPE;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        const description = 'Types a string of text on the keyboard. Use this after clicking on a text field to input text. Returns a screenshot after typing.';
        const parameterSchema = {
            type: 'object',
            properties: {
                text: {
                    type: 'string',
                    description: 'The text string to type',
                },
            },
            required: ['text'],
        };
        super(BrowserTypeTool.Name, 'BrowserType', description, Kind.Execute, parameterSchema, false);
        this.config = config;
    }
    /**
     * Requires `text` to be present; an empty string is allowed.
     * @param {{text: string}} params Tool parameters.
     * @returns {string | null} An error message, or null when accepted.
     */
    validateToolParamValues(params) {
        const isMissing = params.text === undefined || params.text === null;
        return isMissing ? 'Text parameter is required' : null;
    }
    /** Builds the invocation that performs the typing. */
    createInvocation(params) {
        return new BrowserTypeToolInvocation(params);
    }
}
682
/**
 * Invocation that scrolls the shared browser session down and returns a text
 * report, with the screenshot attached on success.
 */
class BrowserScrollDownToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Fixed description of this invocation. */
    getDescription() {
        return 'Scrolling down the page';
    }
    /**
     * Performs the scroll. No parameters are forwarded to the session.
     * @returns {Promise<object>} On failure: the text report plus an `error`
     *   entry. On success: the report followed by the PNG as inline data when
     *   a screenshot was captured.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const outcome = await session.scrollDown();
        console.log('[BrowserScrollDownTool] Scroll down completed', {
            success: outcome.execution_success,
            currentUrl: outcome.currentUrl,
        });
        const reportLines = [
            `${screenshotPrompt}`,
            `Action: Scroll Down`,
            `Status: ${outcome.execution_success ? 'Success' : 'Failed'}`,
            `Current URL: ${outcome.currentUrl || 'N/A'}`,
            `Console Logs: ${outcome.console_logs || '(none)'}`,
            `Execution Details: ${outcome.execution_logs || ''}`,
        ];
        const report = reportLines.join('\n');
        if (!outcome.execution_success) {
            const failureText = outcome.logs || 'Scroll down failed';
            return {
                llmContent: report,
                returnDisplay: failureText,
                error: {
                    message: failureText,
                    type: ToolErrorType.BROWSER_SCROLL_ERROR,
                },
            };
        }
        // The screenshot is a data URI; only the part after the comma is the base64 payload.
        const parts = outcome.screenshot
            ? [
                report,
                {
                    inlineData: {
                        mimeType: 'image/png',
                        data: outcome.screenshot.split(',')[1],
                    },
                },
            ]
            : [report];
        return {
            llmContent: parts,
            returnDisplay: outcome.logs || 'Scroll down successful',
        };
    }
}
730
/**
 * Tool declaration for scrolling the page down.
 */
export class BrowserScrollDownTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_SCROLL_DOWN;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        const description = 'Scrolls down the page by one page height (400 pixels). Returns a screenshot after scrolling.';
        // NOTE(review): `amount` is declared and required here, but
        // BrowserScrollDownToolInvocation calls scrollDown() without it, so the
        // value has no effect — confirm whether it should be honoured or dropped.
        const parameterSchema = {
            type: 'object',
            properties: {
                amount: {
                    type: 'number',
                    description: 'The amount to scroll down',
                },
            },
            required: ['amount'],
        };
        super(BrowserScrollDownTool.Name, 'BrowserScrollDown', description, Kind.Execute, parameterSchema, false);
        this.config = config;
    }
    /** Builds the invocation that performs the scroll. */
    createInvocation(params) {
        return new BrowserScrollDownToolInvocation(params);
    }
}
751
/**
 * Invocation that scrolls the shared browser session up and returns a text
 * report, with the screenshot attached on success.
 */
class BrowserScrollUpToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Fixed description of this invocation. */
    getDescription() {
        return 'Scrolling up the page';
    }
    /**
     * Performs the scroll. No parameters are forwarded to the session.
     * @returns {Promise<object>} On failure: the text report plus an `error`
     *   entry. On success: the report followed by the PNG as inline data when
     *   a screenshot was captured.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const outcome = await session.scrollUp();
        console.log('[BrowserScrollUpTool] Scroll up completed', {
            success: outcome.execution_success,
            currentUrl: outcome.currentUrl,
        });
        const reportLines = [
            `${screenshotPrompt}`,
            `Action: Scroll Up`,
            `Status: ${outcome.execution_success ? 'Success' : 'Failed'}`,
            `Current URL: ${outcome.currentUrl || 'N/A'}`,
            `Console Logs: ${outcome.console_logs || '(none)'}`,
            `Execution Details: ${outcome.execution_logs || ''}`,
        ];
        const report = reportLines.join('\n');
        if (!outcome.execution_success) {
            const failureText = outcome.logs || 'Scroll up failed';
            return {
                llmContent: report,
                returnDisplay: failureText,
                error: {
                    message: failureText,
                    type: ToolErrorType.BROWSER_SCROLL_ERROR,
                },
            };
        }
        // The screenshot is a data URI; only the part after the comma is the base64 payload.
        const parts = outcome.screenshot
            ? [
                report,
                {
                    inlineData: {
                        mimeType: 'image/png',
                        data: outcome.screenshot.split(',')[1],
                    },
                },
            ]
            : [report];
        return {
            llmContent: parts,
            returnDisplay: outcome.logs || 'Scroll up successful',
        };
    }
}
799
/**
 * Tool declaration for scrolling the page up.
 */
export class BrowserScrollUpTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_SCROLL_UP;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        const description = 'Scrolls up the page by one page height (600 pixels). Returns a screenshot after scrolling.';
        // NOTE(review): `amount` is declared and required here, but
        // BrowserScrollUpToolInvocation calls scrollUp() without it, so the
        // value has no effect — confirm whether it should be honoured or dropped.
        const parameterSchema = {
            type: 'object',
            properties: {
                amount: {
                    type: 'number',
                    description: 'The amount to scroll up',
                },
            },
            required: ['amount'],
        };
        super(BrowserScrollUpTool.Name, 'BrowserScrollUp', description, Kind.Execute, parameterSchema, false);
        this.config = config;
    }
    /** Builds the invocation that performs the scroll. */
    createInvocation(params) {
        return new BrowserScrollUpToolInvocation(params);
    }
}
820
/**
 * Invocation that closes the shared browser session and reports the outcome.
 */
class BrowserCloseToolInvocation extends BaseToolInvocation {
    constructor(params) {
        super(params);
    }
    /** @returns {string} Fixed description of this invocation. */
    getDescription() {
        return 'Closing browser';
    }
    /**
     * Closes the browser through the shared session.
     * @returns {Promise<object>} `llmContent` and `returnDisplay`, plus an
     *   `error` entry when closing did not succeed.
     */
    async execute() {
        const session = ServerBrowserSession.getInstance();
        const outcome = await session.closeBrowser();
        const { execution_success: succeeded, execution_logs: details, logs: summary } = outcome;
        console.log('[BrowserCloseTool] Browser close completed', {
            success: succeeded,
        });
        if (succeeded) {
            return {
                llmContent: details || 'Browser closed successfully',
                returnDisplay: summary || 'Browser closed successfully',
            };
        }
        const failureText = summary || 'Failed to close browser';
        return {
            llmContent: details || 'Failed to close browser',
            returnDisplay: failureText,
            error: {
                message: failureText,
                type: ToolErrorType.BROWSER_CLOSE_ERROR,
            },
        };
    }
}
849
/**
 * Tool declaration for closing the browser.
 */
export class BrowserCloseTool extends BaseDeclarativeTool {
    config;
    static Name = ToolNames.BROWSER_CLOSE;
    // @ts-expect-error - Required by base class pattern
    constructor(config) {
        // NOTE(review): the description mentions Puppeteer, but this file imports
        // `chromium` from 'playwright' — confirm which wording is intended.
        const description = 'Closes the Puppeteer-controlled browser instance. This must always be the final browser action.';
        // NOTE(review): `force` is declared and required here, but
        // BrowserCloseToolInvocation calls closeBrowser() without it, so the
        // value has no effect — confirm whether it should be honoured or dropped.
        const parameterSchema = {
            type: 'object',
            properties: {
                force: {
                    type: 'boolean',
                    description: 'Whether to force close the browser',
                },
            },
            required: ['force'],
        };
        super(BrowserCloseTool.Name, 'BrowserClose', description, Kind.Execute, parameterSchema, false);
        this.config = config;
    }
    /** Builds the invocation that performs the close. */
    createInvocation(params) {
        return new BrowserCloseToolInvocation(params);
    }
}
870
+ //# sourceMappingURL=browser_use.js.map