@lov3kaizen/agentsea-surf 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs ADDED
@@ -0,0 +1,582 @@
1
+ import {
2
+ BaseBackend,
3
+ CoordinateScaler,
4
+ DEFAULT_SURF_CONFIG,
5
+ DockerBackend,
6
+ LinuxBackend,
7
+ MacOSBackend,
8
+ PuppeteerBackend,
9
+ SecurityValidator,
10
+ SurfAgent,
11
+ VisionAnalyzer,
12
+ WindowsBackend,
13
+ clickInputSchema,
14
+ clickOutputSchema,
15
+ createBackend,
16
+ createNativeBackend,
17
+ cursorMoveInputSchema,
18
+ cursorMoveOutputSchema,
19
+ dragInputSchema,
20
+ dragOutputSchema,
21
+ keyPressInputSchema,
22
+ keyPressOutputSchema,
23
+ screenshotInputSchema,
24
+ screenshotOutputSchema,
25
+ scrollInputSchema,
26
+ scrollOutputSchema,
27
+ typeTextInputSchema,
28
+ typeTextOutputSchema,
29
+ waitInputSchema,
30
+ waitOutputSchema
31
+ } from "./chunk-QCNRPRJB.mjs";
32
+
33
+ // src/tools/screenshot.tool.ts
34
+ import { serverTool } from "@lov3kaizen/agentsea-core";
35
/**
 * Build the `computer_screenshot` tool bound to the given backend.
 *
 * The tool forwards `region`, `format` and `quality` from the tool input to
 * `backend.screenshot()` and flattens the returned image and its dimensions
 * into a single result object.
 *
 * Unlike the other tool factories in this file, a failure is not reported via
 * `success: false`; it is rethrown as an `Error` whose message is prefixed
 * with "Screenshot failed: ".
 *
 * @param backend Object exposing an async `screenshot(options)` method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createScreenshotTool(backend) {
  return serverTool({
    name: "computer_screenshot",
    description: "Take a screenshot of the current screen or a specific region. Returns the image as base64 for vision analysis. Use this to see what is currently displayed on the screen.",
    inputSchema: screenshotInputSchema,
    outputSchema: screenshotOutputSchema,
    execute: async (input) => {
      try {
        const result = await backend.screenshot({
          region: input.region,
          format: input.format,
          quality: input.quality
        });
        const { dimensions } = result;
        return {
          success: true,
          base64: result.base64,
          mimeType: result.mimeType,
          width: dimensions.width,
          height: dimensions.height,
          scaleFactor: dimensions.scaleFactor
        };
      } catch (error) {
        // Non-Error throwables carry no usable message, so use a generic one.
        const errorMessage = error instanceof Error ? error.message : "Unknown error";
        throw new Error(`Screenshot failed: ${errorMessage}`);
      }
    }
  });
}
64
/**
 * Static metadata for the screenshot tool: name, short description and
 * schemas, without an `execute` implementation.
 */
const screenshotToolDefinition = {
  name: "computer_screenshot",
  description:
    "Take a screenshot of the current screen or a specific region. Returns the image as base64 for vision analysis.",
  inputSchema: screenshotInputSchema,
  outputSchema: screenshotOutputSchema
};
70
+
71
+ // src/tools/click.tool.ts
72
+ import { serverTool as serverTool2 } from "@lov3kaizen/agentsea-core";
73
/**
 * Build the `computer_click` tool bound to the given backend.
 *
 * Dispatches to `backend.doubleClick()` when `input.clickType` is "double"
 * and to `backend.click()` otherwise. Exceptions are converted into a
 * `success: false` result instead of propagating.
 *
 * @param backend Object exposing async `click(point, options)` and
 *   `doubleClick(point, options)` methods.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createClickTool(backend) {
  const execute = async (input) => {
    const startedAt = Date.now();
    const target = { x: input.x, y: input.y };
    const isDouble = input.clickType === "double";

    // Single place that shapes both the success and the failure result.
    const describe = (success, error) => ({
      success,
      x: input.x,
      y: input.y,
      action: isDouble ? "doubleClick" : "click",
      duration: Date.now() - startedAt,
      error
    });

    try {
      const { button, holdMs, modifiers } = input;
      const options = { button, holdMs, modifiers };
      const outcome = isDouble
        ? await backend.doubleClick(target, options)
        : await backend.click(target, options);
      return describe(outcome.success, outcome.error);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      return describe(false, `Click failed: ${reason}`);
    }
  };

  return serverTool2({
    name: "computer_click",
    description: "Click at specified screen coordinates. Supports single/double click, different mouse buttons (left, right, middle), and modifier keys (ctrl, alt, shift, meta). Use this to interact with buttons, links, and other clickable elements.",
    inputSchema: clickInputSchema,
    outputSchema: clickOutputSchema,
    execute
  });
}
116
/**
 * Static metadata for the click tool: name, short description and schemas,
 * without an `execute` implementation.
 */
const clickToolDefinition = {
  name: "computer_click",
  description:
    "Click at specified screen coordinates. Supports single/double click, different mouse buttons, and modifier keys.",
  inputSchema: clickInputSchema,
  outputSchema: clickOutputSchema
};
122
+
123
+ // src/tools/type-text.tool.ts
124
+ import { serverTool as serverTool3 } from "@lov3kaizen/agentsea-core";
125
/**
 * Build the `computer_type` tool bound to the given backend.
 *
 * A target point is passed to `backend.typeText()` only when both `input.x`
 * and `input.y` are defined; otherwise `point` is left undefined. Exceptions
 * are converted into a `success: false` result instead of propagating.
 *
 * @param backend Object exposing an async `typeText(text, options)` method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createTypeTextTool(backend) {
  const execute = async (input) => {
    const startedAt = Date.now();

    // Single place that shapes both the success and the failure result.
    const describe = (success, error) => ({
      success,
      textLength: input.text.length,
      duration: Date.now() - startedAt,
      error
    });

    try {
      const hasPoint = input.x !== undefined && input.y !== undefined;
      const options = {
        point: hasPoint ? { x: input.x, y: input.y } : undefined,
        delayMs: input.delayMs,
        clearFirst: input.clearFirst
      };
      const outcome = await backend.typeText(input.text, options);
      return describe(outcome.success, outcome.error);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      return describe(false, `Type text failed: ${reason}`);
    }
  };

  return serverTool3({
    name: "computer_type",
    description: "Type text at the current cursor position or at specified coordinates. Can optionally clear existing text first (useful for text fields). Use this to enter text into input fields, search boxes, editors, etc.",
    inputSchema: typeTextInputSchema,
    outputSchema: typeTextOutputSchema,
    execute
  });
}
158
/**
 * Static metadata for the type-text tool: name, short description and
 * schemas, without an `execute` implementation.
 */
const typeTextToolDefinition = {
  name: "computer_type",
  description:
    "Type text at the current cursor position or at specified coordinates. Can optionally clear existing text first.",
  inputSchema: typeTextInputSchema,
  outputSchema: typeTextOutputSchema
};
164
+
165
+ // src/tools/scroll.tool.ts
166
+ import { serverTool as serverTool4 } from "@lov3kaizen/agentsea-core";
167
/**
 * Build the `computer_scroll` tool bound to the given backend.
 *
 * Calls `backend.scroll(direction, point, { amount, smooth })` and echoes the
 * requested direction and amount back in the result. Exceptions are converted
 * into a `success: false` result instead of propagating.
 *
 * @param backend Object exposing an async `scroll(direction, point, options)`
 *   method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createScrollTool(backend) {
  const execute = async (input) => {
    const startedAt = Date.now();
    const anchor = { x: input.x, y: input.y };

    // Single place that shapes both the success and the failure result.
    const describe = (success, error) => ({
      success,
      direction: input.direction,
      amount: input.amount,
      duration: Date.now() - startedAt,
      error
    });

    try {
      const { amount, smooth } = input;
      const outcome = await backend.scroll(input.direction, anchor, { amount, smooth });
      return describe(outcome.success, outcome.error);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      return describe(false, `Scroll failed: ${reason}`);
    }
  };

  return serverTool4({
    name: "computer_scroll",
    description: "Scroll the screen in a specified direction (up, down, left, right) at given coordinates. Use this to navigate through long pages, lists, or documents.",
    inputSchema: scrollInputSchema,
    outputSchema: scrollOutputSchema,
    execute
  });
}
205
/**
 * Static metadata for the scroll tool: name, short description and schemas,
 * without an `execute` implementation.
 */
const scrollToolDefinition = {
  name: "computer_scroll",
  description:
    "Scroll the screen in a specified direction at given coordinates.",
  inputSchema: scrollInputSchema,
  outputSchema: scrollOutputSchema
};
211
+
212
+ // src/tools/drag.tool.ts
213
+ import { serverTool as serverTool5 } from "@lov3kaizen/agentsea-core";
214
/**
 * Build the `computer_drag` tool bound to the given backend.
 *
 * Calls `backend.drag(from, to, { button, durationMs })` and echoes the four
 * requested coordinates back in the result. Exceptions are converted into a
 * `success: false` result instead of propagating.
 *
 * @param backend Object exposing an async `drag(from, to, options)` method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createDragTool(backend) {
  const execute = async (input) => {
    const startedAt = Date.now();
    const origin = { x: input.fromX, y: input.fromY };
    const destination = { x: input.toX, y: input.toY };

    // Single place that shapes both the success and the failure result.
    const describe = (success, error) => ({
      success,
      fromX: input.fromX,
      fromY: input.fromY,
      toX: input.toX,
      toY: input.toY,
      duration: Date.now() - startedAt,
      error
    });

    try {
      const { button, durationMs } = input;
      const outcome = await backend.drag(origin, destination, { button, durationMs });
      return describe(outcome.success, outcome.error);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      return describe(false, `Drag failed: ${reason}`);
    }
  };

  return serverTool5({
    name: "computer_drag",
    description: "Drag from one point to another. Useful for drag-and-drop operations, selecting text, moving windows, resizing elements, or drawing.",
    inputSchema: dragInputSchema,
    outputSchema: dragOutputSchema,
    execute
  });
}
253
/**
 * Static metadata for the drag tool: name, short description and schemas,
 * without an `execute` implementation.
 */
const dragToolDefinition = {
  name: "computer_drag",
  description:
    "Drag from one point to another. Useful for drag-and-drop operations, selecting text, or moving windows.",
  inputSchema: dragInputSchema,
  outputSchema: dragOutputSchema
};
259
+
260
+ // src/tools/key-press.tool.ts
261
+ import { serverTool as serverTool6 } from "@lov3kaizen/agentsea-core";
262
/**
 * Build the `computer_key` tool bound to the given backend.
 *
 * Presses `input.key` (with `input.modifiers`) `input.repeat` times, one
 * awaited `backend.keyPress()` call per repetition. The first unsuccessful
 * press stops the loop and is reported with `repeat` set to the number of
 * presses attempted so far. When `input.holdMs` is positive, the tool sleeps
 * for that long after each successful press. Exceptions are converted into a
 * `success: false` result instead of propagating.
 *
 * @param backend Object exposing an async `keyPress(key, modifiers)` method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createKeyPressTool(backend) {
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));

  const execute = async (input) => {
    const startedAt = Date.now();

    // Fields shared by every result; failures add an `error` key on top.
    const snapshot = (success, repeat) => ({
      success,
      key: input.key,
      modifiers: input.modifiers,
      repeat,
      duration: Date.now() - startedAt
    });

    try {
      let attempts = 0;
      while (attempts < input.repeat) {
        const outcome = await backend.keyPress(input.key, input.modifiers);
        attempts += 1;
        if (!outcome.success) {
          return { ...snapshot(false, attempts), error: outcome.error };
        }
        if (input.holdMs > 0) {
          await pause(input.holdMs);
        }
      }
      return snapshot(true, input.repeat);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      return { ...snapshot(false, input.repeat), error: `Key press failed: ${reason}` };
    }
  };

  return serverTool6({
    name: "computer_key",
    description: "Press a keyboard key or key combination. Supports all standard keys (enter, escape, tab, backspace, delete, arrows, function keys, etc.) and modifier combinations (ctrl, alt, shift, meta/command). Use this for keyboard shortcuts, navigation, and special key inputs.",
    inputSchema: keyPressInputSchema,
    outputSchema: keyPressOutputSchema,
    execute
  });
}
311
/**
 * Static metadata for the key-press tool: name, short description and
 * schemas, without an `execute` implementation.
 */
const keyPressToolDefinition = {
  name: "computer_key",
  description:
    "Press a keyboard key or key combination. Supports all standard keys and modifier combinations.",
  inputSchema: keyPressInputSchema,
  outputSchema: keyPressOutputSchema
};
317
+
318
+ // src/tools/cursor-move.tool.ts
319
+ import { serverTool as serverTool7 } from "@lov3kaizen/agentsea-core";
320
/**
 * Build the `computer_cursor_move` tool bound to the given backend.
 *
 * Calls `backend.moveCursor(point)`. When `input.smooth` is truthy and
 * `input.durationMs` is positive, the tool then sleeps for `durationMs`
 * before returning; the sleep happens after the move call, whatever its
 * outcome. Exceptions are converted into a `success: false` result instead
 * of propagating.
 *
 * @param backend Object exposing an async `moveCursor(point)` method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createCursorMoveTool(backend) {
  const pause = (ms) => new Promise((done) => setTimeout(done, ms));

  const execute = async (input) => {
    const startedAt = Date.now();
    const target = { x: input.x, y: input.y };

    // Single place that shapes both the success and the failure result.
    const describe = (success, error) => ({
      success,
      x: input.x,
      y: input.y,
      duration: Date.now() - startedAt,
      error
    });

    try {
      const outcome = await backend.moveCursor(target);
      const shouldPause = input.smooth && input.durationMs > 0;
      if (shouldPause) {
        await pause(input.durationMs);
      }
      return describe(outcome.success, outcome.error);
    } catch (error) {
      const reason = error instanceof Error ? error.message : "Unknown error";
      return describe(false, `Cursor move failed: ${reason}`);
    }
  };

  return serverTool7({
    name: "computer_cursor_move",
    description: "Move the cursor to specified coordinates without clicking. Useful for hover effects, tooltip activation, or positioning before other actions.",
    inputSchema: cursorMoveInputSchema,
    outputSchema: cursorMoveOutputSchema,
    execute
  });
}
354
/**
 * Static metadata for the cursor-move tool: name, short description and
 * schemas, without an `execute` implementation.
 */
const cursorMoveToolDefinition = {
  name: "computer_cursor_move",
  description:
    "Move the cursor to specified coordinates without clicking. Useful for hover effects or positioning.",
  inputSchema: cursorMoveInputSchema,
  outputSchema: cursorMoveOutputSchema
};
360
+
361
+ // src/tools/wait.tool.ts
362
+ import { serverTool as serverTool8 } from "@lov3kaizen/agentsea-core";
363
/**
 * Build the `computer_wait` tool bound to the given backend.
 *
 * Awaits `backend.wait(input.ms)` and reports the wall-clock time actually
 * spent in `waitedMs`, echoing `input.reason` back to the caller.
 *
 * If the backend throws, the tool still resolves, with `success: false`.
 * The result object built here has no error field, so the failure reason is
 * intentionally not extracted from the exception.
 *
 * @param backend Object exposing an async `wait(ms)` method.
 * @returns Whatever `serverTool` returns for this tool configuration.
 */
function createWaitTool(backend) {
  return serverTool8({
    name: "computer_wait",
    description: "Wait for a specified duration in milliseconds. Use this to wait for page loads, animations, network requests, or UI updates to complete before taking the next action.",
    inputSchema: waitInputSchema,
    outputSchema: waitOutputSchema,
    execute: async (input) => {
      const startTime = Date.now();
      let success = true;
      try {
        await backend.wait(input.ms);
      } catch {
        success = false;
      }
      return {
        success,
        waitedMs: Date.now() - startTime,
        reason: input.reason
      };
    }
  });
}
389
/**
 * Static metadata for the wait tool: name, short description and schemas,
 * without an `execute` implementation.
 */
const waitToolDefinition = {
  name: "computer_wait",
  description:
    "Wait for a specified duration. Useful for waiting for page loads, animations, or UI updates.",
  inputSchema: waitInputSchema,
  outputSchema: waitOutputSchema
};
395
+
396
+ // src/tools/index.ts
397
/**
 * Create every surf tool for one backend and return them keyed by short name.
 *
 * @param backend Backend instance handed to each individual tool factory.
 * @returns Object with keys `screenshot`, `click`, `typeText`, `scroll`,
 *   `drag`, `keyPress`, `cursorMove` and `wait`.
 */
function createSurfTools(backend) {
  const screenshot = createScreenshotTool(backend);
  const click = createClickTool(backend);
  const typeText = createTypeTextTool(backend);
  const scroll = createScrollTool(backend);
  const drag = createDragTool(backend);
  const keyPress = createKeyPressTool(backend);
  const cursorMove = createCursorMoveTool(backend);
  const wait = createWaitTool(backend);
  return { screenshot, click, typeText, scroll, drag, keyPress, cursorMove, wait };
}
409
/**
 * Create every surf tool for one backend and return them as an ordered list:
 * screenshot, click, type, scroll, drag, key press, cursor move, wait.
 *
 * @param backend Backend instance handed to each individual tool factory.
 * @returns Array of the eight tools in the order listed above.
 */
function createSurfToolsArray(backend) {
  const factories = [
    createScreenshotTool,
    createClickTool,
    createTypeTextTool,
    createScrollTool,
    createDragTool,
    createKeyPressTool,
    createCursorMoveTool,
    createWaitTool
  ];
  // Call each factory with the backend only; map's extra arguments are dropped.
  return factories.map((makeTool) => makeTool(backend));
}
421
/**
 * Names of the eight surf tools, in the same order as the tools returned by
 * `createSurfToolsArray`.
 */
const SURF_TOOL_NAMES = [
  "computer_screenshot", "computer_click",
  "computer_type", "computer_scroll",
  "computer_drag", "computer_key",
  "computer_cursor_move", "computer_wait"
];
431
+
432
+ // src/utils/image-utils.ts
433
/**
 * Resize an image to the target size with `fit: "contain"`, padding with a
 * fully transparent background.
 *
 * This is a best-effort helper: if `sharp` cannot be loaded, or the resize
 * fails for any reason, the original buffer is returned unchanged.
 *
 * @param imageBuffer Encoded image data.
 * @param targetWidth Desired output width, passed to sharp's `resize`.
 * @param targetHeight Desired output height, passed to sharp's `resize`.
 * @returns The resized image buffer, or `imageBuffer` itself on failure.
 */
async function resizeImage(imageBuffer, targetWidth, targetHeight) {
  try {
    const sharp = await import("sharp");
    // `await` is required here: returning the bare promise would let an
    // asynchronous processing error escape this try/catch and reject instead
    // of triggering the fallback below.
    return await sharp.default(imageBuffer).resize(targetWidth, targetHeight, {
      fit: "contain",
      background: { r: 0, g: 0, b: 0, alpha: 0 }
    }).toBuffer();
  } catch {
    return imageBuffer;
  }
}
444
/**
 * Encode an image buffer as a base64 string.
 *
 * @param imageBuffer Buffer holding the image bytes.
 * @returns Base64 text produced by `imageBuffer.toString("base64")`.
 */
function imageToBase64(imageBuffer) {
  const encoded = imageBuffer.toString("base64");
  return encoded;
}
447
/**
 * Decode a base64 string into a Buffer of raw bytes.
 *
 * @param base64 Base64-encoded data.
 * @returns Buffer created with `Buffer.from(base64, "base64")`.
 */
function base64ToImage(base64) {
  const decoded = Buffer.from(base64, "base64");
  return decoded;
}
450
/**
 * Read the pixel dimensions of an image via sharp's metadata.
 *
 * @param imageBuffer Encoded image data.
 * @returns `{ width, height }`, where a missing or zero value is reported as
 *   0, or `null` when `sharp` cannot be loaded or the metadata read fails.
 */
async function getImageDimensions(imageBuffer) {
  try {
    const { default: loadImage } = await import("sharp");
    const { width, height } = await loadImage(imageBuffer).metadata();
    return { width: width || 0, height: height || 0 };
  } catch {
    return null;
  }
}
462
/**
 * Crop an image to the given rectangle.
 *
 * This is a best-effort helper: if `sharp` cannot be loaded, or the crop
 * fails for any reason, the original buffer is returned unchanged.
 *
 * @param imageBuffer Encoded image data.
 * @param region Rectangle with `x`, `y`, `width` and `height`, mapped to
 *   sharp's `left`, `top`, `width` and `height` extract options.
 * @returns The cropped image buffer, or `imageBuffer` itself on failure.
 */
async function cropImage(imageBuffer, region) {
  try {
    const sharp = await import("sharp");
    // `await` is required here: returning the bare promise would let an
    // asynchronous processing error escape this try/catch and reject instead
    // of triggering the fallback below.
    return await sharp.default(imageBuffer).extract({
      left: region.x,
      top: region.y,
      width: region.width,
      height: region.height
    }).toBuffer();
  } catch {
    return imageBuffer;
  }
}
475
/**
 * Re-encode an image as PNG, JPEG or WebP.
 *
 * For "jpeg" and "webp" a falsy `quality` falls back to 90. Any other
 * `format` value applies no output-format call before `toBuffer()`.
 *
 * This is a best-effort helper: if `sharp` cannot be loaded, or the
 * conversion fails for any reason, the original buffer is returned unchanged.
 *
 * @param imageBuffer Encoded image data.
 * @param format Target format: "png", "jpeg" or "webp".
 * @param quality Optional encoder quality for "jpeg" / "webp".
 * @returns The converted image buffer, or `imageBuffer` itself on failure.
 */
async function convertImageFormat(imageBuffer, format, quality) {
  try {
    const sharp = await import("sharp");
    let sharpInstance = sharp.default(imageBuffer);
    switch (format) {
      case "png":
        sharpInstance = sharpInstance.png();
        break;
      case "jpeg":
        sharpInstance = sharpInstance.jpeg({ quality: quality || 90 });
        break;
      case "webp":
        sharpInstance = sharpInstance.webp({ quality: quality || 90 });
        break;
    }
    // `await` is required here: returning the bare promise would let an
    // asynchronous encoding error escape this try/catch and reject instead
    // of triggering the fallback below.
    return await sharpInstance.toBuffer();
  } catch {
    return imageBuffer;
  }
}
495
/**
 * Compute a string of "1"/"0" characters describing an image.
 *
 * The image is resized to 8x8 with `fit: "fill"`, converted to grayscale and
 * read as raw bytes. Each byte contributes "1" when it is at least the mean
 * of all bytes, "0" otherwise.
 *
 * @param imageBuffer Encoded image data.
 * @returns The bit string, or "" when `sharp` cannot be loaded or processing
 *   fails.
 */
async function calculateImageHash(imageBuffer) {
  try {
    const { default: loadImage } = await import("sharp");
    const { data: pixels } = await loadImage(imageBuffer)
      .resize(8, 8, { fit: "fill" })
      .grayscale()
      .raw()
      .toBuffer({ resolveWithObject: true });

    let total = 0;
    for (const value of pixels) {
      total += value;
    }
    const mean = total / pixels.length;

    const bits = [];
    for (const value of pixels) {
      bits.push(value >= mean ? "1" : "0");
    }
    return bits.join("");
  } catch {
    return "";
  }
}
513
/**
 * Compare two hash strings position by position.
 *
 * @param hash1 First hash string.
 * @param hash2 Second hash string.
 * @returns Percentage (0-100) of positions holding the same character.
 *   Returns 0 when the lengths differ or the hashes are empty.
 */
function compareImageHashes(hash1, hash2) {
  const length = hash1.length;
  const comparable = length === hash2.length && length !== 0;
  if (!comparable) {
    return 0;
  }

  let same = 0;
  let position = 0;
  while (position < length) {
    if (hash1[position] === hash2[position]) {
      same += 1;
    }
    position += 1;
  }
  return (same / length) * 100;
}
525
+ export {
526
+ BaseBackend,
527
+ CoordinateScaler,
528
+ DEFAULT_SURF_CONFIG,
529
+ DockerBackend,
530
+ LinuxBackend,
531
+ MacOSBackend,
532
+ PuppeteerBackend,
533
+ SURF_TOOL_NAMES,
534
+ SecurityValidator,
535
+ SurfAgent,
536
+ VisionAnalyzer,
537
+ WindowsBackend,
538
+ base64ToImage,
539
+ calculateImageHash,
540
+ clickInputSchema,
541
+ clickOutputSchema,
542
+ clickToolDefinition,
543
+ compareImageHashes,
544
+ convertImageFormat,
545
+ createBackend,
546
+ createClickTool,
547
+ createCursorMoveTool,
548
+ createDragTool,
549
+ createKeyPressTool,
550
+ createNativeBackend,
551
+ createScreenshotTool,
552
+ createScrollTool,
553
+ createSurfTools,
554
+ createSurfToolsArray,
555
+ createTypeTextTool,
556
+ createWaitTool,
557
+ cropImage,
558
+ cursorMoveInputSchema,
559
+ cursorMoveOutputSchema,
560
+ cursorMoveToolDefinition,
561
+ dragInputSchema,
562
+ dragOutputSchema,
563
+ dragToolDefinition,
564
+ getImageDimensions,
565
+ imageToBase64,
566
+ keyPressInputSchema,
567
+ keyPressOutputSchema,
568
+ keyPressToolDefinition,
569
+ resizeImage,
570
+ screenshotInputSchema,
571
+ screenshotOutputSchema,
572
+ screenshotToolDefinition,
573
+ scrollInputSchema,
574
+ scrollOutputSchema,
575
+ scrollToolDefinition,
576
+ typeTextInputSchema,
577
+ typeTextOutputSchema,
578
+ typeTextToolDefinition,
579
+ waitInputSchema,
580
+ waitOutputSchema,
581
+ waitToolDefinition
582
+ };