@eko-ai/eko 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +72 -21
  2. package/dist/core/eko.d.ts +3 -2
  3. package/dist/extension/content/index.d.ts +1 -0
  4. package/dist/extension/tools/browser.d.ts +2 -1
  5. package/dist/extension/tools/get_all_tabs.d.ts +9 -0
  6. package/dist/extension/tools/index.d.ts +4 -1
  7. package/dist/extension/tools/request_login.d.ts +10 -0
  8. package/dist/extension/tools/tab_management.d.ts +1 -1
  9. package/dist/extension/utils.d.ts +2 -1
  10. package/dist/extension.cjs.js +797 -209
  11. package/dist/extension.esm.js +797 -209
  12. package/dist/extension_content_script.js +129 -2
  13. package/dist/index.cjs.js +518 -114
  14. package/dist/index.d.ts +2 -1
  15. package/dist/index.esm.js +518 -115
  16. package/dist/models/action.d.ts +9 -4
  17. package/dist/models/workflow.d.ts +8 -3
  18. package/dist/nodejs/script/build_dom_tree.d.ts +1 -0
  19. package/dist/nodejs/tools/browser_use.d.ts +28 -0
  20. package/dist/nodejs/tools/index.d.ts +2 -0
  21. package/dist/nodejs.cjs.js +71638 -12
  22. package/dist/nodejs.esm.js +71632 -6
  23. package/dist/schemas/workflow.schema.d.ts +2 -13
  24. package/dist/services/llm/claude-provider.d.ts +2 -1
  25. package/dist/services/llm/openai-provider.d.ts +2 -1
  26. package/dist/services/parser/workflow-parser.d.ts +0 -7
  27. package/dist/types/action.types.d.ts +8 -3
  28. package/dist/types/tools.types.d.ts +44 -1
  29. package/dist/types/workflow.types.d.ts +22 -9
  30. package/dist/universal_tools/cancel_workflow.d.ts +9 -0
  31. package/dist/universal_tools/human.d.ts +30 -0
  32. package/dist/universal_tools/index.d.ts +4 -0
  33. package/dist/universal_tools/summary_workflow.d.ts +9 -0
  34. package/dist/utils/execution-logger.d.ts +69 -0
  35. package/dist/web/tools/browser.d.ts +2 -1
  36. package/dist/web.cjs.js +29 -17
  37. package/dist/web.esm.js +29 -17
  38. package/package.json +6 -9
@@ -30,12 +30,21 @@ async function getWindowId(context) {
30
30
  async function getTabId(context) {
31
31
  let tabId = context.variables.get('tabId');
32
32
  if (tabId) {
33
- try {
34
- await chrome.tabs.get(tabId);
35
- }
36
- catch (e) {
37
- tabId = null;
33
+ // Ensure tabId is a number
34
+ tabId = Number(tabId);
35
+ // Check if it's a valid integer
36
+ if (!Number.isInteger(tabId)) {
38
37
  context.variables.delete('tabId');
38
+ tabId = null;
39
+ }
40
+ else {
41
+ try {
42
+ await chrome.tabs.get(tabId);
43
+ }
44
+ catch (e) {
45
+ tabId = null;
46
+ context.variables.delete('tabId');
47
+ }
39
48
  }
40
49
  }
41
50
  if (!tabId) {
@@ -52,28 +61,31 @@ async function getTabId(context) {
52
61
  else {
53
62
  tabId = await getCurrentTabId();
54
63
  }
64
+ if (!tabId) {
65
+ throw new Error('Could not find a valid tab');
66
+ }
67
+ context.variables.set('tabId', tabId);
55
68
  }
56
69
  return tabId;
57
70
  }
58
- function getCurrentTabId(windowId) {
59
- return new Promise((resolve) => {
60
- chrome.tabs.query({ windowId, active: true, lastFocusedWindow: true }, function (tabs) {
61
- if (tabs.length > 0) {
62
- resolve(tabs[0].id);
71
+ async function getCurrentTabId(windowId) {
72
+ return new Promise((resolve, reject) => {
73
+ chrome.tabs.query({ active: true, currentWindow: true }, function (tabs) {
74
+ if (chrome.runtime.lastError) {
75
+ console.error('Chrome runtime error:', chrome.runtime.lastError);
76
+ reject(chrome.runtime.lastError);
77
+ return;
63
78
  }
64
- else {
65
- chrome.tabs.query({ windowId, active: true, currentWindow: true }, function (_tabs) {
66
- if (_tabs.length > 0) {
67
- resolve(_tabs[0].id);
68
- return;
69
- }
70
- else {
71
- chrome.tabs.query({ windowId, status: 'complete', currentWindow: true }, function (__tabs) {
72
- resolve(__tabs.length ? __tabs[__tabs.length - 1].id : undefined);
73
- });
74
- }
75
- });
79
+ if (!tabs || tabs.length === 0) {
80
+ reject(new Error('No active tab found'));
81
+ return;
76
82
  }
83
+ const tabId = tabs[0].id;
84
+ if (typeof tabId !== 'number') {
85
+ reject(new Error('Invalid tab ID'));
86
+ return;
87
+ }
88
+ resolve(tabId);
77
89
  });
78
90
  });
79
91
  }
@@ -138,6 +150,19 @@ async function waitForTabComplete(tabId, timeout = 15000) {
138
150
  chrome.tabs.onUpdated.addListener(listener);
139
151
  });
140
152
  }
153
+ async function doesTabExists(tabId) {
154
+ const tabExists = await new Promise((resolve) => {
155
+ chrome.tabs.get(tabId, (tab) => {
156
+ if (chrome.runtime.lastError) {
157
+ resolve(false);
158
+ }
159
+ else {
160
+ resolve(true);
161
+ }
162
+ });
163
+ });
164
+ return tabExists;
165
+ }
141
166
  async function getPageSize(tabId) {
142
167
  if (!tabId) {
143
168
  tabId = await getCurrentTabId();
@@ -235,6 +260,7 @@ var utils = /*#__PURE__*/Object.freeze({
235
260
  __proto__: null,
236
261
  CountDownLatch: CountDownLatch,
237
262
  MsgEvent: MsgEvent,
263
+ doesTabExists: doesTabExists,
238
264
  executeScript: executeScript,
239
265
  getCurrentTabId: getCurrentTabId,
240
266
  getPageSize: getPageSize,
@@ -248,155 +274,355 @@ var utils = /*#__PURE__*/Object.freeze({
248
274
  });
249
275
 
250
276
  async function type(tabId, text, coordinate) {
251
- if (!coordinate) {
252
- coordinate = (await cursor_position(tabId)).coordinate;
277
+ console.log('Sending type message to tab:', tabId, { text, coordinate });
278
+ try {
279
+ if (!coordinate) {
280
+ coordinate = (await cursor_position(tabId)).coordinate;
281
+ }
282
+ await mouse_move(tabId, coordinate);
283
+ const response = await chrome.tabs.sendMessage(tabId, {
284
+ type: 'computer:type',
285
+ text,
286
+ coordinate,
287
+ });
288
+ console.log('Got response:', response);
289
+ return response;
290
+ }
291
+ catch (e) {
292
+ console.error('Failed to send type message:', e);
293
+ throw e;
253
294
  }
254
- await mouse_move(tabId, coordinate);
255
- return await chrome.tabs.sendMessage(tabId, {
256
- type: 'computer:type',
257
- text,
258
- coordinate,
259
- });
260
295
  }
261
296
  async function type_by(tabId, text, xpath, highlightIndex) {
262
- return await chrome.tabs.sendMessage(tabId, {
263
- type: 'computer:type',
264
- text,
265
- xpath,
266
- highlightIndex,
267
- });
297
+ console.log('Sending type message to tab:', tabId, { text, xpath, highlightIndex });
298
+ try {
299
+ const response = await chrome.tabs.sendMessage(tabId, {
300
+ type: 'computer:type',
301
+ text,
302
+ xpath,
303
+ highlightIndex,
304
+ });
305
+ console.log('Got response:', response);
306
+ return response;
307
+ }
308
+ catch (e) {
309
+ console.error('Failed to send type message:', e);
310
+ throw e;
311
+ }
268
312
  }
269
313
  async function clear_input(tabId, coordinate) {
270
- if (!coordinate) {
271
- coordinate = (await cursor_position(tabId)).coordinate;
314
+ console.log('Sending clear_input message to tab:', tabId, { coordinate });
315
+ try {
316
+ if (!coordinate) {
317
+ coordinate = (await cursor_position(tabId)).coordinate;
318
+ }
319
+ await mouse_move(tabId, coordinate);
320
+ const response = await chrome.tabs.sendMessage(tabId, {
321
+ type: 'computer:type',
322
+ text: '',
323
+ coordinate,
324
+ });
325
+ console.log('Got response:', response);
326
+ return response;
327
+ }
328
+ catch (e) {
329
+ console.error('Failed to send clear_input message:', e);
330
+ throw e;
272
331
  }
273
- await mouse_move(tabId, coordinate);
274
- return await chrome.tabs.sendMessage(tabId, {
275
- type: 'computer:type',
276
- text: '',
277
- coordinate,
278
- });
279
332
  }
280
333
  async function clear_input_by(tabId, xpath, highlightIndex) {
281
- return await chrome.tabs.sendMessage(tabId, {
282
- type: 'computer:type',
283
- text: '',
284
- xpath,
285
- highlightIndex,
286
- });
334
+ console.log('Sending clear_input_by message to tab:', tabId, { xpath, highlightIndex });
335
+ try {
336
+ const response = await chrome.tabs.sendMessage(tabId, {
337
+ type: 'computer:type',
338
+ text: '',
339
+ xpath,
340
+ highlightIndex,
341
+ });
342
+ console.log('Got response:', response);
343
+ return response;
344
+ }
345
+ catch (e) {
346
+ console.error('Failed to send clear_input_by message:', e);
347
+ throw e;
348
+ }
287
349
  }
288
350
  async function mouse_move(tabId, coordinate) {
289
- return await chrome.tabs.sendMessage(tabId, {
290
- type: 'computer:mouse_move',
291
- coordinate,
292
- });
351
+ console.log('Sending mouse_move message to tab:', tabId, { coordinate });
352
+ try {
353
+ const response = await chrome.tabs.sendMessage(tabId, {
354
+ type: 'computer:mouse_move',
355
+ coordinate,
356
+ });
357
+ console.log('Got response:', response);
358
+ return response;
359
+ }
360
+ catch (e) {
361
+ console.error('Failed to send mouse_move message:', e);
362
+ throw e;
363
+ }
293
364
  }
294
365
  async function left_click(tabId, coordinate) {
295
- if (!coordinate) {
296
- coordinate = (await cursor_position(tabId)).coordinate;
366
+ console.log('Sending left_click message to tab:', tabId, { coordinate });
367
+ try {
368
+ if (!coordinate) {
369
+ coordinate = (await cursor_position(tabId)).coordinate;
370
+ }
371
+ const response = await chrome.tabs.sendMessage(tabId, {
372
+ type: 'computer:left_click',
373
+ coordinate,
374
+ });
375
+ console.log('Got response:', response);
376
+ return response;
377
+ }
378
+ catch (e) {
379
+ console.error('Failed to send left_click message:', e);
380
+ throw e;
297
381
  }
298
- return await chrome.tabs.sendMessage(tabId, {
299
- type: 'computer:left_click',
300
- coordinate,
301
- });
302
382
  }
303
383
  async function left_click_by(tabId, xpath, highlightIndex) {
304
- return await chrome.tabs.sendMessage(tabId, {
305
- type: 'computer:left_click',
306
- xpath,
307
- highlightIndex,
308
- });
384
+ console.log('Sending left_click_by message to tab:', tabId, { xpath, highlightIndex });
385
+ try {
386
+ const response = await chrome.tabs.sendMessage(tabId, {
387
+ type: 'computer:left_click',
388
+ xpath,
389
+ highlightIndex,
390
+ });
391
+ console.log('Got response:', response);
392
+ return response;
393
+ }
394
+ catch (e) {
395
+ console.error('Failed to send left_click_by message:', e);
396
+ throw e;
397
+ }
309
398
  }
310
399
  async function right_click(tabId, coordinate) {
311
- if (!coordinate) {
312
- coordinate = (await cursor_position(tabId)).coordinate;
400
+ console.log('Sending right_click message to tab:', tabId, { coordinate });
401
+ try {
402
+ if (!coordinate) {
403
+ coordinate = (await cursor_position(tabId)).coordinate;
404
+ }
405
+ const response = await chrome.tabs.sendMessage(tabId, {
406
+ type: 'computer:right_click',
407
+ coordinate,
408
+ });
409
+ console.log('Got response:', response);
410
+ return response;
411
+ }
412
+ catch (e) {
413
+ console.error('Failed to send right_click message:', e);
414
+ throw e;
313
415
  }
314
- return await chrome.tabs.sendMessage(tabId, {
315
- type: 'computer:right_click',
316
- coordinate,
317
- });
318
416
  }
319
417
  async function right_click_by(tabId, xpath, highlightIndex) {
320
- return await chrome.tabs.sendMessage(tabId, {
321
- type: 'computer:right_click',
322
- xpath,
323
- highlightIndex,
324
- });
418
+ console.log('Sending right_click_by message to tab:', tabId, { xpath, highlightIndex });
419
+ try {
420
+ const response = await chrome.tabs.sendMessage(tabId, {
421
+ type: 'computer:right_click',
422
+ xpath,
423
+ highlightIndex,
424
+ });
425
+ console.log('Got response:', response);
426
+ return response;
427
+ }
428
+ catch (e) {
429
+ console.error('Failed to send right_click_by message:', e);
430
+ throw e;
431
+ }
325
432
  }
326
433
  async function double_click(tabId, coordinate) {
327
- if (!coordinate) {
328
- coordinate = (await cursor_position(tabId)).coordinate;
434
+ console.log('Sending double_click message to tab:', tabId, { coordinate });
435
+ try {
436
+ if (!coordinate) {
437
+ coordinate = (await cursor_position(tabId)).coordinate;
438
+ }
439
+ const response = await chrome.tabs.sendMessage(tabId, {
440
+ type: 'computer:double_click',
441
+ coordinate,
442
+ });
443
+ console.log('Got response:', response);
444
+ return response;
445
+ }
446
+ catch (e) {
447
+ console.error('Failed to send double_click message:', e);
448
+ throw e;
329
449
  }
330
- return await chrome.tabs.sendMessage(tabId, {
331
- type: 'computer:double_click',
332
- coordinate,
333
- });
334
450
  }
335
451
  async function double_click_by(tabId, xpath, highlightIndex) {
336
- return await chrome.tabs.sendMessage(tabId, {
337
- type: 'computer:double_click',
338
- xpath,
339
- highlightIndex,
340
- });
452
+ console.log('Sending double_click_by message to tab:', tabId, { xpath, highlightIndex });
453
+ try {
454
+ const response = await chrome.tabs.sendMessage(tabId, {
455
+ type: 'computer:double_click',
456
+ xpath,
457
+ highlightIndex,
458
+ });
459
+ console.log('Got response:', response);
460
+ return response;
461
+ }
462
+ catch (e) {
463
+ console.error('Failed to send double_click_by message:', e);
464
+ throw e;
465
+ }
341
466
  }
342
- async function screenshot(windowId) {
343
- let dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
344
- format: 'jpeg', // jpeg / png
345
- quality: 50, // 0-100
346
- });
347
- let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
348
- return {
349
- image: {
350
- type: 'base64',
351
- media_type: dataUrl.indexOf('image/png') > -1 ? 'image/png' : 'image/jpeg',
352
- data: data,
353
- },
354
- };
467
+ async function screenshot(windowId, compress) {
468
+ console.log('Taking screenshot of window:', windowId, { compress });
469
+ try {
470
+ let dataUrl;
471
+ if (compress) {
472
+ dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
473
+ format: 'jpeg',
474
+ quality: 60, // 0-100
475
+ });
476
+ dataUrl = await compress_image(dataUrl, 0.7, 1);
477
+ }
478
+ else {
479
+ dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
480
+ format: 'jpeg',
481
+ quality: 50,
482
+ });
483
+ }
484
+ let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
485
+ const result = {
486
+ image: {
487
+ type: 'base64',
488
+ media_type: dataUrl.indexOf('image/png') > -1 ? 'image/png' : 'image/jpeg',
489
+ data: data,
490
+ },
491
+ };
492
+ console.log('Got screenshot result:', result);
493
+ return result;
494
+ }
495
+ catch (e) {
496
+ console.error('Failed to take screenshot:', e);
497
+ throw e;
498
+ }
499
+ }
500
+ async function compress_image(dataUrl, scale = 0.8, quality = 0.8) {
501
+ console.log('Compressing image', { scale, quality });
502
+ try {
503
+ const bitmap = await createImageBitmap(await (await fetch(dataUrl)).blob());
504
+ let width = bitmap.width * scale;
505
+ let height = bitmap.height * scale;
506
+ const canvas = new OffscreenCanvas(width, height);
507
+ const ctx = canvas.getContext('2d');
508
+ ctx.drawImage(bitmap, 0, 0, width, height);
509
+ const blob = await canvas.convertToBlob({
510
+ type: 'image/jpeg',
511
+ quality: quality,
512
+ });
513
+ return new Promise((resolve) => {
514
+ const reader = new FileReader();
515
+ reader.onloadend = () => {
516
+ const result = reader.result;
517
+ console.log('Got compressed image result:', result);
518
+ resolve(result);
519
+ };
520
+ reader.readAsDataURL(blob);
521
+ });
522
+ }
523
+ catch (e) {
524
+ console.error('Failed to compress image:', e);
525
+ throw e;
526
+ }
355
527
  }
356
528
  async function scroll_to(tabId, coordinate) {
357
- let from_coordinate = (await cursor_position(tabId)).coordinate;
358
- return await chrome.tabs.sendMessage(tabId, {
359
- type: 'computer:scroll_to',
360
- from_coordinate,
361
- to_coordinate: coordinate,
362
- });
529
+ console.log('Sending scroll_to message to tab:', tabId, { coordinate });
530
+ try {
531
+ let from_coordinate = (await cursor_position(tabId)).coordinate;
532
+ const response = await chrome.tabs.sendMessage(tabId, {
533
+ type: 'computer:scroll_to',
534
+ from_coordinate,
535
+ to_coordinate: coordinate,
536
+ });
537
+ console.log('Got response:', response);
538
+ return response;
539
+ }
540
+ catch (e) {
541
+ console.error('Failed to send scroll_to message:', e);
542
+ throw e;
543
+ }
363
544
  }
364
545
  async function scroll_to_by(tabId, xpath, highlightIndex) {
365
- return await chrome.tabs.sendMessage(tabId, {
366
- type: 'computer:scroll_to',
367
- xpath,
368
- highlightIndex,
369
- });
546
+ console.log('Sending scroll_to_by message to tab:', tabId, { xpath, highlightIndex });
547
+ try {
548
+ const response = await chrome.tabs.sendMessage(tabId, {
549
+ type: 'computer:scroll_to',
550
+ xpath,
551
+ highlightIndex,
552
+ });
553
+ console.log('Got response:', response);
554
+ return response;
555
+ }
556
+ catch (e) {
557
+ console.error('Failed to send scroll_to_by message:', e);
558
+ throw e;
559
+ }
370
560
  }
371
561
  async function get_dropdown_options(tabId, xpath, highlightIndex) {
372
- return await chrome.tabs.sendMessage(tabId, {
373
- type: 'computer:get_dropdown_options',
374
- xpath,
375
- highlightIndex,
376
- });
562
+ console.log('Sending get_dropdown_options message to tab:', tabId, { xpath, highlightIndex });
563
+ try {
564
+ const response = await chrome.tabs.sendMessage(tabId, {
565
+ type: 'computer:get_dropdown_options',
566
+ xpath,
567
+ highlightIndex,
568
+ });
569
+ console.log('Got response:', response);
570
+ return response;
571
+ }
572
+ catch (e) {
573
+ console.error('Failed to send get_dropdown_options message:', e);
574
+ throw e;
575
+ }
377
576
  }
378
577
  async function select_dropdown_option(tabId, text, xpath, highlightIndex) {
379
- return await chrome.tabs.sendMessage(tabId, {
380
- type: 'computer:select_dropdown_option',
381
- text,
382
- xpath,
383
- highlightIndex,
384
- });
578
+ console.log('Sending select_dropdown_option message to tab:', tabId, { text, xpath, highlightIndex });
579
+ try {
580
+ const response = await chrome.tabs.sendMessage(tabId, {
581
+ type: 'computer:select_dropdown_option',
582
+ text,
583
+ xpath,
584
+ highlightIndex,
585
+ });
586
+ console.log('Got response:', response);
587
+ return response;
588
+ }
589
+ catch (e) {
590
+ console.error('Failed to send select_dropdown_option message:', e);
591
+ throw e;
592
+ }
385
593
  }
386
594
  async function cursor_position(tabId) {
387
- let result = await chrome.tabs.sendMessage(tabId, {
388
- type: 'computer:cursor_position',
389
- });
390
- return { coordinate: result.coordinate };
595
+ console.log('Sending cursor_position message to tab:', tabId);
596
+ try {
597
+ let result = await chrome.tabs.sendMessage(tabId, {
598
+ type: 'computer:cursor_position',
599
+ });
600
+ console.log('Got cursor position:', result.coordinate);
601
+ return { coordinate: result.coordinate };
602
+ }
603
+ catch (e) {
604
+ console.error('Failed to send cursor_position message:', e);
605
+ throw e;
606
+ }
391
607
  }
392
608
  async function size(tabId) {
393
- return await getPageSize(tabId);
609
+ console.log('Getting page size for tab:', tabId);
610
+ try {
611
+ const pageSize = await getPageSize(tabId);
612
+ console.log('Got page size:', pageSize);
613
+ return pageSize;
614
+ }
615
+ catch (e) {
616
+ console.error('Failed to get page size:', e);
617
+ throw e;
618
+ }
394
619
  }
395
620
 
396
621
  var browser = /*#__PURE__*/Object.freeze({
397
622
  __proto__: null,
398
623
  clear_input: clear_input,
399
624
  clear_input_by: clear_input_by,
625
+ compress_image: compress_image,
400
626
  cursor_position: cursor_position,
401
627
  double_click: double_click,
402
628
  double_click_by: double_click_by,
@@ -423,7 +649,6 @@ class BrowserUse {
423
649
  this.name = 'browser_use';
424
650
  this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
425
651
  * This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
426
- * Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
427
652
  * Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
428
653
  * ELEMENT INTERACTION:
429
654
  - Only use indexes that exist in the provided element list
@@ -433,17 +658,7 @@ class BrowserUse {
433
658
  - If no suitable elements exist, use other functions to complete the task
434
659
  - If stuck, try alternative approaches
435
660
  - Handle popups/cookies by accepting or closing them
436
- - Use scroll to find elements you are looking for
437
- * Form filling:
438
- - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
439
- * ACTION SEQUENCING:
440
- - Actions are executed in the order they appear in the list
441
- - Each action should logically follow from the previous one
442
- - If the page changes after an action, the sequence is interrupted and you get the new state.
443
- - If content only disappears the sequence continues.
444
- - Only provide the action sequence until you think the page will change.
445
- - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
446
- - only use multiple actions if it makes sense.`;
661
+ - Use scroll to find elements you are looking for`;
447
662
  this.input_schema = {
448
663
  type: 'object',
449
664
  properties: {
@@ -498,7 +713,17 @@ class BrowserUse {
498
713
  if (params === null || !params.action) {
499
714
  throw new Error('Invalid parameters. Expected an object with a "action" property.');
500
715
  }
501
- let tabId = await getTabId(context);
716
+ let tabId;
717
+ try {
718
+ tabId = await getTabId(context);
719
+ if (!tabId || !Number.isInteger(tabId)) {
720
+ throw new Error('Could not get valid tab ID');
721
+ }
722
+ }
723
+ catch (e) {
724
+ console.error('Tab ID error:', e);
725
+ return { success: false, error: 'Could not access browser tab' };
726
+ }
502
727
  let windowId = await getWindowId(context);
503
728
  let selector_map = context.selector_map;
504
729
  let selector_xpath;
@@ -585,7 +810,7 @@ class BrowserUse {
585
810
  return window.get_clickable_elements(true);
586
811
  }, []);
587
812
  context.selector_map = element_result.selector_map;
588
- let screenshot$1 = await screenshot(windowId);
813
+ let screenshot$1 = await screenshot(windowId, true);
589
814
  await executeScript(tabId, () => {
590
815
  return window.remove_highlight();
591
816
  }, []);
@@ -602,6 +827,7 @@ class BrowserUse {
602
827
  }
603
828
  }
604
829
  catch (e) {
830
+ console.error('Browser use error:', e);
605
831
  return { success: false, error: e === null || e === void 0 ? void 0 : e.message };
606
832
  }
607
833
  }
@@ -817,7 +1043,7 @@ ${pseudoHtml}
817
1043
  async function executeWithBrowserUse$1(context, task_prompt) {
818
1044
  let tabId = await getTabId(context);
819
1045
  let windowId = await getWindowId(context);
820
- let screenshot_result = await screenshot(windowId);
1046
+ let screenshot_result = await screenshot(windowId, false);
821
1047
  let messages = [
822
1048
  {
823
1049
  role: 'user',
@@ -897,6 +1123,7 @@ class ExportFile {
897
1123
  * @returns > { success: true }
898
1124
  */
899
1125
  async execute(context, params) {
1126
+ var _a, _b, _c;
900
1127
  if (typeof params !== 'object' || params === null || !('content' in params)) {
901
1128
  throw new Error('Invalid parameters. Expected an object with a "content" property.');
902
1129
  }
@@ -931,8 +1158,8 @@ class ExportFile {
931
1158
  else {
932
1159
  filename = params.filename;
933
1160
  }
934
- let tabId = await getTabId(context);
935
1161
  try {
1162
+ let tabId = await getTabId(context);
936
1163
  await chrome.scripting.executeScript({
937
1164
  target: { tabId: tabId },
938
1165
  func: exportFile,
@@ -941,13 +1168,14 @@ class ExportFile {
941
1168
  }
942
1169
  catch (e) {
943
1170
  let tab = await open_new_tab('https://www.google.com', true);
944
- tabId = tab.id;
1171
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1172
+ let tabId = tab.id;
945
1173
  await chrome.scripting.executeScript({
946
1174
  target: { tabId: tabId },
947
1175
  func: exportFile,
948
1176
  args: [filename, type, params.content],
949
1177
  });
950
- await sleep(1000);
1178
+ await sleep(5000);
951
1179
  await chrome.tabs.remove(tabId);
952
1180
  }
953
1181
  return { success: true };
@@ -1065,7 +1293,7 @@ ${pseudoHtml}
1065
1293
  async function executeWithBrowserUse(context, task_prompt) {
1066
1294
  await getTabId(context);
1067
1295
  let windowId = await getWindowId(context);
1068
- let screenshot_result = await screenshot(windowId);
1296
+ let screenshot_result = await screenshot(windowId, false);
1069
1297
  let messages = [
1070
1298
  {
1071
1299
  role: 'user',
@@ -1113,6 +1341,48 @@ async function executeWithBrowserUse(context, task_prompt) {
1113
1341
  };
1114
1342
  }
1115
1343
 
1344
+ class GetAllTabs {
1345
+ constructor() {
1346
+ this.name = 'get_all_tabs';
1347
+ this.description = 'Get the tabId, title, url and content from current all tabs without opening new tab.';
1348
+ this.input_schema = {
1349
+ type: 'object',
1350
+ properties: {},
1351
+ };
1352
+ }
1353
+ async execute(context, params) {
1354
+ const currentWindow = await chrome.windows.getCurrent();
1355
+ const windowId = currentWindow.id;
1356
+ const tabs = await chrome.tabs.query({ windowId });
1357
+ const tabsInfo = [];
1358
+ for (const tab of tabs) {
1359
+ if (tab.id === undefined) {
1360
+ console.warn(`Tab ID is undefined for tab with URL: ${tab.url}`);
1361
+ continue;
1362
+ }
1363
+ await injectScript(tab.id);
1364
+ await sleep(500);
1365
+ let content = await executeScript(tab.id, () => {
1366
+ return eko.extractHtmlContent();
1367
+ }, []);
1368
+ // Use title as description, but requirement may evolve
1369
+ let description = tab.title ? tab.title : "No description available.";
1370
+ const tabInfo = {
1371
+ id: tab.id,
1372
+ url: tab.url,
1373
+ title: tab.title,
1374
+ content: content,
1375
+ description: description,
1376
+ };
1377
+ console.log("url: " + tab.url);
1378
+ console.log("title: " + tab.title);
1379
+ console.log("description: " + description);
1380
+ tabsInfo.push(tabInfo);
1381
+ }
1382
+ return tabsInfo;
1383
+ }
1384
+ }
1385
+
1116
1386
  /**
1117
1387
  * Open Url
1118
1388
  */
@@ -1142,6 +1412,7 @@ class OpenUrl {
1142
1412
  * @returns > { tabId, windowId, title, success: true }
1143
1413
  */
1144
1414
  async execute(context, params) {
1415
+ var _a, _b, _c, _d, _e, _f;
1145
1416
  if (typeof params !== 'object' || params === null || !params.url) {
1146
1417
  throw new Error('Invalid parameters. Expected an object with a "url" property.');
1147
1418
  }
@@ -1154,10 +1425,12 @@ class OpenUrl {
1154
1425
  let tab;
1155
1426
  if (newWindow) {
1156
1427
  tab = await open_new_tab(url, true);
1428
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1157
1429
  }
1158
1430
  else {
1159
1431
  let windowId = await getWindowId(context);
1160
1432
  tab = await open_new_tab(url, false, windowId);
1433
+ (_f = (_e = (_d = context.callback) === null || _d === void 0 ? void 0 : _d.hooks) === null || _e === void 0 ? void 0 : _e.onTabCreated) === null || _f === void 0 ? void 0 : _f.call(_e, tab.id);
1161
1434
  }
1162
1435
  let windowId = tab.windowId;
1163
1436
  let tabId = tab.id;
@@ -1214,9 +1487,9 @@ class TabManagement {
1214
1487
  this.input_schema = {
1215
1488
  type: 'object',
1216
1489
  properties: {
1217
- commond: {
1490
+ command: {
1218
1491
  type: 'string',
1219
- description: `The commond to perform. The available commonds are:
1492
+ description: `The command to perform. The available commands are:
1220
1493
  * \`tab_all\`: View all tabs and return the tabId and title.
1221
1494
  * \`current_tab\`: Get current tab information (tabId, url, title).
1222
1495
  * \`go_back\`: Go back to the previous page in the current tab.
@@ -1226,29 +1499,30 @@ class TabManagement {
1226
1499
  * \`new_tab [url]\`: Open a new tab window and open the URL, eg: \`new_tab https://www.google.com\``,
1227
1500
  },
1228
1501
  },
1229
- required: ['commond'],
1502
+ required: ['command'],
1230
1503
  };
1231
1504
  }
1232
1505
  /**
1233
1506
  * Tab management
1234
1507
  *
1235
- * @param {*} params { commond: `new_tab [url]` | 'tab_all' | 'current_tab' | 'go_back' | 'close_tab' | 'switch_tab [tabId]' | `change_url [url]` }
1508
+ * @param {*} params { command: `new_tab [url]` | 'tab_all' | 'current_tab' | 'go_back' | 'close_tab' | 'switch_tab [tabId]' | `change_url [url]` }
1236
1509
  * @returns > { result, success: true }
1237
1510
  */
1238
1511
  async execute(context, params) {
1239
- if (params === null || !params.commond) {
1240
- throw new Error('Invalid parameters. Expected an object with a "commond" property.');
1512
+ var _a, _b, _c, _d, _e, _f;
1513
+ if (params === null || !params.command) {
1514
+ throw new Error('Invalid parameters. Expected an object with a "command" property.');
1241
1515
  }
1242
1516
  let windowId = await getWindowId(context);
1243
- let commond = params.commond.trim();
1244
- if (commond.startsWith('`')) {
1245
- commond = commond.substring(1);
1517
+ let command = params.command.trim();
1518
+ if (command.startsWith('`')) {
1519
+ command = command.substring(1);
1246
1520
  }
1247
- if (commond.endsWith('`')) {
1248
- commond = commond.substring(0, commond.length - 1);
1521
+ if (command.endsWith('`')) {
1522
+ command = command.substring(0, command.length - 1);
1249
1523
  }
1250
1524
  let result;
1251
- if (commond == 'tab_all') {
1525
+ if (command == 'tab_all') {
1252
1526
  result = [];
1253
1527
  let tabs = await chrome.tabs.query({ windowId: windowId });
1254
1528
  for (let i = 0; i < tabs.length; i++) {
@@ -1265,20 +1539,20 @@ class TabManagement {
1265
1539
  result.push(tabInfo);
1266
1540
  }
1267
1541
  }
1268
- else if (commond == 'current_tab') {
1542
+ else if (command == 'current_tab') {
1269
1543
  let tabId = await getTabId(context);
1270
1544
  let tab = await chrome.tabs.get(tabId);
1271
1545
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1272
1546
  result = tabInfo;
1273
1547
  }
1274
- else if (commond == 'go_back') {
1548
+ else if (command == 'go_back') {
1275
1549
  let tabId = await getTabId(context);
1276
1550
  await chrome.tabs.goBack(tabId);
1277
1551
  let tab = await chrome.tabs.get(tabId);
1278
1552
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1279
1553
  result = tabInfo;
1280
1554
  }
1281
- else if (commond == 'close_tab') {
1555
+ else if (command == 'close_tab') {
1282
1556
  let closedTabId = await getTabId(context);
1283
1557
  await chrome.tabs.remove(closedTabId);
1284
1558
  await sleep(100);
@@ -1296,16 +1570,16 @@ class TabManagement {
1296
1570
  let closeTabInfo = { closedTabId, newTabId, newTabTitle: tab.title };
1297
1571
  result = closeTabInfo;
1298
1572
  }
1299
- else if (commond.startsWith('switch_tab')) {
1300
- let tabId = parseInt(commond.replace('switch_tab', '').replace('[', '').replace(']', ''));
1573
+ else if (command.startsWith('switch_tab')) {
1574
+ let tabId = parseInt(command.replace('switch_tab', '').replace('[', '').replace(']', ''));
1301
1575
  let tab = await chrome.tabs.update(tabId, { active: true });
1302
1576
  context.variables.set('tabId', tab.id);
1303
1577
  context.variables.set('windowId', tab.windowId);
1304
1578
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1305
1579
  result = tabInfo;
1306
1580
  }
1307
- else if (commond.startsWith('change_url')) {
1308
- let url = commond.substring('change_url'.length).replace('[', '').replace(']', '').trim();
1581
+ else if (command.startsWith('change_url')) {
1582
+ let url = command.substring('change_url'.length).replace('[', '').replace(']', '').trim();
1309
1583
  let tabId = await getTabId(context);
1310
1584
  // await chrome.tabs.update(tabId, { url: url });
1311
1585
  await executeScript(tabId, () => {
@@ -1315,17 +1589,19 @@ class TabManagement {
1315
1589
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1316
1590
  result = tabInfo;
1317
1591
  }
1318
- else if (commond.startsWith('new_tab')) {
1319
- let url = commond.replace('new_tab', '').replace('[', '').replace(']', '').replace(/"/g, '');
1592
+ else if (command.startsWith('new_tab')) {
1593
+ let url = command.replace('new_tab', '').replace('[', '').replace(']', '').replace(/"/g, '');
1320
1594
  // First mandatory opening of a new window
1321
1595
  let newWindow = !context.variables.get('windowId') && !context.variables.get('tabId');
1322
1596
  let tab;
1323
1597
  if (newWindow) {
1324
1598
  tab = await open_new_tab(url, true);
1599
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1325
1600
  }
1326
1601
  else {
1327
1602
  let windowId = await getWindowId(context);
1328
1603
  tab = await open_new_tab(url, false, windowId);
1604
+ (_f = (_e = (_d = context.callback) === null || _d === void 0 ? void 0 : _d.hooks) === null || _e === void 0 ? void 0 : _e.onTabCreated) === null || _f === void 0 ? void 0 : _f.call(_e, tab.id);
1329
1605
  }
1330
1606
  let windowId = tab.windowId;
1331
1607
  let tabId = tab.id;
@@ -1349,7 +1625,7 @@ class TabManagement {
1349
1625
  result = tabInfo;
1350
1626
  }
1351
1627
  else {
1352
- throw Error('Unknown commond: ' + commond);
1628
+ throw Error('Unknown command: ' + command);
1353
1629
  }
1354
1630
  return result;
1355
1631
  }
@@ -1369,7 +1645,7 @@ class TabManagement {
1369
1645
  class WebSearch {
1370
1646
  constructor() {
1371
1647
  this.name = 'web_search';
1372
- this.description = 'Use web search to return search results';
1648
+ this.description = 'Search the web based on keywords and return relevant extracted content from webpages.';
1373
1649
  this.input_schema = {
1374
1650
  type: 'object',
1375
1651
  properties: {
@@ -1404,7 +1680,7 @@ class WebSearch {
1404
1680
  }
1405
1681
  let taskId = new Date().getTime() + '';
1406
1682
  let searchs = [{ url: url, keyword: query }];
1407
- let searchInfo = await deepSearch(taskId, searchs, maxResults || 5);
1683
+ let searchInfo = await deepSearch(context, taskId, searchs, maxResults || 5);
1408
1684
  let links = ((_a = searchInfo.result[0]) === null || _a === void 0 ? void 0 : _a.links) || [];
1409
1685
  return links.filter((s) => s.content);
1410
1686
  }
@@ -1477,7 +1753,7 @@ chrome.tabs.onUpdated.addListener(async function (tabId, changeInfo, tab) {
1477
1753
  * @param {array} searchs search list => [{ url: 'https://bing.com', keyword: 'ai' }]
1478
1754
  * @param {number} detailsMaxNum Maximum crawling quantity per search detail page
1479
1755
  */
1480
- async function deepSearch(taskId, searchs, detailsMaxNum, window) {
1756
+ async function deepSearch(context, taskId, searchs, detailsMaxNum, window) {
1481
1757
  let closeWindow = false;
1482
1758
  if (!window) {
1483
1759
  // open new window
@@ -1490,9 +1766,9 @@ async function deepSearch(taskId, searchs, detailsMaxNum, window) {
1490
1766
  }
1491
1767
  // crawler the search page details page link
1492
1768
  // [{ links: [{ title, url }] }]
1493
- let detailLinkGroups = await doDetailLinkGroups(taskId, searchs, detailsMaxNum, window);
1769
+ let detailLinkGroups = await doDetailLinkGroups(context, taskId, searchs, detailsMaxNum, window);
1494
1770
  // crawler all details page content and comments
1495
- let searchInfo = await doPageContent(taskId, detailLinkGroups, window);
1771
+ let searchInfo = await doPageContent(context, taskId, detailLinkGroups, window);
1496
1772
  console.log('searchInfo: ', searchInfo);
1497
1773
  // close window
1498
1774
  closeWindow && chrome.windows.remove(window.id);
@@ -1507,7 +1783,8 @@ async function deepSearch(taskId, searchs, detailsMaxNum, window) {
1507
1783
  * @param {*} window
1508
1784
  * @returns [{ links: [{ title, url }] }]
1509
1785
  */
1510
- async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
1786
+ async function doDetailLinkGroups(context, taskId, searchs, detailsMaxNum, window) {
1787
+ var _a, _b, _c;
1511
1788
  let detailLinkGroups = [];
1512
1789
  let countDownLatch = new CountDownLatch(searchs.length);
1513
1790
  for (let i = 0; i < searchs.length; i++) {
@@ -1519,6 +1796,7 @@ async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
1519
1796
  url: url,
1520
1797
  windowId: window.id,
1521
1798
  });
1799
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1522
1800
  let eventId = taskId + '_' + i;
1523
1801
  // monitor Tab status
1524
1802
  tabsUpdateEvent.addListener(async function (obj) {
@@ -1569,7 +1847,8 @@ async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
1569
1847
  * @param {*} window
1570
1848
  * @returns search info
1571
1849
  */
1572
- async function doPageContent(taskId, detailLinkGroups, window) {
1850
+ async function doPageContent(context, taskId, detailLinkGroups, window) {
1851
+ var _a, _b, _c;
1573
1852
  const searchInfo = {
1574
1853
  total: 0,
1575
1854
  running: 0,
@@ -1593,65 +1872,374 @@ async function doPageContent(taskId, detailLinkGroups, window) {
1593
1872
  url: link.url,
1594
1873
  windowId: window.id,
1595
1874
  });
1875
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1596
1876
  searchInfo.running++;
1597
1877
  let eventId = taskId + '_' + i + '_' + j;
1598
- // monitor Tab status
1599
- tabsUpdateEvent.addListener(async function (obj) {
1600
- if (obj.tabId != tab.id) {
1601
- return;
1602
- }
1603
- if (obj.changeInfo.status === 'complete') {
1604
- try {
1878
+ // Create a timeout promise
1879
+ const timeoutPromise = new Promise((_, reject) => {
1880
+ setTimeout(() => reject(new Error('Page load timeout')), 10000); // Timeout after 10 seconds
1881
+ });
1882
+ // Create a tab monitoring promise
1883
+ const monitorTabPromise = new Promise(async (resolve, reject) => {
1884
+ tabsUpdateEvent.addListener(async function onTabUpdated(obj) {
1885
+ if (obj.tabId !== tab.id)
1886
+ return;
1887
+ if (obj.changeInfo.status === 'complete') {
1605
1888
  tabsUpdateEvent.removeListener(eventId);
1606
- // inject js
1607
- await injectScript(tab.id, filename);
1608
- await sleep(1000);
1609
- // cralwer content and comments
1610
- // { title, content }
1611
- let result = await chrome.tabs.sendMessage(tab.id, {
1612
- type: 'page:getContent',
1613
- });
1614
- if (!result) {
1615
- throw Error('No Result');
1889
+ try {
1890
+ // Inject script and get page content
1891
+ await injectScript(tab.id, filename);
1892
+ await sleep(1000);
1893
+ let result = await chrome.tabs.sendMessage(tab.id, {
1894
+ type: 'page:getContent',
1895
+ });
1896
+ if (!result)
1897
+ throw new Error('No Result');
1898
+ link.content = result.content;
1899
+ link.page_title = result.title;
1900
+ searchInfo.succeed++;
1901
+ resolve(); // Resolve the promise if successful
1902
+ }
1903
+ catch (error) {
1904
+ searchInfo.failed++;
1905
+ searchInfo.failedLinks.push(link);
1906
+ reject(error); // Reject the promise on error
1907
+ }
1908
+ finally {
1909
+ searchInfo.running--;
1910
+ countDownLatch.countDown();
1911
+ chrome.tabs.remove(tab.id);
1912
+ tabsUpdateEvent.removeListener(eventId);
1616
1913
  }
1617
- link.content = result.content;
1618
- link.page_title = result.title;
1619
- searchInfo.succeed++;
1620
- }
1621
- catch (e) {
1622
- searchInfo.failed++;
1623
- searchInfo.failedLinks.push(link);
1624
- console.error(link.title + ' crawler error', link.url, e);
1625
1914
  }
1626
- finally {
1915
+ else if (obj.changeInfo.status === 'unloaded') {
1627
1916
  searchInfo.running--;
1628
1917
  countDownLatch.countDown();
1629
1918
  chrome.tabs.remove(tab.id);
1630
1919
  tabsUpdateEvent.removeListener(eventId);
1920
+ reject(new Error('Tab unloaded')); // Reject if the tab is unloaded
1631
1921
  }
1632
- }
1633
- else if (obj.changeInfo.status === 'unloaded') {
1634
- searchInfo.running--;
1635
- countDownLatch.countDown();
1636
- chrome.tabs.remove(tab.id);
1637
- tabsUpdateEvent.removeListener(eventId);
1638
- }
1639
- }, eventId);
1922
+ }, eventId);
1923
+ });
1924
+ // Use Promise.race to enforce the timeout
1925
+ try {
1926
+ await Promise.race([monitorTabPromise, timeoutPromise]);
1927
+ }
1928
+ catch (e) {
1929
+ console.error(`${link.title} failed:`, e);
1930
+ searchInfo.running--;
1931
+ searchInfo.failed++;
1932
+ searchInfo.failedLinks.push(link);
1933
+ countDownLatch.countDown();
1934
+ chrome.tabs.remove(tab.id); // Clean up tab on failure
1935
+ }
1640
1936
  }
1641
1937
  }
1642
1938
  await countDownLatch.await(60000);
1643
1939
  return searchInfo;
1644
1940
  }
1645
1941
 
/**
 * Tool that asks the user to log in on the current tab and waits until they
 * confirm (or the tab disappears).
 *
 * Relies on helpers defined elsewhere in this bundle: `getTabId`,
 * `getWindowId`, `screenshot`, `doesTabExists`, and on the `chrome` extension API.
 */
class RequestLogin {
    constructor() {
        this.name = 'request_login';
        this.description =
            'Login to this website, assist with identity verification when manual intervention is needed, guide users through the login process, and wait for their confirmation of successful login.';
        this.input_schema = {
            type: 'object',
            properties: {},
        };
    }
    /**
     * Request a manual login unless the page already looks logged in.
     *
     * @param context Execution context (passed to `getTabId` / `getWindowId`; `context.llmProvider` is used by `isLoginIn`).
     * @param params Optional object; a truthy `params.force` skips the "already logged in" check.
     * @returns {Promise<boolean>} `true` when already logged in or the user confirmed, `false` when the tab went away.
     */
    async execute(context, params) {
        // The schema declares no properties, so tolerate a missing params object.
        const force = params != null && params.force;
        if (!force && await this.isLoginIn(context)) {
            return true;
        }
        let tabId = await getTabId(context);
        let task_id = 'login_required_' + tabId;
        const request_user_help = async () => {
            await chrome.tabs.sendMessage(tabId, {
                type: 'request_user_help',
                task_id,
                failure_type: 'login_required',
                failure_message: 'Access page require user authentication.',
            });
        };
        // Re-send the prompt every 2 seconds until awaitLogin settles.
        // A failed send is logged instead of being left as an unhandled rejection;
        // polling continues (as it effectively did before) because a send may fail
        // only transiently, e.g. while the tab is navigating -- NOTE(review): confirm.
        const login_interval = setInterval(() => {
            request_user_help().catch((e) => {
                console.warn('request_user_help failed:', e);
            });
        }, 2000);
        try {
            return await this.awaitLogin(tabId, task_id);
        }
        finally {
            clearInterval(login_interval);
        }
    }
    /**
     * Wait for an `issue_resolved` runtime message carrying `task_id`, or for the tab to stop existing.
     *
     * @param tabId Tab being watched (checked once per second via `doesTabExists`).
     * @param task_id Identifier expected in the `issue_resolved` message.
     * @returns {Promise<boolean>} `true` on confirmation, `false` when the tab no longer exists.
     */
    async awaitLogin(tabId, task_id) {
        return new Promise((resolve) => {
            // Single exit point so both outcomes release the timer AND the message listener.
            const finish = (loggedIn) => {
                clearInterval(checkTabClosedInterval);
                chrome.runtime.onMessage.removeListener(listener);
                resolve(loggedIn);
            };
            const listener = (message) => {
                if (message.type === 'issue_resolved' && message.task_id === task_id) {
                    finish(true);
                }
            };
            const checkTabClosedInterval = setInterval(async () => {
                const tabExists = await doesTabExists(tabId);
                if (!tabExists) {
                    finish(false);
                }
            }, 1000);
            chrome.runtime.onMessage.addListener(listener);
        });
    }
    /**
     * Ask the LLM, based on a screenshot of the current window, whether the site is logged in.
     *
     * @param context Execution context providing `llmProvider.generateText`.
     * @returns {Promise<boolean>} `true` when the model output contains `LOGGED_IN`.
     */
    async isLoginIn(context) {
        let windowId = await getWindowId(context);
        let screenshot_result = await screenshot(windowId, true);
        let messages = [
            {
                role: 'user',
                content: [
                    {
                        type: 'image',
                        source: screenshot_result.image,
                    },
                    {
                        type: 'text',
                        text: 'Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.',
                    },
                ],
            },
        ];
        let response = await context.llmProvider.generateText(messages, { maxTokens: 256 });
        let text = response.textContent;
        if (!text) {
            // JSON.stringify(undefined) is undefined; fall back to '' so the check below cannot throw.
            text = JSON.stringify(response.content) || '';
        }
        return text.indexOf('LOGGED_IN') > -1;
    }
}
2026
+
/**
 * Tool that lets the model abort the running workflow, giving a reason.
 */
class CancelWorkflow {
    constructor() {
        const reasonProperty = {
            type: 'string',
            description: 'Why the workflow should be cancelled.',
        };
        this.name = 'cancel_workflow';
        this.description = 'Cancel the workflow. If any tool consistently encounters exceptions, invoke this tool to cancel the workflow.';
        this.input_schema = {
            type: 'object',
            properties: { reason: reasonProperty },
            required: ['reason'],
        };
    }
    /**
     * Log the reason and call `context.workflow.cancel()` when a workflow is attached.
     *
     * @param context Execution context; `context.workflow` may be absent.
     * @param params Object with a non-empty `reason`.
     * @returns {Promise<undefined>}
     * @throws {Error} When `params` is not an object or has no `reason`.
     */
    async execute(context, params) {
        const isObject = typeof params === 'object' && params !== null;
        if (!isObject || !params.reason) {
            throw new Error('Invalid parameters. Expected an object with a "reason" property.');
        }
        console.log("The workflow has been cancelled because: " + params.reason);
        const workflow = context.workflow;
        // A missing workflow is a no-op; the await still runs so timing matches either way.
        const cancellation = (workflow === null || workflow === undefined) ? undefined : workflow.cancel();
        await cancellation;
        return;
    }
}
2053
+
/**
 * Tool that forwards a free-form question to the user through the
 * `onHumanInputText` hook and returns whatever the hook answers.
 */
class HumanInputText {
    constructor() {
        this.name = 'human_input_text';
        this.description = 'When you are unsure about the details of your next action, call me and ask the user for details in the "question" field. The user will provide you with a text as an answer.';
        this.input_schema = {
            type: 'object',
            properties: {
                question: {
                    type: 'string',
                    description: 'Ask the user here.',
                },
            },
            required: ['question'],
        };
    }
    /**
     * @param context Execution context; `context.callback.hooks.onHumanInputText` is invoked when present.
     * @param params Object with a non-empty `question`.
     * @returns {Promise<{status: string, answer: *}>} `status` is "OK" with the hook's answer, or an error status with an empty answer when no (truthy) answer was obtained.
     * @throws {Error} When `params` is not an object or has no `question`.
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.question) {
            throw new Error('Invalid parameters. Expected an object with a "question" property.');
        }
        const question = params.question;
        console.log("question: " + question);
        // Guard every link of callback -> hooks -> hook: a callback object without
        // `hooks` must yield the "cannot get answer" result, not a TypeError.
        const callback = context.callback;
        const hooks = callback == null ? undefined : callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanInputText;
        let answer = await (hook == null ? undefined : hook.call(hooks, question));
        if (!answer) {
            console.error("Cannot get user's answer.");
            return { status: "Error: Cannot get user's answer.", answer: "" };
        }
        else {
            console.log("answer: " + answer);
            return { status: "OK", answer: answer };
        }
    }
}
/**
 * Tool that asks the user to pick one option through the
 * `onHumanInputSingleChoice` hook.
 */
class HumanInputSingleChoice {
    constructor() {
        this.name = 'human_input_single_choice';
        this.description = 'When you are unsure about the details of your next action, call me and ask the user for details in the "question" field with at least 2 choices. The user will provide you with ONE choice as an answer.';
        this.input_schema = {
            type: 'object',
            properties: {
                question: {
                    type: 'string',
                    description: 'Ask the user here.',
                },
                choices: {
                    type: 'array',
                    description: 'All of the choices.',
                }
            },
            required: ['question', 'choices'],
        };
    }
    /**
     * @param context Execution context; `context.callback.hooks.onHumanInputSingleChoice` is invoked when present.
     * @param params Object with truthy `question` and `choices`.
     * @returns {Promise<{status: string, answer: *}>} "OK" with the hook's answer, or an error status with an empty answer.
     * @throws {Error} When `params` is not an object or lacks `question` / `choices`.
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.question || !params.choices) {
            throw new Error('Invalid parameters. Expected an object with a "question" and "choices" property.');
        }
        const question = params.question;
        const choices = params.choices;
        console.log("question: " + question);
        console.log("choices: " + choices);
        // Guard every link of callback -> hooks -> hook so a callback without
        // `hooks` produces the error result instead of a TypeError.
        const callback = context.callback;
        const hooks = callback == null ? undefined : callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanInputSingleChoice;
        let answer = await (hook == null ? undefined : hook.call(hooks, question, choices));
        if (!answer) {
            console.error("Cannot get user's answer.");
            return { status: "Error: Cannot get user's answer.", answer: "" };
        }
        else {
            console.log("answer: " + answer);
            return { status: "OK", answer: answer };
        }
    }
}
/**
 * Tool that asks the user to pick one or more options through the
 * `onHumanInputMultipleChoice` hook.
 */
class HumanInputMultipleChoice {
    constructor() {
        this.name = 'human_input_multiple_choice';
        this.description = 'When you are unsure about the details of your next action, call me and ask the user for details in the "question" field with at least 2 choices. The user will provide you with ONE or MORE choice as an answer.';
        this.input_schema = {
            type: 'object',
            properties: {
                question: {
                    type: 'string',
                    description: 'Ask the user here.',
                },
                choices: {
                    type: 'array',
                    description: 'All of the choices.',
                }
            },
            required: ['question', 'choices'],
        };
    }
    /**
     * @param context Execution context; `context.callback.hooks.onHumanInputMultipleChoice` is invoked when present.
     * @param params Object with truthy `question` and `choices`.
     * @returns {Promise<{status: string, answer: *}>} "OK" with the hook's answer, or an error status with an empty array.
     * @throws {Error} When `params` is not an object or lacks `question` / `choices`.
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.question || !params.choices) {
            throw new Error('Invalid parameters. Expected an object with a "question" and "choices" property.');
        }
        const question = params.question;
        const choices = params.choices;
        console.log("question: " + question);
        console.log("choices: " + choices);
        // Guard every link of callback -> hooks -> hook so a callback without
        // `hooks` produces the error result instead of a TypeError.
        const callback = context.callback;
        const hooks = callback == null ? undefined : callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanInputMultipleChoice;
        let answer = await (hook == null ? undefined : hook.call(hooks, question, choices));
        if (!answer) {
            console.error("Cannot get user's answer.");
            return { status: "Error: Cannot get user's answer.", answer: [] };
        }
        else {
            console.log("answer: " + answer);
            return { status: "OK", answer: answer };
        }
    }
}
/**
 * Tool that transfers control to the user through the `onHumanOperate` hook,
 * passing along the reason the model gave.
 */
class HumanOperate {
    constructor() {
        this.name = 'human_operate';
        this.description = 'When you encounter operations that require login, CAPTCHA verification, or other tasks that you cannot complete, please call this tool, transfer control to the user, and explain why.';
        this.input_schema = {
            type: 'object',
            properties: {
                reason: {
                    type: 'string',
                    description: 'The reason why you need to transfer control.',
                },
            },
            required: ['reason'],
        };
    }
    /**
     * @param context Execution context; `context.callback.hooks.onHumanOperate` is invoked when present.
     * @param params Object with a non-empty `reason`.
     * @returns {Promise<{status: string, userOperation: *}>} "OK" with the hook's result, or an error status with an empty string.
     * @throws {Error} When `params` is not an object or has no `reason`.
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.reason) {
            throw new Error('Invalid parameters. Expected an object with a "reason" property.');
        }
        const reason = params.reason;
        console.log("reason: " + reason);
        // Guard every link of callback -> hooks -> hook so a callback without
        // `hooks` produces the error result instead of a TypeError.
        const callback = context.callback;
        const hooks = callback == null ? undefined : callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanOperate;
        let userOperation = await (hook == null ? undefined : hook.call(hooks, reason));
        if (!userOperation) {
            console.error("Cannot get user's operation.");
            return { status: "Error: Cannot get user's operation.", userOperation: "" };
        }
        else {
            console.log("userOperation: " + userOperation);
            return { status: "OK", userOperation: userOperation };
        }
    }
}
2198
+
/**
 * Tool that reports the model's end-of-run summary through the
 * `onSummaryWorkflow` hook.
 */
class SummaryWorkflow {
    constructor() {
        this.name = 'summary_workflow';
        this.description = 'Summarize what this workflow has done from start to finish using an ordered list .';
        this.input_schema = {
            type: 'object',
            properties: {
                summary: {
                    type: 'string',
                    description: 'Your summary in markdown format.',
                },
            },
            required: ['summary'],
        };
    }
    /**
     * @param context Execution context; `context.callback.hooks.onSummaryWorkflow` is invoked (and awaited) when present.
     * @param params Object with a non-empty `summary`.
     * @returns {Promise<{status: string}>} Always `{ status: "OK" }` once the hook (if any) has finished.
     * @throws {Error} When `params` is not an object or has no `summary`.
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.summary) {
            throw new Error('Invalid parameters. Expected an object with a "summary" property.');
        }
        const summary = params.summary;
        console.log("summary: " + summary);
        // Guard every link of callback -> hooks -> hook so a callback without
        // `hooks` is simply skipped instead of raising a TypeError.
        const callback = context.callback;
        const hooks = callback == null ? undefined : callback.hooks;
        const hook = hooks == null ? undefined : hooks.onSummaryWorkflow;
        await (hook == null ? undefined : hook.call(hooks, summary));
        return { status: "OK" };
    }
}
2225
+
// Frozen, prototype-less namespace object listing every tool class under its own name.
var tools = /*#__PURE__*/Object.freeze({
    __proto__: null,
    BrowserUse,
    CancelWorkflow,
    ElementClick,
    ExportFile,
    ExtractContent,
    FindElementPosition,
    GetAllTabs,
    HumanInputMultipleChoice,
    HumanInputSingleChoice,
    HumanInputText,
    HumanOperate,
    OpenUrl,
    RequestLogin,
    Screenshot,
    SummaryWorkflow,
    TabManagement,
    WebSearch
});