@eko-ai/eko 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. package/README.md +72 -21
  2. package/dist/core/eko.d.ts +3 -2
  3. package/dist/extension/content/index.d.ts +1 -0
  4. package/dist/extension/tools/browser.d.ts +2 -1
  5. package/dist/extension/tools/get_all_tabs.d.ts +9 -0
  6. package/dist/extension/tools/index.d.ts +4 -1
  7. package/dist/extension/tools/request_login.d.ts +10 -0
  8. package/dist/extension/tools/tab_management.d.ts +1 -1
  9. package/dist/extension/utils.d.ts +2 -1
  10. package/dist/extension.cjs.js +797 -209
  11. package/dist/extension.esm.js +797 -209
  12. package/dist/extension_content_script.js +129 -2
  13. package/dist/index.cjs.js +518 -114
  14. package/dist/index.d.ts +2 -1
  15. package/dist/index.esm.js +518 -115
  16. package/dist/models/action.d.ts +9 -4
  17. package/dist/models/workflow.d.ts +8 -3
  18. package/dist/nodejs/script/build_dom_tree.d.ts +1 -0
  19. package/dist/nodejs/tools/browser_use.d.ts +28 -0
  20. package/dist/nodejs/tools/index.d.ts +2 -0
  21. package/dist/nodejs.cjs.js +71638 -12
  22. package/dist/nodejs.esm.js +71632 -6
  23. package/dist/schemas/workflow.schema.d.ts +2 -13
  24. package/dist/services/llm/claude-provider.d.ts +2 -1
  25. package/dist/services/llm/openai-provider.d.ts +2 -1
  26. package/dist/services/parser/workflow-parser.d.ts +0 -7
  27. package/dist/types/action.types.d.ts +8 -3
  28. package/dist/types/tools.types.d.ts +44 -1
  29. package/dist/types/workflow.types.d.ts +22 -9
  30. package/dist/universal_tools/cancel_workflow.d.ts +9 -0
  31. package/dist/universal_tools/human.d.ts +30 -0
  32. package/dist/universal_tools/index.d.ts +4 -0
  33. package/dist/universal_tools/summary_workflow.d.ts +9 -0
  34. package/dist/utils/execution-logger.d.ts +69 -0
  35. package/dist/web/tools/browser.d.ts +2 -1
  36. package/dist/web.cjs.js +29 -17
  37. package/dist/web.esm.js +29 -17
  38. package/package.json +6 -9
@@ -28,12 +28,21 @@ async function getWindowId(context) {
28
28
  async function getTabId(context) {
29
29
  let tabId = context.variables.get('tabId');
30
30
  if (tabId) {
31
- try {
32
- await chrome.tabs.get(tabId);
33
- }
34
- catch (e) {
35
- tabId = null;
31
+ // Ensure tabId is a number
32
+ tabId = Number(tabId);
33
+ // Check if it's a valid integer
34
+ if (!Number.isInteger(tabId)) {
36
35
  context.variables.delete('tabId');
36
+ tabId = null;
37
+ }
38
+ else {
39
+ try {
40
+ await chrome.tabs.get(tabId);
41
+ }
42
+ catch (e) {
43
+ tabId = null;
44
+ context.variables.delete('tabId');
45
+ }
37
46
  }
38
47
  }
39
48
  if (!tabId) {
@@ -50,28 +59,31 @@ async function getTabId(context) {
50
59
  else {
51
60
  tabId = await getCurrentTabId();
52
61
  }
62
+ if (!tabId) {
63
+ throw new Error('Could not find a valid tab');
64
+ }
65
+ context.variables.set('tabId', tabId);
53
66
  }
54
67
  return tabId;
55
68
  }
56
- function getCurrentTabId(windowId) {
57
- return new Promise((resolve) => {
58
- chrome.tabs.query({ windowId, active: true, lastFocusedWindow: true }, function (tabs) {
59
- if (tabs.length > 0) {
60
- resolve(tabs[0].id);
69
+ async function getCurrentTabId(windowId) {
70
+ return new Promise((resolve, reject) => {
71
+ chrome.tabs.query({ active: true, currentWindow: true }, function (tabs) {
72
+ if (chrome.runtime.lastError) {
73
+ console.error('Chrome runtime error:', chrome.runtime.lastError);
74
+ reject(chrome.runtime.lastError);
75
+ return;
61
76
  }
62
- else {
63
- chrome.tabs.query({ windowId, active: true, currentWindow: true }, function (_tabs) {
64
- if (_tabs.length > 0) {
65
- resolve(_tabs[0].id);
66
- return;
67
- }
68
- else {
69
- chrome.tabs.query({ windowId, status: 'complete', currentWindow: true }, function (__tabs) {
70
- resolve(__tabs.length ? __tabs[__tabs.length - 1].id : undefined);
71
- });
72
- }
73
- });
77
+ if (!tabs || tabs.length === 0) {
78
+ reject(new Error('No active tab found'));
79
+ return;
74
80
  }
81
+ const tabId = tabs[0].id;
82
+ if (typeof tabId !== 'number') {
83
+ reject(new Error('Invalid tab ID'));
84
+ return;
85
+ }
86
+ resolve(tabId);
75
87
  });
76
88
  });
77
89
  }
@@ -136,6 +148,19 @@ async function waitForTabComplete(tabId, timeout = 15000) {
136
148
  chrome.tabs.onUpdated.addListener(listener);
137
149
  });
138
150
  }
151
+ async function doesTabExists(tabId) {
152
+ const tabExists = await new Promise((resolve) => {
153
+ chrome.tabs.get(tabId, (tab) => {
154
+ if (chrome.runtime.lastError) {
155
+ resolve(false);
156
+ }
157
+ else {
158
+ resolve(true);
159
+ }
160
+ });
161
+ });
162
+ return tabExists;
163
+ }
139
164
  async function getPageSize(tabId) {
140
165
  if (!tabId) {
141
166
  tabId = await getCurrentTabId();
@@ -233,6 +258,7 @@ var utils = /*#__PURE__*/Object.freeze({
233
258
  __proto__: null,
234
259
  CountDownLatch: CountDownLatch,
235
260
  MsgEvent: MsgEvent,
261
+ doesTabExists: doesTabExists,
236
262
  executeScript: executeScript,
237
263
  getCurrentTabId: getCurrentTabId,
238
264
  getPageSize: getPageSize,
@@ -246,155 +272,355 @@ var utils = /*#__PURE__*/Object.freeze({
246
272
  });
247
273
 
248
274
  async function type(tabId, text, coordinate) {
249
- if (!coordinate) {
250
- coordinate = (await cursor_position(tabId)).coordinate;
275
+ console.log('Sending type message to tab:', tabId, { text, coordinate });
276
+ try {
277
+ if (!coordinate) {
278
+ coordinate = (await cursor_position(tabId)).coordinate;
279
+ }
280
+ await mouse_move(tabId, coordinate);
281
+ const response = await chrome.tabs.sendMessage(tabId, {
282
+ type: 'computer:type',
283
+ text,
284
+ coordinate,
285
+ });
286
+ console.log('Got response:', response);
287
+ return response;
288
+ }
289
+ catch (e) {
290
+ console.error('Failed to send type message:', e);
291
+ throw e;
251
292
  }
252
- await mouse_move(tabId, coordinate);
253
- return await chrome.tabs.sendMessage(tabId, {
254
- type: 'computer:type',
255
- text,
256
- coordinate,
257
- });
258
293
  }
259
294
  async function type_by(tabId, text, xpath, highlightIndex) {
260
- return await chrome.tabs.sendMessage(tabId, {
261
- type: 'computer:type',
262
- text,
263
- xpath,
264
- highlightIndex,
265
- });
295
+ console.log('Sending type message to tab:', tabId, { text, xpath, highlightIndex });
296
+ try {
297
+ const response = await chrome.tabs.sendMessage(tabId, {
298
+ type: 'computer:type',
299
+ text,
300
+ xpath,
301
+ highlightIndex,
302
+ });
303
+ console.log('Got response:', response);
304
+ return response;
305
+ }
306
+ catch (e) {
307
+ console.error('Failed to send type message:', e);
308
+ throw e;
309
+ }
266
310
  }
267
311
  async function clear_input(tabId, coordinate) {
268
- if (!coordinate) {
269
- coordinate = (await cursor_position(tabId)).coordinate;
312
+ console.log('Sending clear_input message to tab:', tabId, { coordinate });
313
+ try {
314
+ if (!coordinate) {
315
+ coordinate = (await cursor_position(tabId)).coordinate;
316
+ }
317
+ await mouse_move(tabId, coordinate);
318
+ const response = await chrome.tabs.sendMessage(tabId, {
319
+ type: 'computer:type',
320
+ text: '',
321
+ coordinate,
322
+ });
323
+ console.log('Got response:', response);
324
+ return response;
325
+ }
326
+ catch (e) {
327
+ console.error('Failed to send clear_input message:', e);
328
+ throw e;
270
329
  }
271
- await mouse_move(tabId, coordinate);
272
- return await chrome.tabs.sendMessage(tabId, {
273
- type: 'computer:type',
274
- text: '',
275
- coordinate,
276
- });
277
330
  }
278
331
  async function clear_input_by(tabId, xpath, highlightIndex) {
279
- return await chrome.tabs.sendMessage(tabId, {
280
- type: 'computer:type',
281
- text: '',
282
- xpath,
283
- highlightIndex,
284
- });
332
+ console.log('Sending clear_input_by message to tab:', tabId, { xpath, highlightIndex });
333
+ try {
334
+ const response = await chrome.tabs.sendMessage(tabId, {
335
+ type: 'computer:type',
336
+ text: '',
337
+ xpath,
338
+ highlightIndex,
339
+ });
340
+ console.log('Got response:', response);
341
+ return response;
342
+ }
343
+ catch (e) {
344
+ console.error('Failed to send clear_input_by message:', e);
345
+ throw e;
346
+ }
285
347
  }
286
348
  async function mouse_move(tabId, coordinate) {
287
- return await chrome.tabs.sendMessage(tabId, {
288
- type: 'computer:mouse_move',
289
- coordinate,
290
- });
349
+ console.log('Sending mouse_move message to tab:', tabId, { coordinate });
350
+ try {
351
+ const response = await chrome.tabs.sendMessage(tabId, {
352
+ type: 'computer:mouse_move',
353
+ coordinate,
354
+ });
355
+ console.log('Got response:', response);
356
+ return response;
357
+ }
358
+ catch (e) {
359
+ console.error('Failed to send mouse_move message:', e);
360
+ throw e;
361
+ }
291
362
  }
292
363
  async function left_click(tabId, coordinate) {
293
- if (!coordinate) {
294
- coordinate = (await cursor_position(tabId)).coordinate;
364
+ console.log('Sending left_click message to tab:', tabId, { coordinate });
365
+ try {
366
+ if (!coordinate) {
367
+ coordinate = (await cursor_position(tabId)).coordinate;
368
+ }
369
+ const response = await chrome.tabs.sendMessage(tabId, {
370
+ type: 'computer:left_click',
371
+ coordinate,
372
+ });
373
+ console.log('Got response:', response);
374
+ return response;
375
+ }
376
+ catch (e) {
377
+ console.error('Failed to send left_click message:', e);
378
+ throw e;
295
379
  }
296
- return await chrome.tabs.sendMessage(tabId, {
297
- type: 'computer:left_click',
298
- coordinate,
299
- });
300
380
  }
301
381
  async function left_click_by(tabId, xpath, highlightIndex) {
302
- return await chrome.tabs.sendMessage(tabId, {
303
- type: 'computer:left_click',
304
- xpath,
305
- highlightIndex,
306
- });
382
+ console.log('Sending left_click_by message to tab:', tabId, { xpath, highlightIndex });
383
+ try {
384
+ const response = await chrome.tabs.sendMessage(tabId, {
385
+ type: 'computer:left_click',
386
+ xpath,
387
+ highlightIndex,
388
+ });
389
+ console.log('Got response:', response);
390
+ return response;
391
+ }
392
+ catch (e) {
393
+ console.error('Failed to send left_click_by message:', e);
394
+ throw e;
395
+ }
307
396
  }
308
397
  async function right_click(tabId, coordinate) {
309
- if (!coordinate) {
310
- coordinate = (await cursor_position(tabId)).coordinate;
398
+ console.log('Sending right_click message to tab:', tabId, { coordinate });
399
+ try {
400
+ if (!coordinate) {
401
+ coordinate = (await cursor_position(tabId)).coordinate;
402
+ }
403
+ const response = await chrome.tabs.sendMessage(tabId, {
404
+ type: 'computer:right_click',
405
+ coordinate,
406
+ });
407
+ console.log('Got response:', response);
408
+ return response;
409
+ }
410
+ catch (e) {
411
+ console.error('Failed to send right_click message:', e);
412
+ throw e;
311
413
  }
312
- return await chrome.tabs.sendMessage(tabId, {
313
- type: 'computer:right_click',
314
- coordinate,
315
- });
316
414
  }
317
415
  async function right_click_by(tabId, xpath, highlightIndex) {
318
- return await chrome.tabs.sendMessage(tabId, {
319
- type: 'computer:right_click',
320
- xpath,
321
- highlightIndex,
322
- });
416
+ console.log('Sending right_click_by message to tab:', tabId, { xpath, highlightIndex });
417
+ try {
418
+ const response = await chrome.tabs.sendMessage(tabId, {
419
+ type: 'computer:right_click',
420
+ xpath,
421
+ highlightIndex,
422
+ });
423
+ console.log('Got response:', response);
424
+ return response;
425
+ }
426
+ catch (e) {
427
+ console.error('Failed to send right_click_by message:', e);
428
+ throw e;
429
+ }
323
430
  }
324
431
  async function double_click(tabId, coordinate) {
325
- if (!coordinate) {
326
- coordinate = (await cursor_position(tabId)).coordinate;
432
+ console.log('Sending double_click message to tab:', tabId, { coordinate });
433
+ try {
434
+ if (!coordinate) {
435
+ coordinate = (await cursor_position(tabId)).coordinate;
436
+ }
437
+ const response = await chrome.tabs.sendMessage(tabId, {
438
+ type: 'computer:double_click',
439
+ coordinate,
440
+ });
441
+ console.log('Got response:', response);
442
+ return response;
443
+ }
444
+ catch (e) {
445
+ console.error('Failed to send double_click message:', e);
446
+ throw e;
327
447
  }
328
- return await chrome.tabs.sendMessage(tabId, {
329
- type: 'computer:double_click',
330
- coordinate,
331
- });
332
448
  }
333
449
  async function double_click_by(tabId, xpath, highlightIndex) {
334
- return await chrome.tabs.sendMessage(tabId, {
335
- type: 'computer:double_click',
336
- xpath,
337
- highlightIndex,
338
- });
450
+ console.log('Sending double_click_by message to tab:', tabId, { xpath, highlightIndex });
451
+ try {
452
+ const response = await chrome.tabs.sendMessage(tabId, {
453
+ type: 'computer:double_click',
454
+ xpath,
455
+ highlightIndex,
456
+ });
457
+ console.log('Got response:', response);
458
+ return response;
459
+ }
460
+ catch (e) {
461
+ console.error('Failed to send double_click_by message:', e);
462
+ throw e;
463
+ }
339
464
  }
340
- async function screenshot(windowId) {
341
- let dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
342
- format: 'jpeg', // jpeg / png
343
- quality: 50, // 0-100
344
- });
345
- let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
346
- return {
347
- image: {
348
- type: 'base64',
349
- media_type: dataUrl.indexOf('image/png') > -1 ? 'image/png' : 'image/jpeg',
350
- data: data,
351
- },
352
- };
465
+ async function screenshot(windowId, compress) {
466
+ console.log('Taking screenshot of window:', windowId, { compress });
467
+ try {
468
+ let dataUrl;
469
+ if (compress) {
470
+ dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
471
+ format: 'jpeg',
472
+ quality: 60, // 0-100
473
+ });
474
+ dataUrl = await compress_image(dataUrl, 0.7, 1);
475
+ }
476
+ else {
477
+ dataUrl = await chrome.tabs.captureVisibleTab(windowId, {
478
+ format: 'jpeg',
479
+ quality: 50,
480
+ });
481
+ }
482
+ let data = dataUrl.substring(dataUrl.indexOf('base64,') + 7);
483
+ const result = {
484
+ image: {
485
+ type: 'base64',
486
+ media_type: dataUrl.indexOf('image/png') > -1 ? 'image/png' : 'image/jpeg',
487
+ data: data,
488
+ },
489
+ };
490
+ console.log('Got screenshot result:', result);
491
+ return result;
492
+ }
493
+ catch (e) {
494
+ console.error('Failed to take screenshot:', e);
495
+ throw e;
496
+ }
497
+ }
498
+ async function compress_image(dataUrl, scale = 0.8, quality = 0.8) {
499
+ console.log('Compressing image', { scale, quality });
500
+ try {
501
+ const bitmap = await createImageBitmap(await (await fetch(dataUrl)).blob());
502
+ let width = bitmap.width * scale;
503
+ let height = bitmap.height * scale;
504
+ const canvas = new OffscreenCanvas(width, height);
505
+ const ctx = canvas.getContext('2d');
506
+ ctx.drawImage(bitmap, 0, 0, width, height);
507
+ const blob = await canvas.convertToBlob({
508
+ type: 'image/jpeg',
509
+ quality: quality,
510
+ });
511
+ return new Promise((resolve) => {
512
+ const reader = new FileReader();
513
+ reader.onloadend = () => {
514
+ const result = reader.result;
515
+ console.log('Got compressed image result:', result);
516
+ resolve(result);
517
+ };
518
+ reader.readAsDataURL(blob);
519
+ });
520
+ }
521
+ catch (e) {
522
+ console.error('Failed to compress image:', e);
523
+ throw e;
524
+ }
353
525
  }
354
526
  async function scroll_to(tabId, coordinate) {
355
- let from_coordinate = (await cursor_position(tabId)).coordinate;
356
- return await chrome.tabs.sendMessage(tabId, {
357
- type: 'computer:scroll_to',
358
- from_coordinate,
359
- to_coordinate: coordinate,
360
- });
527
+ console.log('Sending scroll_to message to tab:', tabId, { coordinate });
528
+ try {
529
+ let from_coordinate = (await cursor_position(tabId)).coordinate;
530
+ const response = await chrome.tabs.sendMessage(tabId, {
531
+ type: 'computer:scroll_to',
532
+ from_coordinate,
533
+ to_coordinate: coordinate,
534
+ });
535
+ console.log('Got response:', response);
536
+ return response;
537
+ }
538
+ catch (e) {
539
+ console.error('Failed to send scroll_to message:', e);
540
+ throw e;
541
+ }
361
542
  }
362
543
  async function scroll_to_by(tabId, xpath, highlightIndex) {
363
- return await chrome.tabs.sendMessage(tabId, {
364
- type: 'computer:scroll_to',
365
- xpath,
366
- highlightIndex,
367
- });
544
+ console.log('Sending scroll_to_by message to tab:', tabId, { xpath, highlightIndex });
545
+ try {
546
+ const response = await chrome.tabs.sendMessage(tabId, {
547
+ type: 'computer:scroll_to',
548
+ xpath,
549
+ highlightIndex,
550
+ });
551
+ console.log('Got response:', response);
552
+ return response;
553
+ }
554
+ catch (e) {
555
+ console.error('Failed to send scroll_to_by message:', e);
556
+ throw e;
557
+ }
368
558
  }
369
559
  async function get_dropdown_options(tabId, xpath, highlightIndex) {
370
- return await chrome.tabs.sendMessage(tabId, {
371
- type: 'computer:get_dropdown_options',
372
- xpath,
373
- highlightIndex,
374
- });
560
+ console.log('Sending get_dropdown_options message to tab:', tabId, { xpath, highlightIndex });
561
+ try {
562
+ const response = await chrome.tabs.sendMessage(tabId, {
563
+ type: 'computer:get_dropdown_options',
564
+ xpath,
565
+ highlightIndex,
566
+ });
567
+ console.log('Got response:', response);
568
+ return response;
569
+ }
570
+ catch (e) {
571
+ console.error('Failed to send get_dropdown_options message:', e);
572
+ throw e;
573
+ }
375
574
  }
376
575
  async function select_dropdown_option(tabId, text, xpath, highlightIndex) {
377
- return await chrome.tabs.sendMessage(tabId, {
378
- type: 'computer:select_dropdown_option',
379
- text,
380
- xpath,
381
- highlightIndex,
382
- });
576
+ console.log('Sending select_dropdown_option message to tab:', tabId, { text, xpath, highlightIndex });
577
+ try {
578
+ const response = await chrome.tabs.sendMessage(tabId, {
579
+ type: 'computer:select_dropdown_option',
580
+ text,
581
+ xpath,
582
+ highlightIndex,
583
+ });
584
+ console.log('Got response:', response);
585
+ return response;
586
+ }
587
+ catch (e) {
588
+ console.error('Failed to send select_dropdown_option message:', e);
589
+ throw e;
590
+ }
383
591
  }
384
592
  async function cursor_position(tabId) {
385
- let result = await chrome.tabs.sendMessage(tabId, {
386
- type: 'computer:cursor_position',
387
- });
388
- return { coordinate: result.coordinate };
593
+ console.log('Sending cursor_position message to tab:', tabId);
594
+ try {
595
+ let result = await chrome.tabs.sendMessage(tabId, {
596
+ type: 'computer:cursor_position',
597
+ });
598
+ console.log('Got cursor position:', result.coordinate);
599
+ return { coordinate: result.coordinate };
600
+ }
601
+ catch (e) {
602
+ console.error('Failed to send cursor_position message:', e);
603
+ throw e;
604
+ }
389
605
  }
390
606
  async function size(tabId) {
391
- return await getPageSize(tabId);
607
+ console.log('Getting page size for tab:', tabId);
608
+ try {
609
+ const pageSize = await getPageSize(tabId);
610
+ console.log('Got page size:', pageSize);
611
+ return pageSize;
612
+ }
613
+ catch (e) {
614
+ console.error('Failed to get page size:', e);
615
+ throw e;
616
+ }
392
617
  }
393
618
 
394
619
  var browser = /*#__PURE__*/Object.freeze({
395
620
  __proto__: null,
396
621
  clear_input: clear_input,
397
622
  clear_input_by: clear_input_by,
623
+ compress_image: compress_image,
398
624
  cursor_position: cursor_position,
399
625
  double_click: double_click,
400
626
  double_click_by: double_click_by,
@@ -421,7 +647,6 @@ class BrowserUse {
421
647
  this.name = 'browser_use';
422
648
  this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
423
649
  * This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
424
- * Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
425
650
  * Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
426
651
  * ELEMENT INTERACTION:
427
652
  - Only use indexes that exist in the provided element list
@@ -431,17 +656,7 @@ class BrowserUse {
431
656
  - If no suitable elements exist, use other functions to complete the task
432
657
  - If stuck, try alternative approaches
433
658
  - Handle popups/cookies by accepting or closing them
434
- - Use scroll to find elements you are looking for
435
- * Form filling:
436
- - If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
437
- * ACTION SEQUENCING:
438
- - Actions are executed in the order they appear in the list
439
- - Each action should logically follow from the previous one
440
- - If the page changes after an action, the sequence is interrupted and you get the new state.
441
- - If content only disappears the sequence continues.
442
- - Only provide the action sequence until you think the page will change.
443
- - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
444
- - only use multiple actions if it makes sense.`;
659
+ - Use scroll to find elements you are looking for`;
445
660
  this.input_schema = {
446
661
  type: 'object',
447
662
  properties: {
@@ -496,7 +711,17 @@ class BrowserUse {
496
711
  if (params === null || !params.action) {
497
712
  throw new Error('Invalid parameters. Expected an object with a "action" property.');
498
713
  }
499
- let tabId = await getTabId(context);
714
+ let tabId;
715
+ try {
716
+ tabId = await getTabId(context);
717
+ if (!tabId || !Number.isInteger(tabId)) {
718
+ throw new Error('Could not get valid tab ID');
719
+ }
720
+ }
721
+ catch (e) {
722
+ console.error('Tab ID error:', e);
723
+ return { success: false, error: 'Could not access browser tab' };
724
+ }
500
725
  let windowId = await getWindowId(context);
501
726
  let selector_map = context.selector_map;
502
727
  let selector_xpath;
@@ -583,7 +808,7 @@ class BrowserUse {
583
808
  return window.get_clickable_elements(true);
584
809
  }, []);
585
810
  context.selector_map = element_result.selector_map;
586
- let screenshot$1 = await screenshot(windowId);
811
+ let screenshot$1 = await screenshot(windowId, true);
587
812
  await executeScript(tabId, () => {
588
813
  return window.remove_highlight();
589
814
  }, []);
@@ -600,6 +825,7 @@ class BrowserUse {
600
825
  }
601
826
  }
602
827
  catch (e) {
828
+ console.error('Browser use error:', e);
603
829
  return { success: false, error: e === null || e === void 0 ? void 0 : e.message };
604
830
  }
605
831
  }
@@ -815,7 +1041,7 @@ ${pseudoHtml}
815
1041
  async function executeWithBrowserUse$1(context, task_prompt) {
816
1042
  let tabId = await getTabId(context);
817
1043
  let windowId = await getWindowId(context);
818
- let screenshot_result = await screenshot(windowId);
1044
+ let screenshot_result = await screenshot(windowId, false);
819
1045
  let messages = [
820
1046
  {
821
1047
  role: 'user',
@@ -895,6 +1121,7 @@ class ExportFile {
895
1121
  * @returns > { success: true }
896
1122
  */
897
1123
  async execute(context, params) {
1124
+ var _a, _b, _c;
898
1125
  if (typeof params !== 'object' || params === null || !('content' in params)) {
899
1126
  throw new Error('Invalid parameters. Expected an object with a "content" property.');
900
1127
  }
@@ -929,8 +1156,8 @@ class ExportFile {
929
1156
  else {
930
1157
  filename = params.filename;
931
1158
  }
932
- let tabId = await getTabId(context);
933
1159
  try {
1160
+ let tabId = await getTabId(context);
934
1161
  await chrome.scripting.executeScript({
935
1162
  target: { tabId: tabId },
936
1163
  func: exportFile,
@@ -939,13 +1166,14 @@ class ExportFile {
939
1166
  }
940
1167
  catch (e) {
941
1168
  let tab = await open_new_tab('https://www.google.com', true);
942
- tabId = tab.id;
1169
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1170
+ let tabId = tab.id;
943
1171
  await chrome.scripting.executeScript({
944
1172
  target: { tabId: tabId },
945
1173
  func: exportFile,
946
1174
  args: [filename, type, params.content],
947
1175
  });
948
- await sleep(1000);
1176
+ await sleep(5000);
949
1177
  await chrome.tabs.remove(tabId);
950
1178
  }
951
1179
  return { success: true };
@@ -1063,7 +1291,7 @@ ${pseudoHtml}
1063
1291
  async function executeWithBrowserUse(context, task_prompt) {
1064
1292
  await getTabId(context);
1065
1293
  let windowId = await getWindowId(context);
1066
- let screenshot_result = await screenshot(windowId);
1294
+ let screenshot_result = await screenshot(windowId, false);
1067
1295
  let messages = [
1068
1296
  {
1069
1297
  role: 'user',
@@ -1111,6 +1339,48 @@ async function executeWithBrowserUse(context, task_prompt) {
1111
1339
  };
1112
1340
  }
1113
1341
 
1342
+ class GetAllTabs {
1343
+ constructor() {
1344
+ this.name = 'get_all_tabs';
1345
+ this.description = 'Get the tabId, title, url and content from current all tabs without opening new tab.';
1346
+ this.input_schema = {
1347
+ type: 'object',
1348
+ properties: {},
1349
+ };
1350
+ }
1351
+ async execute(context, params) {
1352
+ const currentWindow = await chrome.windows.getCurrent();
1353
+ const windowId = currentWindow.id;
1354
+ const tabs = await chrome.tabs.query({ windowId });
1355
+ const tabsInfo = [];
1356
+ for (const tab of tabs) {
1357
+ if (tab.id === undefined) {
1358
+ console.warn(`Tab ID is undefined for tab with URL: ${tab.url}`);
1359
+ continue;
1360
+ }
1361
+ await injectScript(tab.id);
1362
+ await sleep(500);
1363
+ let content = await executeScript(tab.id, () => {
1364
+ return eko.extractHtmlContent();
1365
+ }, []);
1366
+ // Use title as description, but requirement may evolve
1367
+ let description = tab.title ? tab.title : "No description available.";
1368
+ const tabInfo = {
1369
+ id: tab.id,
1370
+ url: tab.url,
1371
+ title: tab.title,
1372
+ content: content,
1373
+ description: description,
1374
+ };
1375
+ console.log("url: " + tab.url);
1376
+ console.log("title: " + tab.title);
1377
+ console.log("description: " + description);
1378
+ tabsInfo.push(tabInfo);
1379
+ }
1380
+ return tabsInfo;
1381
+ }
1382
+ }
1383
+
1114
1384
  /**
1115
1385
  * Open Url
1116
1386
  */
@@ -1140,6 +1410,7 @@ class OpenUrl {
1140
1410
  * @returns > { tabId, windowId, title, success: true }
1141
1411
  */
1142
1412
  async execute(context, params) {
1413
+ var _a, _b, _c, _d, _e, _f;
1143
1414
  if (typeof params !== 'object' || params === null || !params.url) {
1144
1415
  throw new Error('Invalid parameters. Expected an object with a "url" property.');
1145
1416
  }
@@ -1152,10 +1423,12 @@ class OpenUrl {
1152
1423
  let tab;
1153
1424
  if (newWindow) {
1154
1425
  tab = await open_new_tab(url, true);
1426
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1155
1427
  }
1156
1428
  else {
1157
1429
  let windowId = await getWindowId(context);
1158
1430
  tab = await open_new_tab(url, false, windowId);
1431
+ (_f = (_e = (_d = context.callback) === null || _d === void 0 ? void 0 : _d.hooks) === null || _e === void 0 ? void 0 : _e.onTabCreated) === null || _f === void 0 ? void 0 : _f.call(_e, tab.id);
1159
1432
  }
1160
1433
  let windowId = tab.windowId;
1161
1434
  let tabId = tab.id;
@@ -1212,9 +1485,9 @@ class TabManagement {
1212
1485
  this.input_schema = {
1213
1486
  type: 'object',
1214
1487
  properties: {
1215
- commond: {
1488
+ command: {
1216
1489
  type: 'string',
1217
- description: `The commond to perform. The available commonds are:
1490
+ description: `The command to perform. The available commands are:
1218
1491
  * \`tab_all\`: View all tabs and return the tabId and title.
1219
1492
  * \`current_tab\`: Get current tab information (tabId, url, title).
1220
1493
  * \`go_back\`: Go back to the previous page in the current tab.
@@ -1224,29 +1497,30 @@ class TabManagement {
1224
1497
  * \`new_tab [url]\`: Open a new tab window and open the URL, eg: \`new_tab https://www.google.com\``,
1225
1498
  },
1226
1499
  },
1227
- required: ['commond'],
1500
+ required: ['command'],
1228
1501
  };
1229
1502
  }
1230
1503
  /**
1231
1504
  * Tab management
1232
1505
  *
1233
- * @param {*} params { commond: `new_tab [url]` | 'tab_all' | 'current_tab' | 'go_back' | 'close_tab' | 'switch_tab [tabId]' | `change_url [url]` }
1506
+ * @param {*} params { command: `new_tab [url]` | 'tab_all' | 'current_tab' | 'go_back' | 'close_tab' | 'switch_tab [tabId]' | `change_url [url]` }
1234
1507
  * @returns > { result, success: true }
1235
1508
  */
1236
1509
  async execute(context, params) {
1237
- if (params === null || !params.commond) {
1238
- throw new Error('Invalid parameters. Expected an object with a "commond" property.');
1510
+ var _a, _b, _c, _d, _e, _f;
1511
+ if (params === null || !params.command) {
1512
+ throw new Error('Invalid parameters. Expected an object with a "command" property.');
1239
1513
  }
1240
1514
  let windowId = await getWindowId(context);
1241
- let commond = params.commond.trim();
1242
- if (commond.startsWith('`')) {
1243
- commond = commond.substring(1);
1515
+ let command = params.command.trim();
1516
+ if (command.startsWith('`')) {
1517
+ command = command.substring(1);
1244
1518
  }
1245
- if (commond.endsWith('`')) {
1246
- commond = commond.substring(0, commond.length - 1);
1519
+ if (command.endsWith('`')) {
1520
+ command = command.substring(0, command.length - 1);
1247
1521
  }
1248
1522
  let result;
1249
- if (commond == 'tab_all') {
1523
+ if (command == 'tab_all') {
1250
1524
  result = [];
1251
1525
  let tabs = await chrome.tabs.query({ windowId: windowId });
1252
1526
  for (let i = 0; i < tabs.length; i++) {
@@ -1263,20 +1537,20 @@ class TabManagement {
1263
1537
  result.push(tabInfo);
1264
1538
  }
1265
1539
  }
1266
- else if (commond == 'current_tab') {
1540
+ else if (command == 'current_tab') {
1267
1541
  let tabId = await getTabId(context);
1268
1542
  let tab = await chrome.tabs.get(tabId);
1269
1543
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1270
1544
  result = tabInfo;
1271
1545
  }
1272
- else if (commond == 'go_back') {
1546
+ else if (command == 'go_back') {
1273
1547
  let tabId = await getTabId(context);
1274
1548
  await chrome.tabs.goBack(tabId);
1275
1549
  let tab = await chrome.tabs.get(tabId);
1276
1550
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1277
1551
  result = tabInfo;
1278
1552
  }
1279
- else if (commond == 'close_tab') {
1553
+ else if (command == 'close_tab') {
1280
1554
  let closedTabId = await getTabId(context);
1281
1555
  await chrome.tabs.remove(closedTabId);
1282
1556
  await sleep(100);
@@ -1294,16 +1568,16 @@ class TabManagement {
1294
1568
  let closeTabInfo = { closedTabId, newTabId, newTabTitle: tab.title };
1295
1569
  result = closeTabInfo;
1296
1570
  }
1297
- else if (commond.startsWith('switch_tab')) {
1298
- let tabId = parseInt(commond.replace('switch_tab', '').replace('[', '').replace(']', ''));
1571
+ else if (command.startsWith('switch_tab')) {
1572
+ let tabId = parseInt(command.replace('switch_tab', '').replace('[', '').replace(']', ''));
1299
1573
  let tab = await chrome.tabs.update(tabId, { active: true });
1300
1574
  context.variables.set('tabId', tab.id);
1301
1575
  context.variables.set('windowId', tab.windowId);
1302
1576
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1303
1577
  result = tabInfo;
1304
1578
  }
1305
- else if (commond.startsWith('change_url')) {
1306
- let url = commond.substring('change_url'.length).replace('[', '').replace(']', '').trim();
1579
+ else if (command.startsWith('change_url')) {
1580
+ let url = command.substring('change_url'.length).replace('[', '').replace(']', '').trim();
1307
1581
  let tabId = await getTabId(context);
1308
1582
  // await chrome.tabs.update(tabId, { url: url });
1309
1583
  await executeScript(tabId, () => {
@@ -1313,17 +1587,19 @@ class TabManagement {
1313
1587
  let tabInfo = { tabId, windowId: tab.windowId, title: tab.title, url: tab.url };
1314
1588
  result = tabInfo;
1315
1589
  }
1316
- else if (commond.startsWith('new_tab')) {
1317
- let url = commond.replace('new_tab', '').replace('[', '').replace(']', '').replace(/"/g, '');
1590
+ else if (command.startsWith('new_tab')) {
1591
+ let url = command.replace('new_tab', '').replace('[', '').replace(']', '').replace(/"/g, '');
1318
1592
  // First mandatory opening of a new window
1319
1593
  let newWindow = !context.variables.get('windowId') && !context.variables.get('tabId');
1320
1594
  let tab;
1321
1595
  if (newWindow) {
1322
1596
  tab = await open_new_tab(url, true);
1597
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1323
1598
  }
1324
1599
  else {
1325
1600
  let windowId = await getWindowId(context);
1326
1601
  tab = await open_new_tab(url, false, windowId);
1602
+ (_f = (_e = (_d = context.callback) === null || _d === void 0 ? void 0 : _d.hooks) === null || _e === void 0 ? void 0 : _e.onTabCreated) === null || _f === void 0 ? void 0 : _f.call(_e, tab.id);
1327
1603
  }
1328
1604
  let windowId = tab.windowId;
1329
1605
  let tabId = tab.id;
@@ -1347,7 +1623,7 @@ class TabManagement {
1347
1623
  result = tabInfo;
1348
1624
  }
1349
1625
  else {
1350
- throw Error('Unknown commond: ' + commond);
1626
+ throw Error('Unknown command: ' + command);
1351
1627
  }
1352
1628
  return result;
1353
1629
  }
@@ -1367,7 +1643,7 @@ class TabManagement {
1367
1643
  class WebSearch {
1368
1644
  constructor() {
1369
1645
  this.name = 'web_search';
1370
- this.description = 'Use web search to return search results';
1646
+ this.description = 'Search the web based on keywords and return relevant extracted content from webpages.';
1371
1647
  this.input_schema = {
1372
1648
  type: 'object',
1373
1649
  properties: {
@@ -1402,7 +1678,7 @@ class WebSearch {
1402
1678
  }
1403
1679
  let taskId = new Date().getTime() + '';
1404
1680
  let searchs = [{ url: url, keyword: query }];
1405
- let searchInfo = await deepSearch(taskId, searchs, maxResults || 5);
1681
+ let searchInfo = await deepSearch(context, taskId, searchs, maxResults || 5);
1406
1682
  let links = ((_a = searchInfo.result[0]) === null || _a === void 0 ? void 0 : _a.links) || [];
1407
1683
  return links.filter((s) => s.content);
1408
1684
  }
@@ -1475,7 +1751,7 @@ chrome.tabs.onUpdated.addListener(async function (tabId, changeInfo, tab) {
1475
1751
  * @param {array} searchs search list => [{ url: 'https://bing.com', keyword: 'ai' }]
1476
1752
  * @param {number} detailsMaxNum Maximum crawling quantity per search detail page
1477
1753
  */
1478
- async function deepSearch(taskId, searchs, detailsMaxNum, window) {
1754
+ async function deepSearch(context, taskId, searchs, detailsMaxNum, window) {
1479
1755
  let closeWindow = false;
1480
1756
  if (!window) {
1481
1757
  // open new window
@@ -1488,9 +1764,9 @@ async function deepSearch(taskId, searchs, detailsMaxNum, window) {
1488
1764
  }
1489
1765
  // crawler the search page details page link
1490
1766
  // [{ links: [{ title, url }] }]
1491
- let detailLinkGroups = await doDetailLinkGroups(taskId, searchs, detailsMaxNum, window);
1767
+ let detailLinkGroups = await doDetailLinkGroups(context, taskId, searchs, detailsMaxNum, window);
1492
1768
  // crawler all details page content and comments
1493
- let searchInfo = await doPageContent(taskId, detailLinkGroups, window);
1769
+ let searchInfo = await doPageContent(context, taskId, detailLinkGroups, window);
1494
1770
  console.log('searchInfo: ', searchInfo);
1495
1771
  // close window
1496
1772
  closeWindow && chrome.windows.remove(window.id);
@@ -1505,7 +1781,8 @@ async function deepSearch(taskId, searchs, detailsMaxNum, window) {
1505
1781
  * @param {*} window
1506
1782
  * @returns [{ links: [{ title, url }] }]
1507
1783
  */
1508
- async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
1784
+ async function doDetailLinkGroups(context, taskId, searchs, detailsMaxNum, window) {
1785
+ var _a, _b, _c;
1509
1786
  let detailLinkGroups = [];
1510
1787
  let countDownLatch = new CountDownLatch(searchs.length);
1511
1788
  for (let i = 0; i < searchs.length; i++) {
@@ -1517,6 +1794,7 @@ async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
1517
1794
  url: url,
1518
1795
  windowId: window.id,
1519
1796
  });
1797
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1520
1798
  let eventId = taskId + '_' + i;
1521
1799
  // monitor Tab status
1522
1800
  tabsUpdateEvent.addListener(async function (obj) {
@@ -1567,7 +1845,8 @@ async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
1567
1845
  * @param {*} window
1568
1846
  * @returns search info
1569
1847
  */
1570
- async function doPageContent(taskId, detailLinkGroups, window) {
1848
+ async function doPageContent(context, taskId, detailLinkGroups, window) {
1849
+ var _a, _b, _c;
1571
1850
  const searchInfo = {
1572
1851
  total: 0,
1573
1852
  running: 0,
@@ -1591,65 +1870,374 @@ async function doPageContent(taskId, detailLinkGroups, window) {
1591
1870
  url: link.url,
1592
1871
  windowId: window.id,
1593
1872
  });
1873
+ (_c = (_b = (_a = context.callback) === null || _a === void 0 ? void 0 : _a.hooks) === null || _b === void 0 ? void 0 : _b.onTabCreated) === null || _c === void 0 ? void 0 : _c.call(_b, tab.id);
1594
1874
  searchInfo.running++;
1595
1875
  let eventId = taskId + '_' + i + '_' + j;
1596
- // monitor Tab status
1597
- tabsUpdateEvent.addListener(async function (obj) {
1598
- if (obj.tabId != tab.id) {
1599
- return;
1600
- }
1601
- if (obj.changeInfo.status === 'complete') {
1602
- try {
1876
+ // Create a timeout promise
1877
+ const timeoutPromise = new Promise((_, reject) => {
1878
+ setTimeout(() => reject(new Error('Page load timeout')), 10000); // Timeout after 10 seconds
1879
+ });
1880
+ // Create a tab monitoring promise
1881
+ const monitorTabPromise = new Promise(async (resolve, reject) => {
1882
+ tabsUpdateEvent.addListener(async function onTabUpdated(obj) {
1883
+ if (obj.tabId !== tab.id)
1884
+ return;
1885
+ if (obj.changeInfo.status === 'complete') {
1603
1886
  tabsUpdateEvent.removeListener(eventId);
1604
- // inject js
1605
- await injectScript(tab.id, filename);
1606
- await sleep(1000);
1607
- // cralwer content and comments
1608
- // { title, content }
1609
- let result = await chrome.tabs.sendMessage(tab.id, {
1610
- type: 'page:getContent',
1611
- });
1612
- if (!result) {
1613
- throw Error('No Result');
1887
+ try {
1888
+ // Inject script and get page content
1889
+ await injectScript(tab.id, filename);
1890
+ await sleep(1000);
1891
+ let result = await chrome.tabs.sendMessage(tab.id, {
1892
+ type: 'page:getContent',
1893
+ });
1894
+ if (!result)
1895
+ throw new Error('No Result');
1896
+ link.content = result.content;
1897
+ link.page_title = result.title;
1898
+ searchInfo.succeed++;
1899
+ resolve(); // Resolve the promise if successful
1900
+ }
1901
+ catch (error) {
1902
+ searchInfo.failed++;
1903
+ searchInfo.failedLinks.push(link);
1904
+ reject(error); // Reject the promise on error
1905
+ }
1906
+ finally {
1907
+ searchInfo.running--;
1908
+ countDownLatch.countDown();
1909
+ chrome.tabs.remove(tab.id);
1910
+ tabsUpdateEvent.removeListener(eventId);
1614
1911
  }
1615
- link.content = result.content;
1616
- link.page_title = result.title;
1617
- searchInfo.succeed++;
1618
- }
1619
- catch (e) {
1620
- searchInfo.failed++;
1621
- searchInfo.failedLinks.push(link);
1622
- console.error(link.title + ' crawler error', link.url, e);
1623
1912
  }
1624
- finally {
1913
+ else if (obj.changeInfo.status === 'unloaded') {
1625
1914
  searchInfo.running--;
1626
1915
  countDownLatch.countDown();
1627
1916
  chrome.tabs.remove(tab.id);
1628
1917
  tabsUpdateEvent.removeListener(eventId);
1918
+ reject(new Error('Tab unloaded')); // Reject if the tab is unloaded
1629
1919
  }
1630
- }
1631
- else if (obj.changeInfo.status === 'unloaded') {
1632
- searchInfo.running--;
1633
- countDownLatch.countDown();
1634
- chrome.tabs.remove(tab.id);
1635
- tabsUpdateEvent.removeListener(eventId);
1636
- }
1637
- }, eventId);
1920
+ }, eventId);
1921
+ });
1922
+ // Use Promise.race to enforce the timeout
1923
+ try {
1924
+ await Promise.race([monitorTabPromise, timeoutPromise]);
1925
+ }
1926
+ catch (e) {
1927
+ console.error(`${link.title} failed:`, e);
1928
+ searchInfo.running--;
1929
+ searchInfo.failed++;
1930
+ searchInfo.failedLinks.push(link);
1931
+ countDownLatch.countDown();
1932
+ chrome.tabs.remove(tab.id); // Clean up tab on failure
1933
+ }
1638
1934
  }
1639
1935
  }
1640
1936
  await countDownLatch.await(60000);
1641
1937
  return searchInfo;
1642
1938
  }
1643
1939
 
1940
/**
 * Tool that hands the current tab over to the user for a manual login and
 * waits until the user confirms it (or the tab goes away).
 */
class RequestLogin {
    constructor() {
        this.name = 'request_login';
        this.description =
            'Login to this website, assist with identity verification when manual intervention is needed, guide users through the login process, and wait for their confirmation of successful login.';
        this.input_schema = {
            type: 'object',
            properties: {},
        };
    }
    /**
     * Ask the user to log in unless the page already looks logged in.
     *
     * @param {object} context execution context (read by getTabId / isLoginIn)
     * @param {object} [params] optional; `params.force` skips the logged-in check
     * @returns {Promise<boolean>} true when already logged in or the user
     *   confirmed the login, false when the tab disappeared first
     */
    async execute(context, params) {
        // The schema declares no properties, so params may legitimately be missing.
        const force = Boolean(params && params.force);
        if (!force && await this.isLoginIn(context)) {
            return true;
        }
        let tabId = await getTabId(context);
        let task_id = 'login_required_' + tabId;
        const request_user_help = async () => {
            await chrome.tabs.sendMessage(tabId, {
                type: 'request_user_help',
                task_id,
                failure_type: 'login_required',
                failure_message: 'Access page require user authentication.',
            });
        };
        // Re-send the help request every 2 seconds while waiting.
        const login_interval = setInterval(async () => {
            try {
                // Must be awaited: otherwise a rejected sendMessage escapes the
                // try/catch as an unhandled rejection and the timer is never cleared.
                await request_user_help();
            }
            catch (e) {
                console.error('request_user_help failed:', e);
                clearInterval(login_interval);
            }
        }, 2000);
        try {
            return await this.awaitLogin(tabId, task_id);
        }
        finally {
            clearInterval(login_interval);
        }
    }
    /**
     * Wait for an `issue_resolved` runtime message carrying `task_id`, polling
     * once per second for the tab having been closed.
     *
     * @param {number} tabId tab being watched
     * @param {string} task_id identifier expected in the confirmation message
     * @returns {Promise<boolean>} true on confirmation, false if the tab no longer exists
     */
    async awaitLogin(tabId, task_id) {
        return new Promise((resolve) => {
            let checkTabClosedInterval;
            // Both outcomes must release the timer AND the message listener;
            // leaving the listener registered leaks one per call.
            const cleanup = () => {
                clearInterval(checkTabClosedInterval);
                chrome.runtime.onMessage.removeListener(listener);
            };
            const listener = (message) => {
                if (message && message.type === 'issue_resolved' && message.task_id === task_id) {
                    cleanup();
                    resolve(true);
                }
            };
            checkTabClosedInterval = setInterval(async () => {
                const tabExists = await doesTabExists(tabId);
                if (!tabExists) {
                    cleanup();
                    resolve(false);
                }
            }, 1000);
            chrome.runtime.onMessage.addListener(listener);
        });
    }
    /**
     * Take a screenshot of the current window and ask the LLM provider whether
     * the site appears to be logged in.
     *
     * @param {object} context must expose `llmProvider.generateText`
     * @returns {Promise<boolean>} true only when the reply contains `LOGGED_IN`
     *   and no negated variant
     */
    async isLoginIn(context) {
        let windowId = await getWindowId(context);
        let screenshot_result = await screenshot(windowId, true);
        let messages = [
            {
                role: 'user',
                content: [
                    {
                        type: 'image',
                        source: screenshot_result.image,
                    },
                    {
                        type: 'text',
                        text: 'Check if the current website is logged in. If not logged in, output `NOT_LOGIN`. If logged in, output `LOGGED_IN`. Output directly without explanation.',
                    },
                ],
            },
        ];
        let response = await context.llmProvider.generateText(messages, { maxTokens: 256 });
        let text = response.textContent;
        if (!text) {
            text = JSON.stringify(response.content);
        }
        // JSON.stringify(undefined) is undefined, which has no indexOf.
        if (typeof text !== 'string') {
            return false;
        }
        // A reply such as `NOT_LOGGED_IN` contains `LOGGED_IN`; treat any
        // negated token as "not logged in".
        if (/NOT_LOG/.test(text)) {
            return false;
        }
        return text.indexOf('LOGGED_IN') > -1;
    }
}
2024
+
2025
/**
 * Tool that lets the model abort the running workflow, giving a reason.
 */
class CancelWorkflow {
    constructor() {
        Object.assign(this, {
            name: 'cancel_workflow',
            description: 'Cancel the workflow. If any tool consistently encounters exceptions, invoke this tool to cancel the workflow.',
            input_schema: {
                type: 'object',
                properties: {
                    reason: {
                        type: 'string',
                        description: 'Why the workflow should be cancelled.',
                    },
                },
                required: ['reason'],
            },
        });
    }
    /**
     * Log the reason and call `context.workflow.cancel()` when a workflow is present.
     *
     * @param {object} context execution context; `workflow` may be absent
     * @param {{reason: string}} params must carry a truthy `reason`
     * @returns {Promise<void>}
     * @throws {Error} when `params` is not an object with a `reason`
     */
    async execute(context, params) {
        const hasReason = params !== null && typeof params === 'object' && Boolean(params.reason);
        if (!hasReason) {
            throw new Error('Invalid parameters. Expected an object with a "reason" property.');
        }
        const why = params.reason;
        console.log("The workflow has been cancelled because: " + why);
        const workflow = context.workflow;
        await (workflow == null ? undefined : workflow.cancel());
    }
}
2051
+
2052
/**
 * Tool that forwards a free-text question to the user through the
 * `onHumanInputText` callback hook and returns the answer.
 */
class HumanInputText {
    constructor() {
        this.name = 'human_input_text';
        this.description = 'When you are unsure about the details of your next action, call me and ask the user for details in the "question" field. The user will provide you with a text as an answer.';
        this.input_schema = {
            type: 'object',
            properties: {
                question: {
                    type: 'string',
                    description: 'Ask the user here.',
                },
            },
            required: ['question'],
        };
    }
    /**
     * @param {object} context execution context; `callback.hooks.onHumanInputText` is optional
     * @param {{question: string}} params must carry a truthy `question`
     * @returns {Promise<{status: string, answer: string}>} `status` is "OK" when
     *   the hook produced a truthy answer, otherwise an error status with an empty answer
     * @throws {Error} when `params` is not an object with a `question`
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.question) {
            throw new Error('Invalid parameters. Expected an object with a "question" property.');
        }
        const question = params.question;
        console.log("question: " + question);
        // Every link of callback -> hooks -> hook is optional; a callback
        // object without `hooks` must not raise a TypeError.
        const hooks = context.callback == null ? undefined : context.callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanInputText;
        const answer = await (hook == null ? undefined : hook.call(hooks, question));
        if (!answer) {
            console.error("Cannot get user's answer.");
            return { status: "Error: Cannot get user's answer.", answer: "" };
        }
        console.log("answer: " + answer);
        return { status: "OK", answer: answer };
    }
}
2085
/**
 * Tool that asks the user to pick exactly one of several choices through the
 * `onHumanInputSingleChoice` callback hook.
 */
class HumanInputSingleChoice {
    constructor() {
        this.name = 'human_input_single_choice';
        this.description = 'When you are unsure about the details of your next action, call me and ask the user for details in the "question" field with at least 2 choices. The user will provide you with ONE choice as an answer.';
        this.input_schema = {
            type: 'object',
            properties: {
                question: {
                    type: 'string',
                    description: 'Ask the user here.',
                },
                choices: {
                    type: 'array',
                    description: 'All of the choices.',
                    // An array schema needs `items`; some providers reject it otherwise.
                    // NOTE(review): item type assumed to be string — confirm against the hook's declared type.
                    items: { type: 'string' },
                }
            },
            required: ['question', 'choices'],
        };
    }
    /**
     * @param {object} context execution context; `callback.hooks.onHumanInputSingleChoice` is optional
     * @param {{question: string, choices: Array}} params both fields must be truthy
     * @returns {Promise<{status: string, answer: *}>} `status` is "OK" with the
     *   hook's answer, otherwise an error status with an empty-string answer
     * @throws {Error} when `params` lacks `question` or `choices`
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.question || !params.choices) {
            throw new Error('Invalid parameters. Expected an object with a "question" and "choices" property.');
        }
        const question = params.question;
        const choices = params.choices;
        console.log("question: " + question);
        console.log("choices: " + choices);
        // Every link of callback -> hooks -> hook is optional; a callback
        // object without `hooks` must not raise a TypeError.
        const hooks = context.callback == null ? undefined : context.callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanInputSingleChoice;
        const answer = await (hook == null ? undefined : hook.call(hooks, question, choices));
        if (!answer) {
            console.error("Cannot get user's answer.");
            return { status: "Error: Cannot get user's answer.", answer: "" };
        }
        console.log("answer: " + answer);
        return { status: "OK", answer: answer };
    }
}
2124
/**
 * Tool that asks the user to pick one or more of several choices through the
 * `onHumanInputMultipleChoice` callback hook.
 */
class HumanInputMultipleChoice {
    constructor() {
        this.name = 'human_input_multiple_choice';
        this.description = 'When you are unsure about the details of your next action, call me and ask the user for details in the "question" field with at least 2 choices. The user will provide you with ONE or MORE choice as an answer.';
        this.input_schema = {
            type: 'object',
            properties: {
                question: {
                    type: 'string',
                    description: 'Ask the user here.',
                },
                choices: {
                    type: 'array',
                    description: 'All of the choices.',
                    // An array schema needs `items`; some providers reject it otherwise.
                    // NOTE(review): item type assumed to be string — confirm against the hook's declared type.
                    items: { type: 'string' },
                }
            },
            required: ['question', 'choices'],
        };
    }
    /**
     * @param {object} context execution context; `callback.hooks.onHumanInputMultipleChoice` is optional
     * @param {{question: string, choices: Array}} params both fields must be truthy
     * @returns {Promise<{status: string, answer: *}>} `status` is "OK" with the
     *   hook's answer, otherwise an error status with an empty-array answer
     * @throws {Error} when `params` lacks `question` or `choices`
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.question || !params.choices) {
            throw new Error('Invalid parameters. Expected an object with a "question" and "choices" property.');
        }
        const question = params.question;
        const choices = params.choices;
        console.log("question: " + question);
        console.log("choices: " + choices);
        // Every link of callback -> hooks -> hook is optional; a callback
        // object without `hooks` must not raise a TypeError.
        const hooks = context.callback == null ? undefined : context.callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanInputMultipleChoice;
        const answer = await (hook == null ? undefined : hook.call(hooks, question, choices));
        if (!answer) {
            console.error("Cannot get user's answer.");
            return { status: "Error: Cannot get user's answer.", answer: [] };
        }
        console.log("answer: " + answer);
        return { status: "OK", answer: answer };
    }
}
2163
/**
 * Tool that transfers control to the user (login, CAPTCHA, ...) through the
 * `onHumanOperate` callback hook and reports what the user did.
 */
class HumanOperate {
    constructor() {
        this.name = 'human_operate';
        this.description = 'When you encounter operations that require login, CAPTCHA verification, or other tasks that you cannot complete, please call this tool, transfer control to the user, and explain why.';
        this.input_schema = {
            type: 'object',
            properties: {
                reason: {
                    type: 'string',
                    description: 'The reason why you need to transfer control.',
                },
            },
            required: ['reason'],
        };
    }
    /**
     * @param {object} context execution context; `callback.hooks.onHumanOperate` is optional
     * @param {{reason: string}} params must carry a truthy `reason`
     * @returns {Promise<{status: string, userOperation: *}>} `status` is "OK"
     *   with the hook's result, otherwise an error status with an empty string
     * @throws {Error} when `params` is not an object with a `reason`
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.reason) {
            throw new Error('Invalid parameters. Expected an object with a "reason" property.');
        }
        const reason = params.reason;
        console.log("reason: " + reason);
        // Every link of callback -> hooks -> hook is optional; a callback
        // object without `hooks` must not raise a TypeError.
        const hooks = context.callback == null ? undefined : context.callback.hooks;
        const hook = hooks == null ? undefined : hooks.onHumanOperate;
        const userOperation = await (hook == null ? undefined : hook.call(hooks, reason));
        if (!userOperation) {
            console.error("Cannot get user's operation.");
            return { status: "Error: Cannot get user's operation.", userOperation: "" };
        }
        console.log("userOperation: " + userOperation);
        return { status: "OK", userOperation: userOperation };
    }
}
2196
+
2197
/**
 * Tool that hands the model's end-of-run summary to the `onSummaryWorkflow`
 * callback hook.
 */
class SummaryWorkflow {
    constructor() {
        this.name = 'summary_workflow';
        this.description = 'Summarize what this workflow has done from start to finish using an ordered list .';
        this.input_schema = {
            type: 'object',
            properties: {
                summary: {
                    type: 'string',
                    description: 'Your summary in markdown format.',
                },
            },
            required: ['summary'],
        };
    }
    /**
     * @param {object} context execution context; `callback.hooks.onSummaryWorkflow` is optional
     * @param {{summary: string}} params must carry a truthy `summary`
     * @returns {Promise<{status: string}>} always `{ status: "OK" }` once the hook (if any) has run
     * @throws {Error} when `params` is not an object with a `summary`
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !params.summary) {
            throw new Error('Invalid parameters. Expected an object with a "summary" property.');
        }
        const summary = params.summary;
        console.log("summary: " + summary);
        // Every link of callback -> hooks -> hook is optional; a callback
        // object without `hooks` must not raise a TypeError.
        const hooks = context.callback == null ? undefined : context.callback.hooks;
        const hook = hooks == null ? undefined : hooks.onSummaryWorkflow;
        await (hook == null ? undefined : hook.call(hooks, summary));
        return { status: "OK" };
    }
}
2223
+
1644
2224
  var tools = /*#__PURE__*/Object.freeze({
1645
2225
  __proto__: null,
1646
2226
  BrowserUse: BrowserUse,
2227
+ CancelWorkflow: CancelWorkflow,
1647
2228
  ElementClick: ElementClick,
1648
2229
  ExportFile: ExportFile,
1649
2230
  ExtractContent: ExtractContent,
1650
2231
  FindElementPosition: FindElementPosition,
2232
+ GetAllTabs: GetAllTabs,
2233
+ HumanInputMultipleChoice: HumanInputMultipleChoice,
2234
+ HumanInputSingleChoice: HumanInputSingleChoice,
2235
+ HumanInputText: HumanInputText,
2236
+ HumanOperate: HumanOperate,
1651
2237
  OpenUrl: OpenUrl,
2238
+ RequestLogin: RequestLogin,
1652
2239
  Screenshot: Screenshot,
2240
+ SummaryWorkflow: SummaryWorkflow,
1653
2241
  TabManagement: TabManagement,
1654
2242
  WebSearch: WebSearch
1655
2243
  });