@eko-ai/eko 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +70 -0
  3. package/dist/core/eko.d.ts +17 -0
  4. package/dist/core/tool-registry.d.ts +13 -0
  5. package/dist/extension/content/index.d.ts +7 -0
  6. package/dist/extension/core.d.ts +11 -0
  7. package/dist/extension/index.d.ts +7 -0
  8. package/dist/extension/script/bing.js +25 -0
  9. package/dist/extension/script/build_dom_tree.js +657 -0
  10. package/dist/extension/script/common.js +204 -0
  11. package/dist/extension/script/duckduckgo.js +25 -0
  12. package/dist/extension/script/google.js +26 -0
  13. package/dist/extension/tools/browser.d.ts +21 -0
  14. package/dist/extension/tools/browser_use.d.ts +18 -0
  15. package/dist/extension/tools/element_click.d.ts +12 -0
  16. package/dist/extension/tools/export_file.d.ts +18 -0
  17. package/dist/extension/tools/extract_content.d.ts +18 -0
  18. package/dist/extension/tools/find_element_position.d.ts +12 -0
  19. package/dist/extension/tools/form_autofill.d.ts +11 -0
  20. package/dist/extension/tools/html_script.d.ts +21 -0
  21. package/dist/extension/tools/index.d.ts +11 -0
  22. package/dist/extension/tools/open_url.d.ts +18 -0
  23. package/dist/extension/tools/screenshot.d.ts +18 -0
  24. package/dist/extension/tools/tab_management.d.ts +19 -0
  25. package/dist/extension/tools/web_search.d.ts +18 -0
  26. package/dist/extension/utils.d.ts +30 -0
  27. package/dist/extension.cjs.js +1783 -0
  28. package/dist/extension.esm.js +1776 -0
  29. package/dist/extension_content_script.js +247 -0
  30. package/dist/fellou/computer.d.ts +20 -0
  31. package/dist/fellou/index.d.ts +6 -0
  32. package/dist/fellou/tools/computer_use.d.ts +18 -0
  33. package/dist/fellou/tools/index.d.ts +2 -0
  34. package/dist/fellou.cjs.js +238 -0
  35. package/dist/fellou.esm.js +235 -0
  36. package/dist/index.cjs.js +9350 -0
  37. package/dist/index.d.ts +8 -0
  38. package/dist/index.esm.js +9340 -0
  39. package/dist/models/action.d.ts +20 -0
  40. package/dist/models/workflow.d.ts +15 -0
  41. package/dist/nodejs/index.d.ts +2 -0
  42. package/dist/nodejs/tools/index.d.ts +1 -0
  43. package/dist/nodejs.cjs.js +7 -0
  44. package/dist/nodejs.esm.js +5 -0
  45. package/dist/schemas/workflow.schema.d.ts +85 -0
  46. package/dist/services/llm/claude-provider.d.ts +10 -0
  47. package/dist/services/llm/openai-provider.d.ts +10 -0
  48. package/dist/services/parser/workflow-parser.d.ts +29 -0
  49. package/dist/services/workflow/generator.d.ts +11 -0
  50. package/dist/services/workflow/templates.d.ts +7 -0
  51. package/dist/types/action.types.d.ts +36 -0
  52. package/dist/types/eko.types.d.ts +21 -0
  53. package/dist/types/framework.types.d.ts +11 -0
  54. package/dist/types/index.d.ts +6 -0
  55. package/dist/types/llm.types.d.ts +54 -0
  56. package/dist/types/parser.types.d.ts +9 -0
  57. package/dist/types/tools.types.d.ts +88 -0
  58. package/dist/types/workflow.types.d.ts +39 -0
  59. package/dist/web/index.d.ts +2 -0
  60. package/dist/web/tools/index.d.ts +1 -0
  61. package/dist/web.cjs.js +7 -0
  62. package/dist/web.esm.js +5 -0
  63. package/package.json +108 -0
@@ -0,0 +1,1783 @@
1
+ 'use strict';
2
+
3
/**
 * Resolve the id of the browser window the current workflow should use.
 *
 * Resolution order:
 *  1. the `windowId` stored in `context.variables`, if that window still exists;
 *  2. the window that owns the stored `tabId`, if that tab still exists;
 *  3. the window returned by `chrome.windows.getCurrent()`.
 *
 * Stale `windowId` / `tabId` entries are deleted from `context.variables`.
 *
 * @param {{variables: {get: Function, delete: Function}}} context execution context
 * @returns {Promise<number>} id of the resolved window
 */
async function getWindowId(context) {
  const store = context.variables;
  let id = store.get('windowId');
  if (id) {
    try {
      await chrome.windows.get(id);
      // The remembered window is still open: nothing else to do.
      return id;
    } catch (windowError) {
      id = null;
      store.delete('windowId');
    }
    // The remembered window is gone; fall back to the remembered tab's window.
    const cachedTabId = store.get('tabId');
    if (cachedTabId) {
      try {
        const cachedTab = await chrome.tabs.get(cachedTabId);
        id = cachedTab.windowId;
      } catch (tabError) {
        store.delete('tabId');
      }
    }
  }
  if (!id) {
    const currentWindow = await chrome.windows.getCurrent();
    id = currentWindow.id;
  }
  return id;
}
30
/**
 * Resolve the id of the tab the current workflow should use.
 *
 * Returns the `tabId` stored in `context.variables` when that tab still
 * exists; otherwise drops the stale entry (if any) and falls back to
 * `getCurrentTabId()`.
 *
 * @param {{variables: {get: Function, delete: Function}}} context execution context
 * @returns {Promise<number|undefined>} id of the resolved tab
 */
async function getTabId(context) {
  const cached = context.variables.get('tabId');
  if (cached) {
    try {
      await chrome.tabs.get(cached);
      return cached;
    } catch (staleTabError) {
      // The remembered tab was closed; forget it and look up the current one.
      context.variables.delete('tabId');
    }
  }
  const currentId = await getCurrentTabId();
  return currentId;
}
46
/**
 * Find the id of the tab the user is most likely looking at.
 *
 * Three queries are tried in order and the first non-empty result wins:
 *  1. the active tab of the last focused window;
 *  2. the active tab of the current window;
 *  3. the last tab of the current window whose status is `complete`.
 *
 * A query whose callback receives no tab list is treated as an empty result,
 * so the returned promise always settles instead of hanging on a TypeError
 * thrown inside the callback.
 *
 * @returns {Promise<number|undefined>} the tab id, or `undefined` when no tab matches
 */
async function getCurrentTabId() {
  // Adapt the callback form of chrome.tabs.query to a promise of an array.
  const queryTabs = (queryInfo) =>
    new Promise((resolve) => {
      chrome.tabs.query(queryInfo, (tabs) => resolve(Array.isArray(tabs) ? tabs : []));
    });

  const focused = await queryTabs({ active: true, lastFocusedWindow: true });
  if (focused.length > 0) {
    return focused[0].id;
  }
  const active = await queryTabs({ active: true, currentWindow: true });
  if (active.length > 0) {
    return active[0].id;
  }
  const loaded = await queryTabs({ status: 'complete', currentWindow: true });
  return loaded.length > 0 ? loaded[loaded.length - 1].id : undefined;
}
68
/**
 * Open `url` in a new tab and wait until the tab has finished loading.
 *
 * @param {string} url address to open
 * @param {boolean} newWindow when truthy, a new maximized normal window is created for the tab
 * @param {number} [windowId] window to open the tab in; ignored when `newWindow`
 *   is truthy, defaults to the current window otherwise
 * @returns {Promise<object>} the tab object produced by `waitForTabComplete`
 */
async function open_new_tab(url, newWindow, windowId) {
  let openedTabId;
  if (newWindow) {
    const createdWindow = await chrome.windows.create({
      type: 'normal',
      state: 'maximized',
      url,
    });
    windowId = createdWindow.id;
    let windowTabs = createdWindow.tabs;
    if (!windowTabs) {
      // The created window did not report its tabs; open the page explicitly.
      windowTabs = [await chrome.tabs.create({ url, windowId })];
    }
    openedTabId = windowTabs[0].id;
  } else {
    if (!windowId) {
      windowId = (await chrome.windows.getCurrent()).id;
    }
    const createdTab = await chrome.tabs.create({ url, windowId });
    openedTabId = createdTab.id;
  }
  const loadedTab = await waitForTabComplete(openedTabId);
  // Short settle delay after the load-complete notification.
  await sleep(200);
  return loadedTab;
}
100
/**
 * Run `func` with `args` inside the given tab via `chrome.scripting.executeScript`.
 *
 * @param {number} tabId tab to inject into
 * @param {Function} func function to execute in the page
 * @param {Array} args arguments passed to `func`
 * @returns {Promise<*>} the `result` field of the first injection result
 */
async function executeScript(tabId, func, args) {
  const injection = {
    target: { tabId },
    func,
    args,
  };
  const frames = await chrome.scripting.executeScript(injection);
  return frames[0].result;
}
108
/**
 * Wait until the given tab reports `status === 'complete'`.
 *
 * Resolves immediately when the tab is already complete; otherwise listens on
 * `chrome.tabs.onUpdated` until the tab completes or `timeout` elapses.
 *
 * @param {number} tabId tab to wait for
 * @param {number} [timeout=30000] maximum wait in milliseconds
 * @returns {Promise<object>} the completed tab object
 * @throws rejects with the `chrome.tabs.get` error when the tab cannot be
 *   looked up, or with an `Error` when the timeout elapses first
 */
async function waitForTabComplete(tabId, timeout = 30000) {
  // Awaited outside of any Promise executor so a failed lookup rejects the
  // returned promise instead of leaving it pending forever.
  const current = await chrome.tabs.get(tabId);
  if (current.status === 'complete') {
    return current;
  }
  return new Promise((resolve, reject) => {
    let timer;
    const listener = (updatedTabId, changeInfo, tab) => {
      if (updatedTabId === tabId && changeInfo.status === 'complete') {
        chrome.tabs.onUpdated.removeListener(listener);
        clearTimeout(timer);
        resolve(tab);
      }
    };
    timer = setTimeout(() => {
      chrome.tabs.onUpdated.removeListener(listener);
      reject(new Error(`Timed out after ${timeout}ms waiting for tab ${tabId} to finish loading`));
    }, timeout);
    chrome.tabs.onUpdated.addListener(listener);
  });
}
129
/**
 * Measure the viewport of a tab by running a small script inside it.
 *
 * @param {number} [tabId] tab to measure; defaults to `getCurrentTabId()`
 * @returns {Promise<[number, number]>} `[width, height]` as reported by the page
 */
async function getPageSize(tabId) {
  const targetTabId = tabId || (await getCurrentTabId());
  // Executed in the page: window size first, then documentElement, then body.
  const measureViewport = () => [
    window.innerWidth || document.documentElement.clientWidth || document.body.clientWidth,
    window.innerHeight || document.documentElement.clientHeight || document.body.clientHeight,
  ];
  const frames = await chrome.scripting.executeScript({
    target: { tabId: targetTabId },
    func: measureViewport,
  });
  const dimensions = frames[0].result;
  return [dimensions[0], dimensions[1]];
}
142
/**
 * Pause for `time` milliseconds.
 *
 * @param {number} time delay in milliseconds
 * @returns {Promise<void>} resolves (with no value) once the delay has elapsed
 */
function sleep(time) {
  return new Promise((wake) => {
    setTimeout(() => {
      wake();
    }, time);
  });
}
145
/**
 * Inject the shared helper script, plus an optional extra script, into a tab.
 *
 * `eko/script/common.js` is always injected; when `filename` is given,
 * `eko/script/<filename>` is injected after it.
 *
 * @param {number} tabId tab to inject into
 * @param {string} [filename] additional script file name under `eko/script/`
 * @returns {Promise<void>}
 */
async function injectScript(tabId, filename) {
  const scriptDir = 'eko/script/';
  const files = filename
    ? [scriptDir + 'common.js', scriptDir + filename]
    : [scriptDir + 'common.js'];
  await chrome.scripting.executeScript({ target: { tabId }, files });
}
155
/**
 * Minimal publish/subscribe hub: listeners are stored by id in `eventMap`
 * and invoked one after another by `publish`.
 */
class MsgEvent {
  constructor() {
    this.eventMap = {};
  }

  /**
   * Register `callback` under `id`, generating an id (timestamp followed by a
   * random number below 10000) when none is supplied.
   *
   * @param {Function} callback listener invoked with each published message
   * @param {string|number} [id] listener id; an existing listener with the same id is replaced
   * @returns {string|number} the id the listener is registered under
   */
  addListener(callback, id) {
    const listenerId = id
      ? id
      : `${new Date().getTime()}${Math.floor(Math.random() * 10000)}`;
    this.eventMap[listenerId] = callback;
    return listenerId;
  }

  /**
   * Remove the listener registered under `id` (no-op when it does not exist).
   *
   * @param {string|number} id listener id
   */
  removeListener(id) {
    delete this.eventMap[id];
  }

  /**
   * Deliver `msg` to every registered listener in turn, awaiting listeners
   * that return a promise. A listener that throws or rejects is logged with
   * `console.error` and does not stop delivery to the remaining listeners.
   *
   * @param {*} msg message passed to each listener
   * @returns {Promise<void>}
   */
  async publish(msg) {
    const handlers = Object.values(this.eventMap);
    for (const index of handlers.keys()) {
      try {
        const outcome = handlers[index](msg);
        if (isPromise(outcome)) {
          await outcome;
        }
      } catch (e) {
        console.error(e);
      }
    }
  }
}
184
/**
 * Countdown latch: lets a caller wait until `countDown()` has been called
 * enough times to bring the counter to zero (or below).
 */
class CountDownLatch {
  /**
   * @param {number} count number of `countDown()` calls to wait for
   */
  constructor(count) {
    this.resolve = undefined;
    this.currentCount = count;
  }

  /**
   * Decrement the counter and release the pending waiter, if any, once the
   * counter reaches zero or below.
   */
  countDown() {
    this.currentCount -= 1;
    if (this.currentCount > 0) {
      return;
    }
    if (this.resolve) {
      this.resolve();
    }
  }

  /**
   * Wait for the counter to reach zero.
   *
   * @param {number} [timeout] when greater than 0, the returned promise is
   *   rejected (without a reason) after this many milliseconds
   * @returns {Promise<void>} resolves once the counter is at or below zero
   */
  await(timeout) {
    return new Promise((settle, reject) => {
      let release = settle;
      if (timeout > 0) {
        const timer = setTimeout(reject, timeout);
        release = () => {
          clearTimeout(timer);
          settle();
        };
      }
      this.resolve = release;
      // The counter may already be exhausted before anyone starts waiting.
      if (this.currentCount <= 0) {
        release();
      }
    });
  }
}
216
/**
 * Tell whether `obj` is promise-like: a truthy object or function that has a
 * callable `then` property.
 *
 * @param {*} obj value to inspect
 * @returns {boolean} `true` when `obj` is promise-like
 */
function isPromise(obj) {
  if (!obj) {
    return false;
  }
  const kind = typeof obj;
  if (kind !== 'object' && kind !== 'function') {
    return false;
  }
  return typeof obj.then === 'function';
}
221
+
222
// Frozen, prototype-less namespace object bundling the tab/window/script helpers.
var utils = /*#__PURE__*/Object.freeze({
  __proto__: null,
  CountDownLatch,
  MsgEvent,
  executeScript,
  getCurrentTabId,
  getPageSize,
  getTabId,
  getWindowId,
  injectScript,
  isPromise,
  open_new_tab,
  sleep,
  waitForTabComplete,
});
237
+
238
/**
 * Send one or more key presses to the page.
 *
 * `key` is a whitespace-separated list of key specs. A spec is either a single
 * key name (`Enter`) or a `+`-joined combination whose last part is the key and
 * whose earlier parts are modifiers (`ctrl+shift+a`). Recognised modifiers,
 * case-insensitively: ctrl/control, alt/option, shift, meta/command; any other
 * modifier is logged and ignored. Each spec is sent as one `computer:key`
 * message, followed by a 100 ms pause.
 *
 * @param {number} tabId tab to send the keys to
 * @param {string} key key specs, e.g. `"ctrl+a Delete"`
 * @param {Array<number>} [coordinate] target position; defaults to the current cursor position
 * @returns {Promise<*>} the response to the last message sent (`undefined` when none was sent)
 */
async function key(tabId, key, coordinate) {
  const target = coordinate || (await cursor_position(tabId)).coordinate;
  await mouse_move(tabId, target);

  // Alias table for key names; currently empty, so names pass through as-is.
  const mapping = {};
  const modifierFlags = new Map([
    ['ctrl', 'ctrlKey'],
    ['control', 'ctrlKey'],
    ['alt', 'altKey'],
    ['option', 'altKey'],
    ['shift', 'shiftKey'],
    ['meta', 'metaKey'],
    ['command', 'metaKey'],
  ]);

  let lastResult;
  for (const spec of key.replace(/\s+/g, ' ').split(' ')) {
    const keyEvents = {
      key: '',
      ctrlKey: false,
      altKey: false,
      shiftKey: false,
      metaKey: false,
    };
    if (spec.indexOf('+') === -1) {
      keyEvents.key = mapping[spec] || spec;
    } else {
      const parts = spec.split('+').map((part) => mapping[part] || part);
      // Everything before the final part is a modifier.
      for (const part of parts.slice(0, -1)) {
        const modifier = part.toLowerCase();
        const flag = modifierFlags.get(modifier);
        if (flag) {
          keyEvents[flag] = true;
        } else {
          console.log('Unknown Key: ' + modifier);
        }
      }
      keyEvents.key = parts[parts.length - 1];
    }
    if (!keyEvents.key) {
      continue;
    }
    lastResult = await chrome.tabs.sendMessage(tabId, {
      type: 'computer:key',
      coordinate: target,
      ...keyEvents,
    });
    await sleep(100);
  }
  return lastResult;
}
292
/**
 * Move the pointer to a position and send a `computer:type` message with `text`.
 *
 * @param {number} tabId tab to type into
 * @param {string} text text to type
 * @param {Array<number>} [coordinate] target position; defaults to the current cursor position
 * @returns {Promise<*>} the response to the message
 */
async function type(tabId, text, coordinate) {
  const target = coordinate || (await cursor_position(tabId)).coordinate;
  await mouse_move(tabId, target);
  const message = { type: 'computer:type', text, coordinate: target };
  return await chrome.tabs.sendMessage(tabId, message);
}
303
/**
 * Send a `computer:type` message that targets an element by XPath.
 *
 * @param {number} tabId tab to type into
 * @param {string} text text to type
 * @param {string} xpath XPath of the target element
 * @returns {Promise<*>} the response to the message
 */
async function type_by_xpath(tabId, text, xpath) {
  const message = { type: 'computer:type', text, xpath };
  return await chrome.tabs.sendMessage(tabId, message);
}
310
/**
 * Move the pointer to a position and send a `computer:type` message with an
 * empty text.
 *
 * @param {number} tabId tab containing the input
 * @param {Array<number>} [coordinate] target position; defaults to the current cursor position
 * @returns {Promise<*>} the response to the message
 */
async function clear_input(tabId, coordinate) {
  const target = coordinate || (await cursor_position(tabId)).coordinate;
  await mouse_move(tabId, target);
  const message = { type: 'computer:type', text: '', coordinate: target };
  return await chrome.tabs.sendMessage(tabId, message);
}
321
/**
 * Send a `computer:type` message with an empty text, targeting an element by XPath.
 *
 * @param {number} tabId tab containing the input
 * @param {string} xpath XPath of the target element
 * @returns {Promise<*>} the response to the message
 */
async function clear_input_by_xpath(tabId, xpath) {
  const message = { type: 'computer:type', text: '', xpath };
  return await chrome.tabs.sendMessage(tabId, message);
}
328
/**
 * Send a `computer:mouse_move` message for the given position.
 *
 * @param {number} tabId tab to send the message to
 * @param {Array<number>} coordinate position to move to
 * @returns {Promise<*>} the response to the message
 */
async function mouse_move(tabId, coordinate) {
  const message = { type: 'computer:mouse_move', coordinate };
  return await chrome.tabs.sendMessage(tabId, message);
}
334
/**
 * Send a `computer:left_click` message for a position.
 *
 * @param {number} tabId tab to click in
 * @param {Array<number>} [coordinate] position to click; defaults to the current cursor position
 * @returns {Promise<*>} the response to the message
 */
async function left_click(tabId, coordinate) {
  const target = coordinate || (await cursor_position(tabId)).coordinate;
  const message = { type: 'computer:left_click', coordinate: target };
  return await chrome.tabs.sendMessage(tabId, message);
}
343
/**
 * Send a `computer:left_click` message that targets an element by XPath.
 *
 * @param {number} tabId tab to click in
 * @param {string} xpath XPath of the target element
 * @returns {Promise<*>} the response to the message
 */
async function left_click_by_xpath(tabId, xpath) {
  const message = { type: 'computer:left_click', xpath };
  return await chrome.tabs.sendMessage(tabId, message);
}
349
/**
 * Send a `computer:left_click_drag` message from the current cursor position
 * to `coordinate`.
 *
 * @param {number} tabId tab to drag in
 * @param {Array<number>} coordinate position to drag to
 * @returns {Promise<*>} the response to the message
 */
async function left_click_drag(tabId, coordinate) {
  const { coordinate: from_coordinate } = await cursor_position(tabId);
  const message = {
    type: 'computer:left_click_drag',
    from_coordinate,
    to_coordinate: coordinate,
  };
  return await chrome.tabs.sendMessage(tabId, message);
}
357
/**
 * Send a `computer:right_click` message for a position.
 *
 * @param {number} tabId tab to click in
 * @param {Array<number>} [coordinate] position to click; defaults to the current cursor position
 * @returns {Promise<*>} the response to the message
 */
async function right_click(tabId, coordinate) {
  const target = coordinate || (await cursor_position(tabId)).coordinate;
  const message = { type: 'computer:right_click', coordinate: target };
  return await chrome.tabs.sendMessage(tabId, message);
}
366
/**
 * Send a `computer:right_click` message that targets an element by XPath.
 *
 * @param {number} tabId tab to click in
 * @param {string} xpath XPath of the target element
 * @returns {Promise<*>} the response to the message
 */
async function right_click_by_xpath(tabId, xpath) {
  const message = { type: 'computer:right_click', xpath };
  return await chrome.tabs.sendMessage(tabId, message);
}
372
/**
 * Send a `computer:double_click` message for a position.
 *
 * @param {number} tabId tab to click in
 * @param {Array<number>} [coordinate] position to click; defaults to the current cursor position
 * @returns {Promise<*>} the response to the message
 */
async function double_click(tabId, coordinate) {
  const target = coordinate || (await cursor_position(tabId)).coordinate;
  const message = { type: 'computer:double_click', coordinate: target };
  return await chrome.tabs.sendMessage(tabId, message);
}
381
/**
 * Send a `computer:double_click` message that targets an element by XPath.
 *
 * @param {number} tabId tab to click in
 * @param {string} xpath XPath of the target element
 * @returns {Promise<*>} the response to the message
 */
async function double_click_by_xpath(tabId, xpath) {
  const message = { type: 'computer:double_click', xpath };
  return await chrome.tabs.sendMessage(tabId, message);
}
387
/**
 * Capture the visible tab of a window as a JPEG (quality 50) and return it as
 * a base64 image descriptor.
 *
 * @param {number} windowId window whose visible tab is captured
 * @returns {Promise<{image: {type: string, media_type: string, data: string}}>}
 *   `data` is the part of the data URL after `base64,`; `media_type` is
 *   `image/png` when the data URL mentions it, `image/jpeg` otherwise
 */
async function screenshot(windowId) {
  const captureOptions = {
    format: 'jpeg', // jpeg / png
    quality: 50, // 0-100
  };
  const dataUrl = await chrome.tabs.captureVisibleTab(windowId, captureOptions);
  const marker = 'base64,';
  const payload = dataUrl.substring(dataUrl.indexOf(marker) + marker.length);
  const media_type = dataUrl.indexOf('image/png') > -1 ? 'image/png' : 'image/jpeg';
  return {
    image: {
      type: 'base64',
      media_type,
      data: payload,
    },
  };
}
401
/**
 * Send a `computer:scroll_to` message from the current cursor position to
 * `coordinate`.
 *
 * @param {number} tabId tab to scroll
 * @param {Array<number>} coordinate position to scroll to
 * @returns {Promise<*>} the response to the message
 */
async function scroll_to(tabId, coordinate) {
  const { coordinate: from_coordinate } = await cursor_position(tabId);
  const message = {
    type: 'computer:scroll_to',
    from_coordinate,
    to_coordinate: coordinate,
  };
  return await chrome.tabs.sendMessage(tabId, message);
}
409
/**
 * Send a `computer:scroll_to` message that targets an element by XPath.
 *
 * @param {number} tabId tab to scroll
 * @param {string} xpath XPath of the target element
 * @returns {Promise<*>} the response to the message
 */
async function scroll_to_xpath(tabId, xpath) {
  const message = { type: 'computer:scroll_to', xpath };
  return await chrome.tabs.sendMessage(tabId, message);
}
415
/**
 * Ask the tab for the current cursor position via a `computer:cursor_position`
 * message.
 *
 * @param {number} tabId tab to query
 * @returns {Promise<{coordinate: *}>} object holding only the reported `coordinate`
 */
async function cursor_position(tabId) {
  const reply = await chrome.tabs.sendMessage(tabId, { type: 'computer:cursor_position' });
  const { coordinate } = reply;
  return { coordinate };
}
421
/**
 * Report the page size of a tab; delegates to `getPageSize`.
 *
 * @param {number} tabId tab to measure
 * @returns {Promise<*>} whatever `getPageSize(tabId)` resolves to
 */
async function size(tabId) {
  const dimensions = await getPageSize(tabId);
  return dimensions;
}
424
+
425
// Frozen, prototype-less namespace object bundling the page-interaction helpers.
var browser = /*#__PURE__*/Object.freeze({
  __proto__: null,
  clear_input,
  clear_input_by_xpath,
  cursor_position,
  double_click,
  double_click_by_xpath,
  key,
  left_click,
  left_click_by_xpath,
  left_click_drag,
  mouse_move,
  right_click,
  right_click_by_xpath,
  screenshot,
  scroll_to,
  scroll_to_xpath,
  size,
  type,
  type_by_xpath,
});
446
+
447
/**
 * Trigger a browser download of `content` as a file.
 *
 * Wraps the content in a Blob, points a temporary `<a download>` element at an
 * object URL for it, clicks the element, then removes it and revokes the URL.
 *
 * @param {string} filename name offered for the downloaded file
 * @param {string} type MIME type of the Blob
 * @param {*} content Blob content
 */
function exportFile(filename, type, content) {
  const blob = new Blob([content], { type });
  const anchor = Object.assign(document.createElement('a'), {
    href: URL.createObjectURL(blob),
    download: filename,
  });
  const host = document.body;
  // The anchor is attached to the document only for the duration of the click.
  host.appendChild(anchor);
  anchor.click();
  host.removeChild(anchor);
  URL.revokeObjectURL(anchor.href);
}
457
/**
 * List the options of the `<select>` element located by `xpath`.
 *
 * Intended to run inside the page, so it relies only on page globals.
 *
 * @param {string} xpath XPath of the `<select>` element
 * @returns {{options: Array<{index: number, text: string, value: string}>, id: string, name: string}|null}
 *   the options together with the element's `id` and `name`, or `null` when
 *   the XPath matches nothing or matches an element that is not a `<select>`
 */
function getDropdownOptions(xpath) {
  const select = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null)
    .singleNodeValue;
  // Same element-type guard as selectDropdownOption: a non-<select> match has
  // no `options` collection and would otherwise raise a TypeError below.
  if (!select || !select.tagName || select.tagName.toUpperCase() !== 'SELECT') {
    return null;
  }
  return {
    options: Array.from(select.options).map((opt) => ({
      index: opt.index,
      text: opt.text.trim(),
      value: opt.value,
    })),
    id: select.id,
    name: select.name,
  };
}
473
/**
 * Select the option whose trimmed text equals `text` in the `<select>` element
 * located by `xpath`, then dispatch a `change` event on the element.
 *
 * Intended to run inside the page, so it relies only on page globals.
 *
 * @param {string} xpath XPath of the `<select>` element
 * @param {string} text visible text of the option to select (compared after trimming the option text)
 * @returns {{success: boolean, error?: string, availableOptions?: string[], selectedValue?: string, selectedText?: string}}
 *   on success the selected value/text; on failure an `error` message and,
 *   when the option was not found, the list of available option texts
 */
function selectDropdownOption(xpath, text) {
  const select = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null)
    .singleNodeValue;
  if (!select || select.tagName.toUpperCase() !== 'SELECT') {
    return { success: false, error: 'Select not found or invalid element type' };
  }
  const options = Array.from(select.options);
  const option = options.find((opt) => opt.text.trim() === text);
  if (!option) {
    return {
      success: false,
      error: 'Option not found',
      availableOptions: options.map((o) => o.text.trim()),
    };
  }
  select.value = option.value;
  // A native `change` event bubbles; the synthetic one must too, otherwise
  // listeners attached to ancestor elements never observe the selection.
  select.dispatchEvent(new Event('change', { bubbles: true }));
  return {
    success: true,
    selectedValue: option.value,
    selectedText: option.text.trim(),
  };
}
495
/**
 * Extract the elements relevant to operating the page and render them as
 * pseudo-HTML, one element per line, in document order.
 *
 * Two kinds of elements are collected:
 *  - visible operable elements (`a, button, input, textarea, select`) that are
 *    not descendants of an already collected operable element;
 *  - visible leaf elements with 1 to 100 characters of trimmed text.
 *
 * Every collected element gets a sequential pseudo id, emitted as `id="..."`;
 * the element's real `id` attribute is emitted as `target="..."`. The
 * pseudo-id to element mapping is stored in `window.operableElementMap`.
 *
 * Intended to run inside the page, so it relies only on page globals.
 *
 * @returns {string} the pseudo-HTML, lines joined with `\n`
 */
function extractOperableElements() {
  const operableSelectors = 'a, button, input, textarea, select';
  const keptAttributes = ['id', 'name', 'type', 'value', 'href', 'title', 'placeholder'];

  // An element counts as visible when it is rendered and has a non-zero box.
  const isElementVisible = (element) => {
    const style = window.getComputedStyle(element);
    return (
      style.display !== 'none' &&
      style.visibility !== 'hidden' &&
      style.opacity !== '0' &&
      element.offsetWidth > 0 &&
      element.offsetHeight > 0
    );
  };

  // Number of elements preceding `element`; used as its document-order sort key.
  const getElementIndex = (element) =>
    document.evaluate('preceding::*', element, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null)
      .snapshotLength;

  // Descendants of collected operable elements. A Set keeps every membership
  // test constant-time instead of scanning a growing array for each element.
  const excludes = new Set();
  const addExclude = (children) => {
    for (let i = 0; i < children.length; i++) {
      excludes.add(children[i]);
      if (children[i].children) {
        addExclude(children[i].children);
      }
    }
  };

  // { pseudoId: element }
  const elementMap = {};
  const elements = [];
  let nextId = 1;

  // operable elements
  document.querySelectorAll(operableSelectors).forEach((element) => {
    if (!isElementVisible(element) || excludes.has(element)) {
      return;
    }
    const id = nextId++;
    elementMap[id.toString()] = element;
    const tagName = element.tagName.toLowerCase();
    const attributes = Array.from(element.attributes)
      .filter((attr) => keptAttributes.includes(attr.name))
      .map((attr) => `${attr.name == 'id' ? 'target' : attr.name}="${attr.value}"`)
      .join(' ');
    const inner = tagName == 'select' ? element.innerHTML : element.innerText || '';
    elements.push({
      originalIndex: getElementIndex(element),
      id: id,
      html: `<${tagName} id="${id}" ${attributes}>${inner}</${tagName}>`,
    });
    addExclude(element.children);
  });

  // short text elements
  const textWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT, {
    acceptNode: function (node) {
      if (node.matches(operableSelectors) || excludes.has(node)) {
        return NodeFilter.FILTER_SKIP;
      }
      const rawText = node.innerText;
      const text = rawText === null || rawText === undefined ? undefined : rawText.trim();
      const isShortLeaf =
        isElementVisible(node) && text && text.length <= 100 && node.children.length === 0;
      return isShortLeaf ? NodeFilter.FILTER_ACCEPT : NodeFilter.FILTER_SKIP;
    },
  });
  let currentNode;
  while ((currentNode = textWalker.nextNode())) {
    const id = nextId++;
    elementMap[id.toString()] = currentNode;
    const tagName = currentNode.tagName.toLowerCase();
    elements.push({
      originalIndex: getElementIndex(currentNode),
      id: id,
      html: `<${tagName} id="${id}">${currentNode.innerText.trim()}</${tagName}>`,
    });
  }

  // restore document order across both groups
  elements.sort((a, b) => a.originalIndex - b.originalIndex);
  // cache for later lookups by pseudo id
  window.operableElementMap = elementMap;
  return elements.map((e) => e.html).join('\n');
}
585
/**
 * Click the element stored under `id` in `window.operableElementMap`.
 *
 * Uses the element's own `click()` when available and otherwise dispatches a
 * bubbling, cancelable `click` MouseEvent on it.
 *
 * @param {string|number} id pseudo id of the element
 * @returns {boolean} `false` when no element is stored under `id`, `true` otherwise
 */
function clickOperableElement(id) {
  const target = window.operableElementMap[id];
  if (!target) {
    return false;
  }
  if (!target.click) {
    const syntheticClick = new MouseEvent('click', {
      view: window,
      bubbles: true,
      cancelable: true,
    });
    target.dispatchEvent(syntheticClick);
    return true;
  }
  target.click();
  return true;
}
602
/**
 * Compute the document-relative rectangle of the element stored under `id` in
 * `window.operableElementMap`.
 *
 * The viewport rectangle from `getBoundingClientRect()` is shifted by the
 * current scroll offsets.
 *
 * @param {string|number} id pseudo id of the element
 * @returns {{left: number, top: number, right: number, bottom: number, width: number, height: number}|null}
 *   the rectangle, or `null` when no element is stored under `id`
 */
function getOperableElementRect(id) {
  const target = window.operableElementMap[id];
  if (!target) {
    return null;
  }
  const box = target.getBoundingClientRect();
  const offsetX = window.scrollX;
  const offsetY = window.scrollY;
  return {
    left: box.left + offsetX,
    top: box.top + offsetY,
    right: box.right + offsetX,
    bottom: box.bottom + offsetY,
    width: box.right - box.left,
    height: box.bottom - box.top,
  };
}
617
+
618
/**
 * General-purpose browser tool: exposes screenshot/element extraction and
 * index-based element interaction as a single tool with an `action` switch.
 *
 * Element indexes refer to the `selector_map` that the
 * `screenshot_extract_element` action stores in `context.variables`.
 */
class BrowserUse {
  constructor() {
    this.name = 'browser_use';
    this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
* This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
* Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
* Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
* ELEMENT INTERACTION:
- Only use indexes that exist in the provided element list
- Each element has a unique index number (e.g., "[33]:<button>")
- Elements marked with "[]:" are non-interactive (for context only)
* NAVIGATION & ERROR HANDLING:
- If no suitable elements exist, use other functions to complete the task
- If stuck, try alternative approaches
- Handle popups/cookies by accepting or closing them
- Use scroll to find elements you are looking for
* Form filling:
- If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
* ACTION SEQUENCING:
- Actions are executed in the order they appear in the list
- Each action should logically follow from the previous one
- If the page changes after an action, the sequence is interrupted and you get the new state.
- If content only disappears the sequence continues.
- Only provide the action sequence until you think the page will change.
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
- only use multiple actions if it makes sense.`;
    this.input_schema = {
      type: 'object',
      properties: {
        action: {
          type: 'string',
          description: `The action to perform. The available actions are:
* \`screenshot_extract_element\`: Take a screenshot of the web page and extract operable elements.
- Screenshots are used to understand page layouts, with labeled bounding boxes corresponding to element indexes. Each bounding box and its label share the same color, with labels typically positioned in the top-right corner of the box.
- Screenshots help verify element positions and relationships. Labels may sometimes overlap, so extracted elements are used to verify the correct elements.
- In addition to screenshots, simplified information about interactive elements is returned, with element indexes corresponding to those in the screenshots.
* \`input_text\`: Enter a string in the interactive element.
* \`clear_text\`: Clear the text in the input/textarea element.
* \`click\`: Click to element.
* \`right_click\`: Right-click on the element.
* \`double_click\`: Double-click on the element.
* \`scroll_to\`: Scroll to the specified element.
* \`extract_content\`: Extract the text content of the current webpage.
* \`get_dropdown_options\`: Get all options from a native dropdown element.
* \`select_dropdown_option\`: Select dropdown option for interactive element index by the text of the option you want to select.`,
          enum: [
            'screenshot_extract_element',
            'input_text',
            'clear_text',
            'click',
            'right_click',
            'double_click',
            'scroll_to',
            'extract_content',
            'get_dropdown_options',
            'select_dropdown_option',
          ],
        },
        index: {
          type: 'integer',
          description: 'index of element, Operation elements must pass the corresponding index of the element',
        },
        text: {
          type: 'string',
          description: 'Required by `action=input_text` and `action=select_dropdown_option`',
        },
      },
      required: ['action'],
    };
  }

  /**
   * Execute one browser action.
   *
   * Never throws: every failure is reported as `{ success: false, error }`.
   *
   * @param {*} context execution context; `context.variables` holds `selector_map`
   * @param {*} params { action: 'input_text', index: 1, text: 'string' }
   * @returns > { success: true, image?: { type: 'base64', media_type: 'image/jpeg', data: '/9j...' }, text?: string }
   *   `success` is `false` (without `error`) when the action produced a falsy result
   */
  async execute(context, params) {
    try {
      // `== null` also rejects `undefined`, which previously surfaced as a raw
      // TypeError message instead of this validation error.
      if (params == null || !params.action) {
        throw new Error('Invalid parameters. Expected an object with a "action" property.');
      }
      const tabId = await getTabId(context);
      const windowId = await getWindowId(context);
      const selector_map = context.variables.get('selector_map');
      let selector_xpath;
      if (params.index != null && selector_map) {
        const entry = selector_map[params.index];
        selector_xpath = entry == null ? undefined : entry.xpath;
        if (!selector_xpath) {
          throw new Error('Element does not exist');
        }
      }
      const requireIndex = () => {
        if (params.index == null) {
          throw new Error('index parameter is required');
        }
      };
      const requireText = () => {
        if (params.text == null) {
          throw new Error('text parameter is required');
        }
      };
      let result;
      switch (params.action) {
        case 'input_text':
          requireIndex();
          requireText();
          result = await type_by_xpath(tabId, params.text, selector_xpath);
          await sleep(200);
          break;
        case 'clear_text':
          requireIndex();
          result = await clear_input_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        case 'click':
          requireIndex();
          result = await left_click_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        case 'right_click':
          requireIndex();
          result = await right_click_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        case 'double_click':
          requireIndex();
          result = await double_click_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        case 'scroll_to':
          requireIndex();
          result = await scroll_to_xpath(tabId, selector_xpath);
          await sleep(500);
          break;
        case 'extract_content': {
          const tab = await chrome.tabs.get(tabId);
          await injectScript(tabId);
          await sleep(200);
          const content = await executeScript(tabId, () => {
            return eko.extractHtmlContent();
          }, []);
          result = {
            title: tab.title,
            url: tab.url,
            content: content,
          };
          break;
        }
        case 'get_dropdown_options':
          requireIndex();
          result = await executeScript(tabId, getDropdownOptions, [selector_xpath]);
          break;
        case 'select_dropdown_option':
          requireIndex();
          requireText();
          result = await executeScript(tabId, selectDropdownOption, [selector_xpath, params.text]);
          break;
        case 'screenshot_extract_element': {
          await sleep(100);
          await injectScript(tabId, 'build_dom_tree.js');
          await sleep(100);
          const element_result = await executeScript(tabId, () => {
            return window.get_clickable_elements(true);
          }, []);
          context.variables.set('selector_map', element_result.selector_map);
          let capture;
          try {
            capture = await screenshot(windowId);
          } finally {
            // Always remove the highlight overlays, even when the capture
            // fails, so they are not left on the user's page.
            await executeScript(tabId, () => {
              return window.remove_highlight();
            }, []);
          }
          result = { image: capture.image, text: element_result.element_str };
          break;
        }
        default:
          throw Error(`Invalid parameters. The "${params.action}" value is not included in the "action" enumeration.`);
      }
      if (result) {
        return { success: true, ...result };
      }
      return { success: false };
    } catch (e) {
      return { success: false, error: e == null ? undefined : e.message };
    }
  }
}
817
+
818
/**
 * Element click tool: clicks the element described by a natural-language
 * task prompt.
 */
class ElementClick {
  constructor() {
    this.name = 'element_click';
    this.description = 'Click the element through task prompts';
    this.input_schema = {
      type: 'object',
      properties: {
        task_prompt: {
          type: 'string',
          description: 'Task prompt, eg: click search button',
        },
      },
      required: ['task_prompt'],
    };
  }

  /**
   * Click the element described by `params.task_prompt`.
   *
   * The HTML-element strategy is tried first; when it throws (the error is
   * logged) or returns a falsy result, the screenshot-based strategy is used.
   *
   * @param {*} context execution context handed to both strategies
   * @param {{task_prompt: string}} params tool input
   * @returns {Promise<*>} the result of whichever strategy produced the outcome
   * @throws {Error} when `params` is not an object with a non-empty `task_prompt`
   */
  async execute(context, params) {
    const hasPrompt = typeof params === 'object' && params !== null && params.task_prompt;
    if (!hasPrompt) {
      throw new Error('Invalid parameters. Expected an object with a "task_prompt" property.');
    }
    const { task_prompt } = params;
    let outcome;
    try {
      outcome = await executeWithHtmlElement$1(context, task_prompt);
    } catch (e) {
      console.log(e);
      outcome = false;
    }
    if (outcome) {
      return outcome;
    }
    return await executeWithBrowserUse$1(context, task_prompt);
  }
}
855
/**
 * Click strategy based on the page's pseudo-HTML.
 *
 * Extracts the operable elements of the tab, asks the LLM which element id
 * matches the task prompt, and clicks that element in the page.
 *
 * @param {*} context execution context; `context.llmProvider.generateText` is used
 * @param {string} task_prompt natural-language description of what to click
 * @returns {Promise<*>} the result of the in-page click when the LLM names an
 *   element, `false` when it does not
 */
async function executeWithHtmlElement$1(context, task_prompt) {
  const tabId = await getTabId(context);
  const pseudoHtml = await executeScript(tabId, extractOperableElements, []);
  const prompt = `# Task
Determine the operation intent based on user input, find the element ID that the user needs to operate on in the webpage HTML, and if the element does not exist, do nothing.
Output JSON format, no explanation required.

# User input
${task_prompt}

# Output example (when the element exists)
{"elementId": "1", "operationType": "click"}

# Output example (when the element does not exist)
{"elementId": null, "operationType": "unknown"}

# HTML
${pseudoHtml}
`;
  const response = await context.llmProvider.generateText(
    [{ role: 'user', content: prompt }],
    { maxTokens: 1024 },
  );
  const content = typeof response.content == 'string' ? response.content : response.content[0].text;
  // Keep only the text from the first "{" through the first "}".
  const jsonStart = content.indexOf('{');
  const jsonEnd = content.indexOf('}') + 1;
  const { elementId } = JSON.parse(content.substring(jsonStart, jsonEnd));
  if (!elementId) {
    return false;
  }
  return await executeScript(tabId, clickOperableElement, [elementId]);
}
889
/**
 * Screenshot-based click strategy: send a screenshot of the window plus the
 * prompt to the LLM, forcing a `left_click` tool call, then click at the
 * coordinate it returns.
 *
 * @param {*} context provides tab/window ids and context.llmProvider
 * @param {string} task_prompt what the user wants to click
 * @returns whatever left_click returns for the chosen coordinate
 */
async function executeWithBrowserUse$1(context, task_prompt) {
    const tabId = await getTabId(context);
    const windowId = await getWindowId(context);
    const shot = await screenshot(windowId);
    const clickTool = {
        name: 'left_click',
        description: 'click element',
        input_schema: {
            type: 'object',
            properties: {
                coordinate: {
                    type: 'array',
                    description: '(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates.',
                },
            },
            required: ['coordinate'],
        },
    };
    const userMessage = {
        role: 'user',
        content: [
            { type: 'image', source: shot.image },
            { type: 'text', text: 'click: ' + task_prompt },
        ],
    };
    const response = await context.llmProvider.generateText([userMessage], {
        maxTokens: 1024,
        toolChoice: { type: 'tool', name: 'left_click' },
        tools: [clickTool],
    });
    const { coordinate } = response.toolCalls[0].input;
    return await left_click(tabId, coordinate);
}
937
+
938
// MIME type per supported fileType; anything not listed is exported as text/plain.
const EXPORT_FILE_MIME_TYPES = new Map([
    ['csv', 'text/csv'],
    ['md', 'text/markdown'],
    ['html', 'text/html'],
    ['js', 'application/javascript'],
    ['xml', 'text/xml'],
    ['json', 'application/json'],
]);

/**
 * Tool: export text content as a downloadable file by injecting `exportFile`
 * into a browser tab.
 */
class ExportFile {
    constructor() {
        this.name = 'export_file';
        this.description = 'Content exported as a file, support text format';
        this.input_schema = {
            type: 'object',
            properties: {
                fileType: {
                    type: 'string',
                    description: 'File format type',
                    enum: ['txt', 'csv', 'md', 'html', 'js', 'xml', 'json', 'yml', 'sql'],
                },
                content: {
                    type: 'string',
                    description: 'Export file content',
                },
                filename: {
                    type: 'string',
                    description: 'File name',
                },
            },
            required: ['fileType', 'content'],
        };
    }
    /**
     * export
     *
     * @param {*} context execution context used to resolve the current tab
     * @param {*} params { fileType: 'csv', content: 'field1,field2\ndata1,data2' }
     * @returns > { success: true }
     * @throws {Error} when params is not an object with a "content" property,
     *         or when injection fails in both the current and the fallback tab
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !('content' in params)) {
            throw new Error('Invalid parameters. Expected an object with a "content" property.');
        }
        // Fall back to plain text instead of producing a ".undefined" file name.
        const fileType = params.fileType || 'txt';
        const type = EXPORT_FILE_MIME_TYPES.get(fileType) || 'text/plain';
        // Compare against ".ext", not "ext": "mycsv" must still become "mycsv.csv".
        const suffix = '.' + fileType;
        let filename;
        if (!params.filename) {
            filename = new Date().getTime() + suffix;
        }
        else {
            const requested = String(params.filename);
            filename = requested.endsWith(suffix) ? requested : requested + suffix;
        }
        const args = [filename, type, params.content];
        let tabId = await getTabId(context);
        try {
            await chrome.scripting.executeScript({
                target: { tabId: tabId },
                func: exportFile,
                args: args,
            });
        }
        catch (e) {
            // Injection into the current tab failed; retry in a freshly opened tab.
            let tab = await open_new_tab('https://www.google.com', true);
            tabId = tab.id;
            try {
                await chrome.scripting.executeScript({
                    target: { tabId: tabId },
                    func: exportFile,
                    args: args,
                });
                await sleep(1000);
            }
            finally {
                // Always close the helper tab, even when the retry fails too.
                await chrome.tabs.remove(tabId);
            }
        }
        return { success: true };
    }
}
1028
+
1029
/**
 * Tool: return the text content of the current tab together with its title
 * and URL.
 */
class ExtractContent {
    constructor() {
        this.name = 'extract_content';
        this.description = 'Extract the text content of the current webpage';
        this.input_schema = { type: 'object', properties: {} };
    }
    /**
     * Injects the content script, waits 500 ms, then calls
     * `eko.extractHtmlContent()` inside the page.
     *
     * @param {*} context execution context used to resolve the current tab
     * @param {*} params {} (unused)
     * @returns > { tabId, result: { title, url, content } }
     */
    async execute(context, params) {
        const tabId = await getTabId(context);
        const tab = await chrome.tabs.get(tabId);
        await injectScript(tabId);
        await sleep(500);
        const readPage = () => eko.extractHtmlContent();
        const content = await executeScript(tabId, readPage, []);
        const result = { title: tab.title, url: tab.url, content };
        return { tabId, result };
    }
}
1065
+
1066
/**
 * Tool: locate an element described by a natural-language prompt.
 *
 * The DOM-based lookup is tried first; when it throws or yields a falsy
 * result, the screenshot-based lookup is used instead.
 */
class FindElementPosition {
    constructor() {
        const schema = {
            type: 'object',
            properties: {
                task_prompt: {
                    type: 'string',
                    description: 'Task prompt, eg: find the search input box',
                },
            },
            required: ['task_prompt'],
        };
        this.name = 'find_element_position';
        this.description = 'Locate Element Coordinates through Task Prompts';
        this.input_schema = schema;
    }
    /**
     * @param {*} context execution context forwarded to both lookups
     * @param {*} params { task_prompt: 'find the search input box' }
     * @returns the DOM lookup's result when truthy, otherwise the screenshot
     *          lookup's result
     * @throws {Error} when params is not an object carrying a task_prompt
     */
    async execute(context, params) {
        const hasPrompt = typeof params === 'object' && params !== null && Boolean(params.task_prompt);
        if (!hasPrompt) {
            throw new Error('Invalid parameters. Expected an object with a "task_prompt" property.');
        }
        const prompt = params.task_prompt;
        let position = null;
        try {
            position = await executeWithHtmlElement(context, prompt);
        }
        catch (err) {
            // A DOM-lookup failure is not fatal: log it and fall through.
            console.log(err);
            position = null;
        }
        if (position) {
            return position;
        }
        return await executeWithBrowserUse(context, prompt);
    }
}
1103
/**
 * DOM-based lookup: send the page's operable-element markup and the user's
 * prompt to the LLM, read an element id out of its JSON reply and fetch that
 * element's rectangle from the tab.
 *
 * @param {*} context provides the tab (via getTabId) and context.llmProvider
 * @param {string} task_prompt which element the user is looking for
 * @returns the rect script's result, or null when the reply names no element
 */
async function executeWithHtmlElement(context, task_prompt) {
    const tabId = await getTabId(context);
    const pseudoHtml = await executeScript(tabId, extractOperableElements, []);
    const prompt = `# Task
Find the element ID that the user needs to operate on in the webpage HTML, and if the element does not exist, do nothing.
Output JSON format, no explanation required.

# User input
${task_prompt}

# Output example (when the element exists)
{"elementId": "1"}

# Output example (when the element does not exist)
{"elementId": null}

# HTML
${pseudoHtml}
`;
    const response = await context.llmProvider.generateText([{ role: 'user', content: prompt }], { maxTokens: 1024 });
    const raw = response.content;
    const text = typeof raw == 'string' ? raw : raw[0].text;
    // Keep only the span from the first '{' through the first '}'.
    const from = text.indexOf('{');
    const to = text.indexOf('}') + 1;
    const { elementId } = JSON.parse(text.substring(from, to));
    if (!elementId) {
        return null;
    }
    return await executeScript(tabId, getOperableElementRect, [elementId]);
}
1137
/**
 * Screenshot-based lookup: send a screenshot of the window plus the prompt to
 * the LLM, forcing a `get_element_by_coordinate` tool call, and return the
 * coordinate it picks.
 *
 * @param {*} context provides tab/window ids and context.llmProvider
 * @param {string} task_prompt which element the user is looking for
 * @returns {{left: *, top: *}} first and second entries of the returned coordinate
 */
async function executeWithBrowserUse(context, task_prompt) {
    // The tab id itself is not needed here; the call is kept for its effect on context.
    await getTabId(context);
    const windowId = await getWindowId(context);
    const shot = await screenshot(windowId);
    const locateTool = {
        name: 'get_element_by_coordinate',
        description: 'Retrieve element information based on coordinate',
        input_schema: {
            type: 'object',
            properties: {
                coordinate: {
                    type: 'array',
                    description: '(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates.',
                },
            },
            required: ['coordinate'],
        },
    };
    const userMessage = {
        role: 'user',
        content: [
            { type: 'image', source: shot.image },
            { type: 'text', text: 'Find the element: ' + task_prompt },
        ],
    };
    const response = await context.llmProvider.generateText([userMessage], {
        maxTokens: 1024,
        toolChoice: { type: 'tool', name: 'get_element_by_coordinate' },
        tools: [locateTool],
    });
    const point = response.toolCalls[0].input.coordinate;
    return { left: point[0], top: point[1] };
}
1187
+
1188
/**
 * Tool placeholder for form autofill. Only the metadata is defined; calling
 * execute always rejects.
 */
class FormAutofill {
    constructor() {
        const schema = { type: 'object', properties: {} };
        this.name = 'form_autofill';
        this.description = 'Automatically fill in form data on web pages';
        this.input_schema = schema;
    }
    /**
     * @param {*} context unused
     * @param {*} params unused
     * @throws {Error} always ('Not implemented')
     */
    async execute(context, params) {
        // Planned scope: form -> input, textarea, select ...
        throw new Error('Not implemented');
    }
}
1205
+
1206
/**
 * Tool: open a URL in a new window or in the window already tracked by the
 * context, and record the resulting tab/window ids in context.variables.
 */
class OpenUrl {
    constructor() {
        this.name = 'open_url';
        this.description = 'Open the specified URL link in browser window';
        this.input_schema = {
            type: 'object',
            properties: {
                url: { type: 'string', description: 'URL link address' },
                newWindow: {
                    type: 'boolean',
                    description: 'true: Open in a new window; false: Open in the current window.',
                },
            },
            required: ['url'],
        };
    }
    /**
     * Open Url
     *
     * @param {*} context context.variables holds 'windowId', 'tabId' and 'windowIds'
     * @param {*} params { url: 'https://www.google.com', newWindow: true }
     * @returns > { tabId, windowId, title }
     * @throws {Error} when params is not an object carrying a url
     */
    async execute(context, params) {
        const valid = typeof params === 'object' && params !== null && Boolean(params.url);
        if (!valid) {
            throw new Error('Invalid parameters. Expected an object with a "url" property.');
        }
        const vars = context.variables;
        const target = params.url;
        // With no window or tab tracked yet, the first open is forced into a new window.
        const inNewWindow = params.newWindow || (!vars.get('windowId') && !vars.get('tabId'));
        const tab = inNewWindow
            ? await open_new_tab(target, true)
            : await open_new_tab(target, false, await getWindowId(context));
        const windowId = tab.windowId;
        const tabId = tab.id;
        vars.set('windowId', windowId);
        vars.set('tabId', tabId);
        if (inNewWindow) {
            // Remember every window this tool opened.
            const opened = vars.get('windowIds');
            if (opened) {
                opened.push(windowId);
            }
            else {
                vars.set('windowIds', [windowId]);
            }
        }
        return { tabId, windowId, title: tab.title };
    }
}
1272
+
1273
/**
 * Tool: capture the window currently tracked by the context.
 */
class Screenshot {
    constructor() {
        const schema = { type: 'object', properties: {} };
        this.name = 'screenshot';
        this.description = 'Screenshot the current webpage window';
        this.input_schema = schema;
    }
    /**
     * Current Page Screenshot
     *
     * @param {*} context execution context used to resolve the window id
     * @param {*} params {} (unused)
     * @returns > { image: { type: 'base64', media_type: 'image/png', data } }
     */
    async execute(context, params) {
        const targetWindow = await getWindowId(context);
        const capture = await screenshot(targetWindow);
        return capture;
    }
}
1296
+
1297
/**
 * Tool: browser tab management driven by a single textual command
 * (`tab_all`, `current_tab`, `go_back`, `change_url [url]`, `close_tab`,
 * `switch_tab [tabId]`, `new_tab [url]`).
 */
class TabManagement {
    constructor() {
        this.name = 'tab_management';
        this.description = 'Browser tab management, view and operate tabs';
        this.input_schema = {
            type: 'object',
            properties: {
                commond: {
                    type: 'string',
                    description: `The commond to perform. The available commonds are:
* \`tab_all\`: View all tabs and return the tabId and title.
* \`current_tab\`: Get current tab information (tabId, url, title).
* \`go_back\`: Go back to the previous page in the current tab.
* \`change_url [url]\`: open URL in the current tab, eg: \`change_url https://www.google.com\`.
* \`close_tab\`: Close the current tab.
* \`switch_tab [tabId]\`: Switch to the specified tab using tabId, eg: \`switch_tab 1000\`.
* \`new_tab [url]\`: Open a new tab window and open the URL, eg: \`new_tab https://www.google.com\``,
                },
            },
            required: ['commond'],
        };
    }
    /**
     * Tab management
     *
     * @param {*} context context.variables holds 'windowId', 'tabId' and 'windowIds'
     * @param {*} params { commond: `new_tab [url]` | 'tab_all' | 'current_tab' | 'go_back' | 'close_tab' | 'switch_tab [tabId]' | `change_url [url]` }
     * @returns tab info object, array of tab infos (tab_all), or
     *          { closedTabId, newTabId, newTabTitle } (close_tab)
     * @throws {Error} on missing/unknown commond, an unparsable tabId for
     *         switch_tab, or when close_tab leaves no tab to activate
     */
    async execute(context, params) {
        // Same guard as the sibling tools: undefined or non-object params must not crash with a TypeError.
        if (typeof params !== 'object' || params === null || !params.commond) {
            throw new Error('Invalid parameters. Expected an object with a "commond" property.');
        }
        let windowId = await getWindowId(context);
        let commond = String(params.commond).trim();
        // Models sometimes echo the markdown backticks from the description.
        if (commond.startsWith('`')) {
            commond = commond.substring(1);
        }
        if (commond.endsWith('`')) {
            commond = commond.substring(0, commond.length - 1);
        }
        const describe = (tabId, tab) => ({ tabId, windowId: tab.windowId, title: tab.title, url: tab.url });
        const stripBrackets = (text) => text.replace('[', '').replace(']', '');
        let result;
        if (commond == 'tab_all') {
            result = [];
            let tabs = await chrome.tabs.query({ windowId: windowId });
            for (let i = 0; i < tabs.length; i++) {
                let tab = tabs[i];
                let tabInfo = describe(tab.id, tab);
                if (tab.active) {
                    tabInfo.active = true;
                }
                result.push(tabInfo);
            }
        }
        else if (commond == 'current_tab') {
            let tabId = await getTabId(context);
            let tab = await chrome.tabs.get(tabId);
            result = describe(tabId, tab);
        }
        else if (commond == 'go_back') {
            let tabId = await getTabId(context);
            await chrome.tabs.goBack(tabId);
            let tab = await chrome.tabs.get(tabId);
            result = describe(tabId, tab);
        }
        else if (commond == 'close_tab') {
            let closedTabId = await getTabId(context);
            await chrome.tabs.remove(closedTabId);
            await sleep(100);
            let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
            if (tabs.length == 0) {
                tabs = await chrome.tabs.query({ status: 'complete', currentWindow: true });
            }
            let tab = tabs[tabs.length - 1];
            if (!tab) {
                // Nothing left to focus: fail with a clear message instead of a TypeError.
                throw new Error('No remaining tab to activate after closing tab ' + closedTabId);
            }
            if (!tab.active) {
                await chrome.tabs.update(tab.id, { active: true });
            }
            context.variables.set('tabId', tab.id);
            context.variables.set('windowId', tab.windowId);
            result = { closedTabId, newTabId: tab.id, newTabTitle: tab.title };
        }
        else if (commond.startsWith('switch_tab')) {
            let tabId = Number.parseInt(stripBrackets(commond.replace('switch_tab', '')), 10);
            if (Number.isNaN(tabId)) {
                throw new Error('Invalid tabId in commond: ' + commond);
            }
            let tab = await chrome.tabs.update(tabId, { active: true });
            context.variables.set('tabId', tab.id);
            context.variables.set('windowId', tab.windowId);
            result = describe(tabId, tab);
        }
        else if (commond.startsWith('change_url')) {
            let url = stripBrackets(commond.substring('change_url'.length)).trim();
            let tabId = await getTabId(context);
            // The injected function is serialized before it runs in the page, so it
            // cannot close over `url`; the value has to travel through the args array.
            await executeScript(tabId, (targetUrl) => {
                location.href = targetUrl;
            }, [url]);
            let tab = await waitForTabComplete(tabId);
            result = describe(tabId, tab);
        }
        else if (commond.startsWith('new_tab')) {
            // Trim like change_url does, so "new_tab https://x" does not yield " https://x".
            let url = stripBrackets(commond.replace('new_tab', '')).replace(/"/g, '').trim();
            // First mandatory opening of a new window
            let newWindow = !context.variables.get('windowId') && !context.variables.get('tabId');
            let tab;
            if (newWindow) {
                tab = await open_new_tab(url, true);
            }
            else {
                tab = await open_new_tab(url, false, await getWindowId(context));
            }
            context.variables.set('windowId', tab.windowId);
            context.variables.set('tabId', tab.id);
            if (newWindow) {
                let windowIds = context.variables.get('windowIds');
                if (windowIds) {
                    windowIds.push(tab.windowId);
                }
                else {
                    context.variables.set('windowIds', [tab.windowId]);
                }
            }
            result = describe(tab.id, tab);
        }
        else {
            throw Error('Unknown commond: ' + commond);
        }
        return result;
    }
    /**
     * Closes every window recorded under 'windowIds' in context.variables.
     *
     * @param {*} context execution context whose variables are inspected
     */
    destroy(context) {
        let windowIds = context.variables.get('windowIds');
        if (windowIds) {
            for (let i = 0; i < windowIds.length; i++) {
                chrome.windows.remove(windowIds[i]);
            }
        }
    }
}
1455
+
1456
/**
 * Tool: run a web search through deepSearch and return the crawled result
 * links that ended up with content.
 */
class WebSearch {
    constructor() {
        this.name = 'web_search';
        this.description = 'Use web search to return search results';
        this.input_schema = {
            type: 'object',
            properties: {
                query: { type: 'string', description: 'search for keywords' },
                maxResults: { type: 'integer', description: 'Maximum search results, default 5' },
            },
            required: ['query'],
        };
    }
    /**
     * search
     *
     * @param {*} context unused
     * @param {*} params { url: 'https://www.google.com', query: 'ai agent', maxResults: 5 }
     * @returns > [{ title, url, content }]
     * @throws {Error} when params is not an object carrying a query
     */
    async execute(context, params) {
        const valid = typeof params === 'object' && params !== null && Boolean(params.query);
        if (!valid) {
            throw new Error('Invalid parameters. Expected an object with a "query" property.');
        }
        const engineUrl = params.url ? params.url : 'https://www.google.com';
        const limit = params.maxResults || 5;
        const taskId = new Date().getTime() + '';
        const request = [{ url: engineUrl, keyword: params.query }];
        const searchInfo = await deepSearch(taskId, request, limit);
        // Only one search was submitted, so only the first result group matters.
        const firstGroup = searchInfo.result[0];
        const links = (firstGroup === null || firstGroup === void 0 ? void 0 : firstGroup.links) || [];
        return links.filter((entry) => entry.content);
    }
}
1502
/**
 * Per-engine search configuration, keyed by domain. Each entry names the
 * script file to inject into the result page and builds the search URL.
 * `default` is used for any other site and runs a Google `site:` query.
 *
 * Keywords are escaped with encodeURIComponent: encodeURI leaves characters
 * such as `&`, `#`, `+` and `=` untouched, which would cut the query short or
 * change its meaning once placed inside the `q` parameter.
 */
const deepSearchInjects = {
    'bing.com': {
        filename: 'bing.js',
        buildSearchUrl: function (url, keyword) {
            return 'https://bing.com/search?q=' + encodeURIComponent(keyword);
        },
    },
    'duckduckgo.com': {
        filename: 'duckduckgo.js',
        buildSearchUrl: function (url, keyword) {
            return 'https://duckduckgo.com/?q=' + encodeURIComponent(keyword);
        },
    },
    'google.com': {
        filename: 'google.js',
        buildSearchUrl: function (url, keyword) {
            return 'https://www.google.com/search?q=' + encodeURIComponent(keyword);
        },
    },
    default: {
        filename: 'google.js',
        buildSearchUrl: function (url, keyword) {
            // Reduce the URL to its host part: drop the scheme, then any path.
            url = url.trim();
            let idx = url.indexOf('//');
            if (idx > -1) {
                url = url.substring(idx + 2);
            }
            idx = url.indexOf('/', 2);
            if (idx > -1) {
                url = url.substring(0, idx);
            }
            keyword = 'site:' + url + ' ' + keyword;
            return 'https://www.google.com/search?q=' + encodeURIComponent(keyword);
        },
    },
};
1538
/**
 * Picks the deepSearchInjects entry whose domain key matches the origin part
 * of `url` (falling back to the `default` entry) and builds the search URL.
 *
 * @param {string} url search engine or site URL
 * @param {string} keyword search keyword
 * @returns {{filename: string, url: string}} inject script name and search URL
 */
function buildDeepSearchUrl(url, keyword) {
    // Cut at the first '/' that follows the "//" of the scheme, if there is one.
    const pathStart = url.indexOf('/', url.indexOf('//') + 2);
    const origin = pathStart > -1 ? url.substring(0, pathStart) : url;
    const matches = (domain) => origin == domain || origin.endsWith('.' + domain) || origin.endsWith('/' + domain);
    const hit = Object.keys(deepSearchInjects).find(matches);
    const chosen = (hit !== undefined && deepSearchInjects[hit]) || deepSearchInjects['default'];
    return {
        filename: chosen.filename,
        url: chosen.buildSearchUrl(url, keyword),
    };
}
1558
// Shared event hub: every chrome.tabs.onUpdated notification is republished
// on it as { tabId, changeInfo, tab }.
const tabsUpdateEvent = new MsgEvent();
chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
    const update = { tabId, changeInfo, tab };
    await tabsUpdateEvent.publish(update);
});
1563
/**
 * deep search
 *
 * Collects the detail links for every search, then crawls each detail page.
 * When no window is supplied a new one is created and closed again before
 * returning, including when crawling fails.
 *
 * @param {string} taskId task id
 * @param {array} searchs search list => [{ url: 'https://bing.com', keyword: 'ai' }]
 * @param {number} detailsMaxNum Maximum crawling quantity per search detail page
 * @param {*} [window] window to open tabs in; created here when omitted
 * @returns search info produced by doPageContent
 */
async function deepSearch(taskId, searchs, detailsMaxNum, window) {
    let closeWindow = false;
    if (!window) {
        // open new window
        window = await chrome.windows.create({
            type: 'normal',
            state: 'maximized',
            url: null,
        });
        closeWindow = true;
    }
    try {
        // crawler the search page details page link
        // [{ links: [{ title, url }] }]
        let detailLinkGroups = await doDetailLinkGroups(taskId, searchs, detailsMaxNum, window);
        // crawler all details page content and comments
        let searchInfo = await doPageContent(taskId, detailLinkGroups, window);
        console.log('searchInfo: ', searchInfo);
        return searchInfo;
    }
    finally {
        // Close only a window created here, and do so even when crawling threw.
        // The removal is not awaited; a failure to close is logged, not raised.
        if (closeWindow) {
            Promise.resolve(chrome.windows.remove(window.id)).catch((e) => console.error(e));
        }
    }
}
1591
/**
 * crawler the search page details page link
 *
 * Opens one tab per search, waits for it to finish loading, injects the
 * engine script and asks it for the result links. Waits on a latch (30000
 * passed to await) until every search has finished or failed.
 *
 * @param {string} taskId task id
 * @param {array} searchs search list => [{ url: 'https://bing.com', keyword: 'ai' }]
 * @param {number} detailsMaxNum Maximum crawling quantity per search detail page
 * @param {*} window window whose id the tabs are created in
 * @returns [{ url, links: [{ title, url }], filename }] one entry per search that succeeded
 */
async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
    let detailLinkGroups = [];
    let countDownLatch = new CountDownLatch(searchs.length);
    for (let i = 0; i < searchs.length; i++) {
        try {
            // script name & build search URL
            const { filename, url } = buildDeepSearchUrl(searchs[i].url, searchs[i].keyword);
            // open new Tab
            let tab = await chrome.tabs.create({
                url: url,
                windowId: window.id,
            });
            let eventId = taskId + '_' + i;
            // monitor Tab status
            tabsUpdateEvent.addListener(async function (obj) {
                if (obj.tabId != tab.id) {
                    return;
                }
                let status = obj.changeInfo.status;
                if (status !== 'complete' && status !== 'unloaded') {
                    return;
                }
                tabsUpdateEvent.removeListener(eventId);
                try {
                    if (status === 'complete') {
                        // inject js
                        await injectScript(tab.id, filename);
                        await sleep(1000);
                        // crawler the search page details page
                        // { links: [{ title, url }] }
                        let detailLinks = await chrome.tabs.sendMessage(tab.id, {
                            type: 'page:getDetailLinks',
                            keyword: searchs[i].keyword,
                        });
                        if (!detailLinks || !detailLinks.links) {
                            // TODO error
                            detailLinks = { links: [] };
                        }
                        console.log('detailLinks: ', detailLinks);
                        let links = detailLinks.links.slice(0, detailsMaxNum);
                        detailLinkGroups.push({ url, links, filename });
                    }
                }
                catch (e) {
                    // A failed inject/sendMessage must not leave the latch waiting for its timeout.
                    console.error(e);
                }
                finally {
                    // Mirrors doPageContent: always release the latch and close the tab.
                    countDownLatch.countDown();
                    chrome.tabs.remove(tab.id);
                }
            }, eventId);
        }
        catch (e) {
            console.error(e);
            countDownLatch.countDown();
        }
    }
    await countDownLatch.await(30000);
    return detailLinkGroups;
}
1654
/**
 * page content
 *
 * Opens every detail link in its own tab, injects the engine script once the
 * tab has loaded and stores the page's content and title on the link object.
 * Progress counters are kept on the returned object; waits on a latch (60000
 * passed to await) until every link has finished or failed.
 *
 * @param {string} taskId task id
 * @param {array} detailLinkGroups details page group
 * @param {*} window window whose id the tabs are created in
 * @returns search info: { total, running, succeed, failed, failedLinks, result }
 */
async function doPageContent(taskId, detailLinkGroups, window) {
    const searchInfo = {
        total: detailLinkGroups.reduce((count, group) => count + group.links.length, 0),
        running: 0,
        succeed: 0,
        failed: 0,
        failedLinks: [],
        result: detailLinkGroups,
    };
    const countDownLatch = new CountDownLatch(searchInfo.total);
    for (let g = 0; g < detailLinkGroups.length; g++) {
        const { filename, links } = detailLinkGroups[g];
        for (let k = 0; k < links.length; k++) {
            const link = links[k];
            // open new tab
            const tab = await chrome.tabs.create({ url: link.url, windowId: window.id });
            searchInfo.running++;
            const eventId = taskId + '_' + g + '_' + k;
            // Shared tail for both terminal states of the tab.
            const release = () => {
                searchInfo.running--;
                countDownLatch.countDown();
                chrome.tabs.remove(tab.id);
                tabsUpdateEvent.removeListener(eventId);
            };
            // monitor Tab status
            tabsUpdateEvent.addListener(async function (obj) {
                if (obj.tabId != tab.id) {
                    return;
                }
                const status = obj.changeInfo.status;
                if (status === 'unloaded') {
                    release();
                    return;
                }
                if (status !== 'complete') {
                    return;
                }
                try {
                    tabsUpdateEvent.removeListener(eventId);
                    // inject js
                    await injectScript(tab.id, filename);
                    await sleep(1000);
                    // crawler content and comments
                    // { title, content }
                    const page = await chrome.tabs.sendMessage(tab.id, { type: 'page:getContent' });
                    if (!page) {
                        throw Error('No Result');
                    }
                    link.content = page.content;
                    link.page_title = page.title;
                    searchInfo.succeed++;
                }
                catch (err) {
                    searchInfo.failed++;
                    searchInfo.failedLinks.push(link);
                    console.error(link.title + ' crawler error', link.url, err);
                }
                finally {
                    release();
                }
            }, eventId);
        }
    }
    await countDownLatch.await(60000);
    return searchInfo;
}
1735
+
1736
// Frozen, prototype-less namespace object holding every tool class of this bundle.
var tools = /*#__PURE__*/Object.freeze({
    __proto__: null,
    BrowserUse,
    ElementClick,
    ExportFile,
    ExtractContent,
    FindElementPosition,
    FormAutofill,
    OpenUrl,
    Screenshot,
    TabManagement,
    WebSearch
});
1749
+
1750
/**
 * Sends an 'eko:message' to a tab via chrome.tabs.sendMessage.
 *
 * @param {number} tabId target tab
 * @param {*} event value sent as the message's `event` field
 * @param {*} params value sent as the message's `params` field
 * @returns whatever chrome.tabs.sendMessage resolves to
 */
async function pub(tabId, event, params) {
    const message = { type: 'eko:message', event, params };
    const reply = await chrome.tabs.sendMessage(tabId, message);
    return reply;
}
1757
/**
 * Reads one entry from chrome.storage.sync.
 *
 * @param {string} [name='llmConfig'] storage key to read
 * @returns the value stored under `name` (undefined when the key is absent)
 */
async function getLLMConfig(name = 'llmConfig') {
    const { [name]: config } = await chrome.storage.sync.get([name]);
    return config;
}
1761
/**
 * Instantiates every class in `tools` whose prototype has an `execute`
 * member and indexes the instances by `instance.name` (or by the export key
 * when the instance has no name). Classes whose constructor throws are
 * logged and left out.
 *
 * @returns {Map<string, object>} tool name -> tool instance
 */
function getAllTools() {
    const registry = new Map();
    for (const exportName in tools) {
        const Candidate = tools[exportName];
        const looksLikeTool = typeof Candidate === 'function' && Candidate.prototype && 'execute' in Candidate.prototype;
        if (!looksLikeTool) {
            continue;
        }
        try {
            const toolInstance = new Candidate();
            registry.set(toolInstance.name || exportName, toolInstance);
        }
        catch (err) {
            console.error(`Failed to instantiate ${exportName}:`, err);
        }
    }
    return registry;
}
1777
+
1778
+ exports.browser = browser;
1779
+ exports.getAllTools = getAllTools;
1780
+ exports.getLLMConfig = getLLMConfig;
1781
+ exports.pub = pub;
1782
+ exports.tools = tools;
1783
+ exports.utils = utils;