@eko-ai/eko 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +70 -0
  3. package/dist/core/eko.d.ts +17 -0
  4. package/dist/core/tool-registry.d.ts +13 -0
  5. package/dist/extension/content/index.d.ts +7 -0
  6. package/dist/extension/core.d.ts +11 -0
  7. package/dist/extension/index.d.ts +7 -0
  8. package/dist/extension/script/bing.js +25 -0
  9. package/dist/extension/script/build_dom_tree.js +657 -0
  10. package/dist/extension/script/common.js +204 -0
  11. package/dist/extension/script/duckduckgo.js +25 -0
  12. package/dist/extension/script/google.js +26 -0
  13. package/dist/extension/tools/browser.d.ts +21 -0
  14. package/dist/extension/tools/browser_use.d.ts +18 -0
  15. package/dist/extension/tools/element_click.d.ts +12 -0
  16. package/dist/extension/tools/export_file.d.ts +18 -0
  17. package/dist/extension/tools/extract_content.d.ts +18 -0
  18. package/dist/extension/tools/find_element_position.d.ts +12 -0
  19. package/dist/extension/tools/form_autofill.d.ts +11 -0
  20. package/dist/extension/tools/html_script.d.ts +21 -0
  21. package/dist/extension/tools/index.d.ts +11 -0
  22. package/dist/extension/tools/open_url.d.ts +18 -0
  23. package/dist/extension/tools/screenshot.d.ts +18 -0
  24. package/dist/extension/tools/tab_management.d.ts +19 -0
  25. package/dist/extension/tools/web_search.d.ts +18 -0
  26. package/dist/extension/utils.d.ts +30 -0
  27. package/dist/extension.cjs.js +1783 -0
  28. package/dist/extension.esm.js +1776 -0
  29. package/dist/extension_content_script.js +247 -0
  30. package/dist/fellou/computer.d.ts +20 -0
  31. package/dist/fellou/index.d.ts +6 -0
  32. package/dist/fellou/tools/computer_use.d.ts +18 -0
  33. package/dist/fellou/tools/index.d.ts +2 -0
  34. package/dist/fellou.cjs.js +238 -0
  35. package/dist/fellou.esm.js +235 -0
  36. package/dist/index.cjs.js +9350 -0
  37. package/dist/index.d.ts +8 -0
  38. package/dist/index.esm.js +9340 -0
  39. package/dist/models/action.d.ts +20 -0
  40. package/dist/models/workflow.d.ts +15 -0
  41. package/dist/nodejs/index.d.ts +2 -0
  42. package/dist/nodejs/tools/index.d.ts +1 -0
  43. package/dist/nodejs.cjs.js +7 -0
  44. package/dist/nodejs.esm.js +5 -0
  45. package/dist/schemas/workflow.schema.d.ts +85 -0
  46. package/dist/services/llm/claude-provider.d.ts +10 -0
  47. package/dist/services/llm/openai-provider.d.ts +10 -0
  48. package/dist/services/parser/workflow-parser.d.ts +29 -0
  49. package/dist/services/workflow/generator.d.ts +11 -0
  50. package/dist/services/workflow/templates.d.ts +7 -0
  51. package/dist/types/action.types.d.ts +36 -0
  52. package/dist/types/eko.types.d.ts +21 -0
  53. package/dist/types/framework.types.d.ts +11 -0
  54. package/dist/types/index.d.ts +6 -0
  55. package/dist/types/llm.types.d.ts +54 -0
  56. package/dist/types/parser.types.d.ts +9 -0
  57. package/dist/types/tools.types.d.ts +88 -0
  58. package/dist/types/workflow.types.d.ts +39 -0
  59. package/dist/web/index.d.ts +2 -0
  60. package/dist/web/tools/index.d.ts +1 -0
  61. package/dist/web.cjs.js +7 -0
  62. package/dist/web.esm.js +5 -0
  63. package/package.json +108 -0
@@ -0,0 +1,1776 @@
/**
 * Resolve the id of the browser window the current workflow should use.
 *
 * The cached `windowId` from `context.variables` is verified with
 * `chrome.windows.get`. If that lookup fails, the cached id is dropped and the
 * window owning the cached `tabId` is tried; if the tab lookup fails too, the
 * cached `tabId` is dropped. When nothing could be resolved the current window
 * is used.
 *
 * @param {*} context Execution context exposing `variables.get` / `variables.delete`.
 * @returns {Promise<number>} The resolved window id.
 */
async function getWindowId(context) {
  const store = context.variables;
  let resolved = store.get('windowId');
  if (resolved) {
    try {
      await chrome.windows.get(resolved);
    } catch (windowError) {
      // The cached window is no longer reachable: forget it and fall back to the cached tab.
      store.delete('windowId');
      resolved = null;
      const cachedTabId = store.get('tabId');
      if (cachedTabId) {
        try {
          resolved = (await chrome.tabs.get(cachedTabId)).windowId;
        } catch (tabError) {
          store.delete('tabId');
        }
      }
    }
  }
  if (resolved) {
    return resolved;
  }
  const current = await chrome.windows.getCurrent();
  return current.id;
}
/**
 * Resolve the id of the tab the current workflow should use.
 *
 * A cached `tabId` in `context.variables` is returned when `chrome.tabs.get`
 * succeeds for it. If the lookup fails the cached entry is deleted, and in
 * every remaining case the result of `getCurrentTabId()` is returned.
 *
 * @param {*} context Execution context exposing `variables.get` / `variables.delete`.
 * @returns {Promise<number|undefined>} The resolved tab id.
 */
async function getTabId(context) {
  const cached = context.variables.get('tabId');
  if (cached) {
    try {
      await chrome.tabs.get(cached);
      return cached;
    } catch (lookupError) {
      context.variables.delete('tabId');
    }
  }
  return await getCurrentTabId();
}
/**
 * Find the id of the tab the user is most likely looking at.
 *
 * Three `chrome.tabs.query` lookups are tried in order, stopping at the first
 * one that yields a tab:
 *   1. the active tab of the last focused window,
 *   2. the active tab of the current window,
 *   3. the last entry among the fully loaded tabs of the current window.
 *
 * @returns {Promise<number|undefined>} The tab id, or `undefined` when no query matched.
 */
function getCurrentTabId() {
  return new Promise((resolve) => {
    const useLastLoadedTab = () => {
      chrome.tabs.query({ status: 'complete', currentWindow: true }, (loadedTabs) => {
        resolve(loadedTabs.length ? loadedTabs[loadedTabs.length - 1].id : undefined);
      });
    };
    const useActiveTabOfCurrentWindow = () => {
      chrome.tabs.query({ active: true, currentWindow: true }, (activeTabs) => {
        if (activeTabs.length > 0) {
          resolve(activeTabs[0].id);
          return;
        }
        useLastLoadedTab();
      });
    };
    chrome.tabs.query({ active: true, lastFocusedWindow: true }, (focusedTabs) => {
      if (focusedTabs.length > 0) {
        resolve(focusedTabs[0].id);
        return;
      }
      useActiveTabOfCurrentWindow();
    });
  });
}
/**
 * Open `url` in a new tab and wait until the tab has finished loading.
 *
 * @param {string} url Address to open.
 * @param {boolean} newWindow When truthy a new maximized normal window is created
 *   for the url; otherwise the tab is created in `windowId`.
 * @param {number} [windowId] Target window for the tab; the current window is
 *   used when it is not given. Ignored when `newWindow` is truthy.
 * @returns {Promise<*>} The tab object produced by `waitForTabComplete`.
 */
async function open_new_tab(url, newWindow, windowId) {
  let createdTabId;
  if (!newWindow) {
    let targetWindowId = windowId;
    if (!targetWindowId) {
      targetWindowId = (await chrome.windows.getCurrent()).id;
    }
    const createdTab = await chrome.tabs.create({ url: url, windowId: targetWindowId });
    createdTabId = createdTab.id;
  } else {
    const createdWindow = await chrome.windows.create({
      type: 'normal',
      state: 'maximized',
      url: url,
    });
    // Only create a tab explicitly when the new window did not report its tabs.
    const windowTabs = createdWindow.tabs || [
      await chrome.tabs.create({ url: url, windowId: createdWindow.id }),
    ];
    createdTabId = windowTabs[0].id;
  }
  const loadedTab = await waitForTabComplete(createdTabId);
  // Short pause after the load completes before handing the tab back.
  await sleep(200);
  return loadedTab;
}
/**
 * Run `func` with `args` inside the given tab via `chrome.scripting.executeScript`.
 *
 * @param {number} tabId Tab to run the function in.
 * @param {Function} func Function to execute in the tab.
 * @param {Array} args Arguments passed to `func`.
 * @returns {Promise<*>} The `result` of the first entry of the returned injection results.
 */
async function executeScript(tabId, func, args) {
  const injection = { target: { tabId: tabId }, func: func, args: args };
  const frames = await chrome.scripting.executeScript(injection);
  const firstFrame = frames[0];
  return firstFrame.result;
}
/**
 * Wait until the tab reports `status === 'complete'`.
 *
 * The tab is looked up first; if it is already complete it is returned right
 * away. Otherwise a `chrome.tabs.onUpdated` listener waits for the matching
 * update. The listener and the timer are always removed when the wait ends.
 *
 * A failing `chrome.tabs.get` now rejects the returned promise (it used to leave
 * it pending forever), and a timeout rejects with an `Error` instead of
 * `undefined`.
 *
 * @param {number} tabId Tab to wait for.
 * @param {number} [timeout=30000] Maximum wait in milliseconds.
 * @returns {Promise<*>} The completed tab object.
 * @throws {Error} When the tab lookup fails or the timeout elapses.
 */
async function waitForTabComplete(tabId, timeout = 30000) {
  const currentTab = await chrome.tabs.get(tabId);
  if (currentTab.status === 'complete') {
    return currentTab;
  }
  return new Promise((resolve, reject) => {
    let timer;
    const listener = (updatedTabId, changeInfo, updatedTab) => {
      if (updatedTabId === tabId && changeInfo.status === 'complete') {
        chrome.tabs.onUpdated.removeListener(listener);
        clearTimeout(timer);
        resolve(updatedTab);
      }
    };
    timer = setTimeout(() => {
      chrome.tabs.onUpdated.removeListener(listener);
      reject(new Error('Timed out after ' + timeout + 'ms waiting for tab ' + tabId + ' to finish loading'));
    }, timeout);
    chrome.tabs.onUpdated.addListener(listener);
  });
}
/**
 * Measure the viewport of a tab.
 *
 * The measurement runs inside the tab and takes the first truthy value among
 * `window.inner*`, `document.documentElement.client*` and `document.body.client*`
 * for each dimension.
 *
 * @param {number} [tabId] Tab to measure; `getCurrentTabId()` is used when omitted.
 * @returns {Promise<number[]>} `[width, height]`.
 */
async function getPageSize(tabId) {
  const targetTabId = tabId ? tabId : await getCurrentTabId();
  const frames = await chrome.scripting.executeScript({
    target: { tabId: targetTabId },
    func: () => [
      window.innerWidth || document.documentElement.clientWidth || document.body.clientWidth,
      window.innerHeight || document.documentElement.clientHeight || document.body.clientHeight,
    ],
  });
  const measured = frames[0].result;
  return [measured[0], measured[1]];
}
/**
 * Pause for the given duration.
 *
 * @param {number} time Delay in milliseconds, handed to `setTimeout`.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(time) {
  return new Promise((wake) => {
    setTimeout(() => {
      wake();
    }, time);
  });
}
/**
 * Inject the shared `eko/script/common.js` file into a tab, optionally followed
 * by one extra script from the same `eko/script/` directory.
 *
 * @param {number} tabId Tab to inject into.
 * @param {string} [filename] File name of the additional script, relative to `eko/script/`.
 * @returns {Promise<void>}
 */
async function injectScript(tabId, filename) {
  const scriptFiles = ['eko/script/common.js'];
  if (filename) {
    scriptFiles.push(`eko/script/${filename}`);
  }
  await chrome.scripting.executeScript({ target: { tabId }, files: scriptFiles });
}
/**
 * Minimal publish/subscribe hub keyed by listener id.
 */
class MsgEvent {
  constructor() {
    // { listenerId: callback }
    this.eventMap = {};
  }

  /**
   * Register a listener.
   *
   * @param {Function} callback Invoked with each published message.
   * @param {string} [id] Listener id; when omitted one is generated from the
   *   current timestamp followed by a random number below 10000.
   * @returns {string} The id the listener was stored under.
   */
  addListener(callback, id) {
    const listenerId = id
      ? id
      : new Date().getTime() + '' + Math.floor(Math.random() * 10000);
    this.eventMap[listenerId] = callback;
    return listenerId;
  }

  /**
   * Remove the listener stored under `id`.
   *
   * @param {string} id Id returned by `addListener`.
   */
  removeListener(id) {
    delete this.eventMap[id];
  }

  /**
   * Deliver `msg` to every listener, one after another. Promise-like results
   * are awaited before the next listener runs; a failing listener is logged
   * with `console.error` and does not stop delivery.
   *
   * @param {*} msg Message handed to each listener.
   * @returns {Promise<void>}
   */
  async publish(msg) {
    // Snapshot taken up front, so listeners added or removed during delivery do not affect this round.
    const listeners = Object.values(this.eventMap);
    for (let position = 0; position < listeners.length; position++) {
      try {
        const outcome = listeners[position](msg);
        if (isPromise(outcome)) {
          await outcome;
        }
      } catch (e) {
        console.error(e);
      }
    }
  }
}
/**
 * Counter (Function: Wait for all asynchronous tasks to complete)
 *
 * Create it with the number of pending tasks, call `countDown()` as each task
 * finishes, and `await(timeout)` to be notified once the count reaches zero.
 */
class CountDownLatch {
  /**
   * @param {number} count Number of `countDown()` calls to wait for.
   */
  constructor(count) {
    this.resolve = undefined;
    this.currentCount = count;
  }

  /**
   * Record one finished task; releases the waiter once the count is zero or below.
   */
  countDown() {
    this.currentCount -= 1;
    if (this.currentCount <= 0 && this.resolve) {
      this.resolve();
    }
  }

  /**
   * Wait for the count to reach zero.
   *
   * @param {number} [timeout] Maximum wait in milliseconds; only applied when greater than 0.
   * @returns {Promise<void>} Resolves when the count is zero or below; rejects
   *   (without a reason) when the timeout fires first.
   */
  await(timeout) {
    return new Promise((settle, reject) => {
      let release = settle;
      if (timeout > 0) {
        const timer = setTimeout(reject, timeout);
        // Cancel the pending timeout before releasing the waiter.
        release = () => {
          clearTimeout(timer);
          settle();
        };
      }
      this.resolve = release;
      if (this.currentCount <= 0) {
        release();
      }
    });
  }
}
/**
 * Tell whether a value is promise-like, i.e. a truthy object or function with a
 * callable `then`.
 *
 * @param {*} obj Value to inspect.
 * @returns {boolean} `true` when `obj` is thenable.
 */
function isPromise(obj) {
  if (!obj) {
    return false;
  }
  const kind = typeof obj;
  if (kind !== 'object' && kind !== 'function') {
    return false;
  }
  return typeof obj.then === 'function';
}
219
+
// Frozen, prototype-less namespace object bundling the tab/window/script helpers defined above.
var utils = /*#__PURE__*/Object.freeze({
  __proto__: null,
  CountDownLatch,
  MsgEvent,
  executeScript,
  getCurrentTabId,
  getPageSize,
  getTabId,
  getWindowId,
  injectScript,
  isPromise,
  open_new_tab,
  sleep,
  waitForTabComplete,
});
235
+
/**
 * Send one or more key presses to a tab.
 *
 * `key` is a whitespace-separated list of presses. A press containing `+` is a
 * combination: every part except the last is read as a modifier
 * (`ctrl`/`control`, `alt`/`option`, `shift`, `meta`/`command`, case-insensitive)
 * and the last part is the key itself. Each press is sent as a `computer:key`
 * message, followed by a 100 ms pause.
 *
 * @param {number} tabId Tab receiving the key presses.
 * @param {string} key Presses to send, e.g. a single key or a `+`-joined combination.
 * @param {*} [coordinate] Pointer position; the current cursor position is used when omitted.
 * @returns {Promise<*>} Response to the last message sent, or `undefined` when nothing was sent.
 */
async function key(tabId, key, coordinate) {
  const position = coordinate ? coordinate : (await cursor_position(tabId)).coordinate;
  await mouse_move(tabId, position);
  // Alias table for key names; currently empty, so every name maps to itself.
  const aliases = {};
  const resolveAlias = (name) => aliases[name] || name;
  const presses = key.replace(/\s+/g, ' ').split(' ');
  let lastResponse;
  for (const press of presses) {
    const keyEvents = {
      key: '',
      ctrlKey: false,
      altKey: false,
      shiftKey: false,
      metaKey: false,
    };
    if (press.indexOf('+') > -1) {
      const parts = press.split('+').map((part) => resolveAlias(part));
      const lastIndex = parts.length - 1;
      parts.slice(0, lastIndex).forEach((part) => {
        const modifier = part.toLowerCase();
        switch (modifier) {
          case 'ctrl':
          case 'control':
            keyEvents.ctrlKey = true;
            break;
          case 'alt':
          case 'option':
            keyEvents.altKey = true;
            break;
          case 'shift':
            keyEvents.shiftKey = true;
            break;
          case 'meta':
          case 'command':
            keyEvents.metaKey = true;
            break;
          default:
            console.log('Unknown Key: ' + modifier);
        }
      });
      keyEvents.key = parts[lastIndex];
    } else {
      keyEvents.key = resolveAlias(press);
    }
    if (!keyEvents.key) {
      // Nothing to press (e.g. an empty segment); move on to the next one.
      continue;
    }
    lastResponse = await chrome.tabs.sendMessage(tabId, {
      type: 'computer:key',
      coordinate: position,
      ...keyEvents,
    });
    await sleep(100);
  }
  return lastResponse;
}
/**
 * Move the pointer to a position and send a `computer:type` message with `text`.
 *
 * @param {number} tabId Tab receiving the text.
 * @param {string} text Text to type.
 * @param {*} [coordinate] Target position; the current cursor position is used when omitted.
 * @returns {Promise<*>} Response returned for the message.
 */
async function type(tabId, text, coordinate) {
  const position = coordinate ? coordinate : (await cursor_position(tabId)).coordinate;
  await mouse_move(tabId, position);
  const message = { type: 'computer:type', text, coordinate: position };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Send a `computer:type` message addressing the target element by XPath.
 *
 * @param {number} tabId Tab receiving the text.
 * @param {string} text Text to type.
 * @param {string} xpath XPath of the target element.
 * @returns {Promise<*>} Response returned for the message.
 */
async function type_by_xpath(tabId, text, xpath) {
  const message = { type: 'computer:type', text, xpath };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Clear the input at a position by moving the pointer there and sending a
 * `computer:type` message with an empty text.
 *
 * @param {number} tabId Tab containing the input.
 * @param {*} [coordinate] Target position; the current cursor position is used when omitted.
 * @returns {Promise<*>} Response returned for the message.
 */
async function clear_input(tabId, coordinate) {
  const position = coordinate ? coordinate : (await cursor_position(tabId)).coordinate;
  await mouse_move(tabId, position);
  const message = { type: 'computer:type', text: '', coordinate: position };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Clear the input addressed by XPath by sending a `computer:type` message with
 * an empty text.
 *
 * @param {number} tabId Tab containing the input.
 * @param {string} xpath XPath of the input element.
 * @returns {Promise<*>} Response returned for the message.
 */
async function clear_input_by_xpath(tabId, xpath) {
  const message = { type: 'computer:type', text: '', xpath };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Send a `computer:mouse_move` message to a tab.
 *
 * @param {number} tabId Tab receiving the message.
 * @param {*} coordinate Position to move the pointer to.
 * @returns {Promise<*>} Response returned for the message.
 */
async function mouse_move(tabId, coordinate) {
  const message = { type: 'computer:mouse_move', coordinate };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Send a `computer:left_click` message for a position.
 *
 * @param {number} tabId Tab receiving the click.
 * @param {*} [coordinate] Click position; the current cursor position is used when omitted.
 * @returns {Promise<*>} Response returned for the message.
 */
async function left_click(tabId, coordinate) {
  const position = coordinate ? coordinate : (await cursor_position(tabId)).coordinate;
  const message = { type: 'computer:left_click', coordinate: position };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Send a `computer:left_click` message addressing the target element by XPath.
 *
 * @param {number} tabId Tab receiving the click.
 * @param {string} xpath XPath of the element to click.
 * @returns {Promise<*>} Response returned for the message.
 */
async function left_click_by_xpath(tabId, xpath) {
  const message = { type: 'computer:left_click', xpath };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Send a `computer:left_click_drag` message that drags from the current cursor
 * position to `coordinate`.
 *
 * @param {number} tabId Tab receiving the drag.
 * @param {*} coordinate Position where the drag ends.
 * @returns {Promise<*>} Response returned for the message.
 */
async function left_click_drag(tabId, coordinate) {
  const start = await cursor_position(tabId);
  const message = {
    type: 'computer:left_click_drag',
    from_coordinate: start.coordinate,
    to_coordinate: coordinate,
  };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Send a `computer:right_click` message for a position.
 *
 * @param {number} tabId Tab receiving the click.
 * @param {*} [coordinate] Click position; the current cursor position is used when omitted.
 * @returns {Promise<*>} Response returned for the message.
 */
async function right_click(tabId, coordinate) {
  const position = coordinate ? coordinate : (await cursor_position(tabId)).coordinate;
  const message = { type: 'computer:right_click', coordinate: position };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Send a `computer:right_click` message addressing the target element by XPath.
 *
 * @param {number} tabId Tab receiving the click.
 * @param {string} xpath XPath of the element to right-click.
 * @returns {Promise<*>} Response returned for the message.
 */
async function right_click_by_xpath(tabId, xpath) {
  const message = { type: 'computer:right_click', xpath };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Send a `computer:double_click` message for a position.
 *
 * @param {number} tabId Tab receiving the click.
 * @param {*} [coordinate] Click position; the current cursor position is used when omitted.
 * @returns {Promise<*>} Response returned for the message.
 */
async function double_click(tabId, coordinate) {
  const position = coordinate ? coordinate : (await cursor_position(tabId)).coordinate;
  const message = { type: 'computer:double_click', coordinate: position };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Send a `computer:double_click` message addressing the target element by XPath.
 *
 * @param {number} tabId Tab receiving the click.
 * @param {string} xpath XPath of the element to double-click.
 * @returns {Promise<*>} Response returned for the message.
 */
async function double_click_by_xpath(tabId, xpath) {
  const message = { type: 'computer:double_click', xpath };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Capture the visible tab of a window as a JPEG (quality 50) and return it as
 * a base64 image descriptor.
 *
 * @param {number} windowId Window whose visible tab is captured.
 * @returns {Promise<{image: {type: string, media_type: string, data: string}}>}
 *   `data` is the part of the data URL after `base64,`; `media_type` is
 *   `image/png` when the data URL mentions it, otherwise `image/jpeg`.
 */
async function screenshot(windowId) {
  const captureOptions = {
    format: 'jpeg', // jpeg / png
    quality: 50, // 0-100
  };
  const dataUrl = await chrome.tabs.captureVisibleTab(windowId, captureOptions);
  const payloadStart = dataUrl.indexOf('base64,') + 7;
  const isPng = dataUrl.indexOf('image/png') > -1;
  return {
    image: {
      type: 'base64',
      media_type: isPng ? 'image/png' : 'image/jpeg',
      data: dataUrl.substring(payloadStart),
    },
  };
}
/**
 * Send a `computer:scroll_to` message that scrolls from the current cursor
 * position to `coordinate`.
 *
 * @param {number} tabId Tab to scroll.
 * @param {*} coordinate Position to scroll to.
 * @returns {Promise<*>} Response returned for the message.
 */
async function scroll_to(tabId, coordinate) {
  const start = await cursor_position(tabId);
  const message = {
    type: 'computer:scroll_to',
    from_coordinate: start.coordinate,
    to_coordinate: coordinate,
  };
  return await chrome.tabs.sendMessage(tabId, message);
}
/**
 * Send a `computer:scroll_to` message addressing the target element by XPath.
 *
 * @param {number} tabId Tab to scroll.
 * @param {string} xpath XPath of the element to scroll to.
 * @returns {Promise<*>} Response returned for the message.
 */
async function scroll_to_xpath(tabId, xpath) {
  const message = { type: 'computer:scroll_to', xpath };
  const response = await chrome.tabs.sendMessage(tabId, message);
  return response;
}
/**
 * Ask a tab for the current cursor position via a `computer:cursor_position` message.
 *
 * @param {number} tabId Tab to query.
 * @returns {Promise<{coordinate: *}>} Object holding only the `coordinate` field of the response.
 */
async function cursor_position(tabId) {
  const response = await chrome.tabs.sendMessage(tabId, { type: 'computer:cursor_position' });
  const coordinate = response.coordinate;
  return { coordinate };
}
/**
 * Viewport size of a tab; thin wrapper around `getPageSize`.
 *
 * @param {number} [tabId] Tab to measure.
 * @returns {Promise<number[]>} `[width, height]` as reported by `getPageSize`.
 */
async function size(tabId) {
  const dimensions = await getPageSize(tabId);
  return dimensions;
}
422
+
// Frozen, prototype-less namespace object bundling the pointer/keyboard/screenshot helpers defined above.
var browser = /*#__PURE__*/Object.freeze({
  __proto__: null,
  clear_input,
  clear_input_by_xpath,
  cursor_position,
  double_click,
  double_click_by_xpath,
  key,
  left_click,
  left_click_by_xpath,
  left_click_drag,
  mouse_move,
  right_click,
  right_click_by_xpath,
  screenshot,
  scroll_to,
  scroll_to_xpath,
  size,
  type,
  type_by_xpath,
});
444
+
/**
 * Trigger a download of `content` as a file, using a temporary `<a download>`
 * element that is added to the document, clicked and removed again.
 *
 * @param {string} filename Name offered for the downloaded file.
 * @param {string} type MIME type used for the Blob.
 * @param {*} content Blob part holding the file content.
 */
function exportFile(filename, type, content) {
  const payload = new Blob([content], { type });
  const anchor = document.createElement('a');
  anchor.href = URL.createObjectURL(payload);
  anchor.download = filename;
  const host = document.body;
  host.appendChild(anchor);
  anchor.click();
  host.removeChild(anchor);
  // Release the object URL once the click has been dispatched.
  URL.revokeObjectURL(anchor.href);
}
/**
 * List the options of the `<select>` element addressed by `xpath`.
 *
 * Returns `null` both when nothing matches and when the match is not a
 * `<select>` element. The latter used to throw while reading `options`; the
 * check now mirrors the one in `selectDropdownOption`.
 *
 * NOTE(review): this function is passed as `func` to `executeScript`, so it
 * presumably has to stay free of references to other module-level helpers.
 *
 * @param {string} xpath XPath of the dropdown element.
 * @returns {{options: Array<{index: number, text: string, value: string}>, id: string, name: string}|null}
 *   The trimmed option texts with their index and value, plus the element's id and name.
 */
function getDropdownOptions(xpath) {
  const select = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null)
    .singleNodeValue;
  if (!select || typeof select.tagName !== 'string' || select.tagName.toUpperCase() !== 'SELECT') {
    return null;
  }
  return {
    options: Array.from(select.options).map((opt) => ({
      index: opt.index,
      text: opt.text.trim(),
      value: opt.value,
    })),
    id: select.id,
    name: select.name,
  };
}
/**
 * Select the option whose trimmed text equals `text` in the `<select>` element
 * addressed by `xpath`, then notify the page with a `change` event.
 *
 * The `change` event is now dispatched with `bubbles: true`. A non-bubbling
 * event never reaches listeners registered on ancestor elements. Nodes without
 * a `tagName` are reported as invalid instead of raising a TypeError.
 *
 * NOTE(review): this function is passed as `func` to `executeScript`, so it
 * presumably has to stay free of references to other module-level helpers.
 *
 * @param {string} xpath XPath of the dropdown element.
 * @param {string} text Exact (trimmed) text of the option to select.
 * @returns {{success: boolean, error?: string, availableOptions?: string[], selectedValue?: string, selectedText?: string}}
 *   Outcome of the selection; on an unknown option the available option texts are listed.
 */
function selectDropdownOption(xpath, text) {
  const select = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null)
    .singleNodeValue;
  if (!select || typeof select.tagName !== 'string' || select.tagName.toUpperCase() !== 'SELECT') {
    return { success: false, error: 'Select not found or invalid element type' };
  }
  const options = Array.from(select.options);
  const option = options.find((opt) => opt.text.trim() === text);
  if (!option) {
    return {
      success: false,
      error: 'Option not found',
      availableOptions: options.map((o) => o.text.trim()),
    };
  }
  select.value = option.value;
  // Bubble so that listeners attached to ancestors are notified as well.
  select.dispatchEvent(new Event('change', { bubbles: true }));
  return {
    success: true,
    selectedValue: option.value,
    selectedText: option.text.trim(),
  };
}
/**
 * Extract the elements related to html operability and wrap them into pseudo-html code.
 *
 * Two kinds of elements are collected:
 *   - visible `a, button, input, textarea, select` elements, with a small set of
 *     attributes (the real `id` attribute is emitted as `target`);
 *   - visible leaf elements whose trimmed text is 1 to 100 characters long.
 * Descendants of a collected operable element are skipped. Every collected
 * element gets a sequential pseudo id, written as the `id` attribute of its
 * pseudo-html line, and the id-to-element mapping is stored in
 * `window.operableElementMap`. Lines are ordered by document position.
 *
 * Excluded descendants are tracked in a Set instead of an array, so membership
 * checks no longer scan a growing list for every element; the output is unchanged.
 *
 * NOTE(review): this function is passed as `func` to `executeScript`, so it
 * presumably has to stay free of references to other module-level helpers.
 *
 * @returns {string} One pseudo-html line per collected element, joined with newlines.
 */
function extractOperableElements() {
  // visible
  const isElementVisible = (element) => {
    const style = window.getComputedStyle(element);
    return (
      style.display !== 'none' &&
      style.visibility !== 'hidden' &&
      style.opacity !== '0' &&
      element.offsetWidth > 0 &&
      element.offsetHeight > 0
    );
  };
  // element original index: number of elements preceding it in document order
  const getElementIndex = (element) => {
    const preceding = document.evaluate('preceding::*', element, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    return preceding.snapshotLength;
  };
  // exclude: descendants of already collected operable elements
  const excludes = new Set();
  const addExclude = (children) => {
    for (let i = 0; i < children.length; i++) {
      excludes.add(children[i]);
      if (children[i].children) {
        addExclude(children[i].children);
      }
    }
  };
  // { pseudoId: element }
  const elementMap = {};
  let nextId = 1;
  const elements = [];
  const exportedAttributes = ['id', 'name', 'type', 'value', 'href', 'title', 'placeholder'];
  // operable element
  const operableSelectors = 'a, button, input, textarea, select';
  document.querySelectorAll(operableSelectors).forEach((element) => {
    if (!isElementVisible(element) || excludes.has(element)) {
      return;
    }
    const id = nextId++;
    elementMap[id.toString()] = element;
    const tagName = element.tagName.toLowerCase();
    const attributes = Array.from(element.attributes)
      .filter((attr) => exportedAttributes.includes(attr.name))
      // The real id is exposed as "target" because "id" carries the pseudo id.
      .map((attr) => `${attr.name == 'id' ? 'target' : attr.name}="${attr.value}"`)
      .join(' ');
    elements.push({
      originalIndex: getElementIndex(element),
      id: id,
      html: `<${tagName} id="${id}" ${attributes}>${tagName == 'select' ? element.innerHTML : element.innerText || ''}</${tagName}>`,
    });
    addExclude(element.children);
  });
  // short text element
  const textWalker = document.createTreeWalker(document.body, NodeFilter.SHOW_ELEMENT, {
    acceptNode: function (node) {
      if (node.matches(operableSelectors) || excludes.has(node)) {
        // skip
        return NodeFilter.FILTER_SKIP;
      }
      // text <= 100
      const rawText = node.innerText;
      const text = rawText === null || rawText === void 0 ? void 0 : rawText.trim();
      if (isElementVisible(node) &&
        text &&
        text.length <= 100 &&
        text.length > 0 &&
        node.children.length === 0) {
        return NodeFilter.FILTER_ACCEPT;
      }
      // skip
      return NodeFilter.FILTER_SKIP;
    },
  });
  let currentNode;
  while ((currentNode = textWalker.nextNode())) {
    const id = nextId++;
    elementMap[id.toString()] = currentNode;
    const tagName = currentNode.tagName.toLowerCase();
    elements.push({
      originalIndex: getElementIndex(currentNode),
      id: id,
      html: `<${tagName} id="${id}">${currentNode.innerText.trim()}</${tagName}>`,
    });
  }
  // element sort
  elements.sort((a, b) => a.originalIndex - b.originalIndex);
  // cache
  window.operableElementMap = elementMap;
  // pseudo html
  return elements.map((e) => e.html).join('\n');
}
/**
 * Click the element stored under pseudo id `id` in `window.operableElementMap`.
 *
 * Returns `false` when the map has not been populated yet; this used to raise
 * a TypeError.
 *
 * NOTE(review): this function is passed as `func` to `executeScript`, so it
 * presumably has to stay free of references to other module-level helpers.
 *
 * @param {string|number} id Pseudo id of the element to click.
 * @returns {boolean} `true` when an element was found and clicked, otherwise `false`.
 */
function clickOperableElement(id) {
  const elementMap = window.operableElementMap;
  const element = elementMap ? elementMap[id] : undefined;
  if (!element) {
    return false;
  }
  if (element.click) {
    element.click();
  }
  else {
    // Elements without a click() method get a synthetic mouse event instead.
    element.dispatchEvent(new MouseEvent('click', {
      view: window,
      bubbles: true,
      cancelable: true,
    }));
  }
  return true;
}
/**
 * Bounding box, in page coordinates, of the element stored under pseudo id `id`
 * in `window.operableElementMap`.
 *
 * The viewport-relative rectangle is shifted by the current scroll offsets.
 * Returns `null` when the map has not been populated yet; this used to raise
 * a TypeError.
 *
 * @param {string|number} id Pseudo id of the element.
 * @returns {{left: number, top: number, right: number, bottom: number, width: number, height: number}|null}
 *   The rectangle, or `null` when no element is stored under `id`.
 */
function getOperableElementRect(id) {
  const elementMap = window.operableElementMap;
  const element = elementMap ? elementMap[id] : undefined;
  if (!element) {
    return null;
  }
  const rect = element.getBoundingClientRect();
  return {
    left: rect.left + window.scrollX,
    top: rect.top + window.scrollY,
    right: rect.right + window.scrollX,
    bottom: rect.bottom + window.scrollY,
    width: rect.right - rect.left,
    height: rect.bottom - rect.top,
  };
}
615
+
/**
 * Browser Use for general
 *
 * Tool exposing element-level browser actions. `screenshot_extract_element`
 * stores a `selector_map` in `context.variables`; the element actions resolve
 * their `index` parameter to an XPath through that map.
 */
class BrowserUse {
  constructor() {
    this.name = 'browser_use';
    this.description = `Use structured commands to interact with the browser, manipulating page elements through screenshots and webpage element extraction.
* This is a browser GUI interface where you need to analyze webpages by taking screenshots and extracting page element structures, and specify action sequences to complete designated tasks.
* Some operations may need time to process, so you might need to wait and continuously take screenshots and extract element structures to check the operation results.
* Before any operation, you must first call the \`screenshot_extract_element\` command, which will return the browser page screenshot and structured element information, both specially processed.
* ELEMENT INTERACTION:
- Only use indexes that exist in the provided element list
- Each element has a unique index number (e.g., "[33]:<button>")
- Elements marked with "[]:" are non-interactive (for context only)
* NAVIGATION & ERROR HANDLING:
- If no suitable elements exist, use other functions to complete the task
- If stuck, try alternative approaches
- Handle popups/cookies by accepting or closing them
- Use scroll to find elements you are looking for
* Form filling:
- If you fill a input field and your action sequence is interrupted, most often a list with suggestions poped up under the field and you need to first select the right element from the suggestion list.
* ACTION SEQUENCING:
- Actions are executed in the order they appear in the list
- Each action should logically follow from the previous one
- If the page changes after an action, the sequence is interrupted and you get the new state.
- If content only disappears the sequence continues.
- Only provide the action sequence until you think the page will change.
- Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page like saving, extracting, checkboxes...
- only use multiple actions if it makes sense.`;
    this.input_schema = {
      type: 'object',
      properties: {
        action: {
          type: 'string',
          description: `The action to perform. The available actions are:
* \`screenshot_extract_element\`: Take a screenshot of the web page and extract operable elements.
- Screenshots are used to understand page layouts, with labeled bounding boxes corresponding to element indexes. Each bounding box and its label share the same color, with labels typically positioned in the top-right corner of the box.
- Screenshots help verify element positions and relationships. Labels may sometimes overlap, so extracted elements are used to verify the correct elements.
- In addition to screenshots, simplified information about interactive elements is returned, with element indexes corresponding to those in the screenshots.
* \`input_text\`: Enter a string in the interactive element.
* \`clear_text\`: Clear the text in the input/textarea element.
* \`click\`: Click to element.
* \`right_click\`: Right-click on the element.
* \`double_click\`: Double-click on the element.
* \`scroll_to\`: Scroll to the specified element.
* \`extract_content\`: Extract the text content of the current webpage.
* \`get_dropdown_options\`: Get all options from a native dropdown element.
* \`select_dropdown_option\`: Select dropdown option for interactive element index by the text of the option you want to select.`,
          enum: [
            'screenshot_extract_element',
            'input_text',
            'clear_text',
            'click',
            'right_click',
            'double_click',
            'scroll_to',
            'extract_content',
            'get_dropdown_options',
            'select_dropdown_option',
          ],
        },
        index: {
          type: 'integer',
          description: 'index of element, Operation elements must pass the corresponding index of the element',
        },
        text: {
          type: 'string',
          description: 'Required by `action=input_text` and `action=select_dropdown_option`',
        },
      },
      required: ['action'],
    };
  }
  /**
   * browser
   *
   * Never throws: every failure is reported as `{ success: false, error }`.
   * Missing (`null` or `undefined`) params now produce the regular
   * "Invalid parameters" message, and element actions fail with
   * "Element does not exist" when no `selector_map` has been stored yet,
   * instead of being sent with an undefined xpath.
   *
   * @param {*} context Execution context exposing a `variables` store.
   * @param {*} params { action: 'input_text', index: 1, text: 'string' }
   * @returns > { success: true, image?: { type: 'base64', media_type: 'image/jpeg', data: '/9j...' }, text?: string }
   */
  async execute(context, params) {
    try {
      if (params == null || !params.action) {
        throw new Error('Invalid parameters. Expected an object with a "action" property.');
      }
      const tabId = await getTabId(context);
      const windowId = await getWindowId(context);
      const selector_map = context.variables.get('selector_map');
      let selector_xpath;
      if (params.index != null && selector_map) {
        const entry = selector_map[params.index];
        selector_xpath = entry === null || entry === void 0 ? void 0 : entry.xpath;
        if (!selector_xpath) {
          throw new Error('Element does not exist');
        }
      }
      // Validation helpers shared by the element actions below.
      const requireIndex = () => {
        if (params.index == null) {
          throw new Error('index parameter is required');
        }
      };
      const requireText = () => {
        if (params.text == null) {
          throw new Error('text parameter is required');
        }
      };
      const requireXpath = () => {
        // Without a stored selector_map the index cannot be resolved to an element.
        if (!selector_xpath) {
          throw new Error('Element does not exist');
        }
      };
      let result;
      switch (params.action) {
        case 'input_text': {
          requireIndex();
          requireText();
          requireXpath();
          result = await type_by_xpath(tabId, params.text, selector_xpath);
          await sleep(200);
          break;
        }
        case 'clear_text': {
          requireIndex();
          requireXpath();
          result = await clear_input_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        }
        case 'click': {
          requireIndex();
          requireXpath();
          result = await left_click_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        }
        case 'right_click': {
          requireIndex();
          requireXpath();
          result = await right_click_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        }
        case 'double_click': {
          requireIndex();
          requireXpath();
          result = await double_click_by_xpath(tabId, selector_xpath);
          await sleep(100);
          break;
        }
        case 'scroll_to': {
          requireIndex();
          requireXpath();
          result = await scroll_to_xpath(tabId, selector_xpath);
          await sleep(500);
          break;
        }
        case 'extract_content': {
          const tab = await chrome.tabs.get(tabId);
          await injectScript(tabId);
          await sleep(200);
          const content = await executeScript(tabId, () => {
            return eko.extractHtmlContent();
          }, []);
          result = {
            title: tab.title,
            url: tab.url,
            content: content,
          };
          break;
        }
        case 'get_dropdown_options': {
          requireIndex();
          requireXpath();
          result = await executeScript(tabId, getDropdownOptions, [selector_xpath]);
          break;
        }
        case 'select_dropdown_option': {
          requireIndex();
          requireText();
          requireXpath();
          result = await executeScript(tabId, selectDropdownOption, [selector_xpath, params.text]);
          break;
        }
        case 'screenshot_extract_element': {
          await sleep(100);
          await injectScript(tabId, 'build_dom_tree.js');
          await sleep(100);
          const element_result = await executeScript(tabId, () => {
            return window.get_clickable_elements(true);
          }, []);
          // Remember the index -> element mapping for the following element actions.
          context.variables.set('selector_map', element_result.selector_map);
          const capture = await screenshot(windowId);
          await executeScript(tabId, () => {
            return window.remove_highlight();
          }, []);
          result = { image: capture.image, text: element_result.element_str };
          break;
        }
        default:
          throw Error(`Invalid parameters. The "${params.action}" value is not included in the "action" enumeration.`);
      }
      if (result) {
        return { success: true, ...result };
      }
      return { success: false };
    }
    catch (e) {
      return { success: false, error: e === null || e === void 0 ? void 0 : e.message };
    }
  }
}
815
+
/**
 * Element click
 *
 * Tool that clicks the element described by a natural-language task prompt.
 */
class ElementClick {
  constructor() {
    this.name = 'element_click';
    this.description = 'Click the element through task prompts';
    this.input_schema = {
      type: 'object',
      properties: {
        task_prompt: {
          type: 'string',
          description: 'Task prompt, eg: click search button',
        },
      },
      required: ['task_prompt'],
    };
  }

  /**
   * Click the element described by `params.task_prompt`.
   *
   * The HTML-element strategy is tried first; if it fails (the error is logged)
   * or yields a falsy result, the screenshot-based strategy is used instead.
   *
   * @param {*} context Execution context handed to both strategies.
   * @param {{task_prompt: string}} params Tool parameters.
   * @returns {Promise<*>} Result of whichever strategy produced the click.
   * @throws {Error} When `params` is not an object with a `task_prompt`.
   */
  async execute(context, params) {
    const hasPrompt = typeof params === 'object' && params !== null && params.task_prompt;
    if (!hasPrompt) {
      throw new Error('Invalid parameters. Expected an object with a "task_prompt" property.');
    }
    const prompt = params.task_prompt;
    let outcome;
    try {
      outcome = await executeWithHtmlElement$1(context, prompt);
    } catch (e) {
      console.log(e);
      outcome = false;
    }
    if (outcome) {
      return outcome;
    }
    return await executeWithBrowserUse$1(context, prompt);
  }
}
/**
 * Click strategy based on the page's pseudo-html.
 *
 * The operable elements of the tab are extracted, the LLM is asked which
 * element id matches the task prompt, and that element is clicked in the tab.
 *
 * @param {*} context Execution context providing `llmProvider.generateText`.
 * @param {string} task_prompt Description of the element to click.
 * @returns {Promise<*>} Result of the in-page click, or `false` when the model named no element.
 */
async function executeWithHtmlElement$1(context, task_prompt) {
  const tabId = await getTabId(context);
  const pseudoHtml = await executeScript(tabId, extractOperableElements, []);
  const prompt = `# Task
Determine the operation intent based on user input, find the element ID that the user needs to operate on in the webpage HTML, and if the element does not exist, do nothing.
Output JSON format, no explanation required.

# User input
${task_prompt}

# Output example (when the element exists)
{"elementId": "1", "operationType": "click"}

# Output example (when the element does not exist)
{"elementId": null, "operationType": "unknown"}

# HTML
${pseudoHtml}
`;
  const messages = [{ role: 'user', content: prompt }];
  const response = await context.llmProvider.generateText(messages, { maxTokens: 1024 });
  const answer = typeof response.content === 'string' ? response.content : response.content[0].text;
  // Keep only the span from the first "{" to the first "}" of the answer.
  const jsonStart = answer.indexOf('{');
  const jsonEnd = answer.indexOf('}') + 1;
  const parsed = JSON.parse(answer.substring(jsonStart, jsonEnd));
  if (!parsed.elementId) {
    return false;
  }
  return await executeScript(tabId, clickOperableElement, [parsed.elementId]);
}
887
/**
 * Screenshot-based click strategy.
 *
 * Sends a screenshot of the window plus the prompt to the LLM, forces it to
 * answer through the `left_click` tool, and clicks the returned coordinate.
 *
 * @param {*} context execution context; must expose llmProvider.generateText
 * @param {string} task_prompt natural-language description of the click target
 * @returns the result of left_click for the coordinate chosen by the LLM
 */
async function executeWithBrowserUse$1(context, task_prompt) {
    const tabId = await getTabId(context);
    const windowId = await getWindowId(context);
    const shot = await screenshot(windowId);
    const clickTool = {
        name: 'left_click',
        description: 'click element',
        input_schema: {
            type: 'object',
            properties: {
                coordinate: {
                    type: 'array',
                    description: '(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates.',
                },
            },
            required: ['coordinate'],
        },
    };
    const userMessage = {
        role: 'user',
        content: [
            { type: 'image', source: shot.image },
            { type: 'text', text: 'click: ' + task_prompt },
        ],
    };
    const llm_params = {
        maxTokens: 1024,
        toolChoice: { type: 'tool', name: 'left_click' },
        tools: [clickTool],
    };
    const response = await context.llmProvider.generateText([userMessage], llm_params);
    // Only the first tool call is consulted.
    const { coordinate } = response.toolCalls[0].input;
    return await left_click(tabId, coordinate);
}
935
+
936
/**
 * Export file tool.
 *
 * Hands text content to the page-side `exportFile` function, injected into
 * the current tab, together with a file name and MIME type. If the current
 * tab rejects script injection, a temporary tab is opened for the export and
 * closed again afterwards.
 */
class ExportFile {
    constructor() {
        this.name = 'export_file';
        this.description = 'Content exported as a file, support text format';
        this.input_schema = {
            type: 'object',
            properties: {
                fileType: {
                    type: 'string',
                    description: 'File format type',
                    enum: ['txt', 'csv', 'md', 'html', 'js', 'xml', 'json', 'yml', 'sql'],
                },
                content: {
                    type: 'string',
                    description: 'Export file content',
                },
                filename: {
                    type: 'string',
                    description: 'File name',
                },
            },
            required: ['fileType', 'content'],
        };
    }
    /**
     * export
     *
     * @param {*} context execution context used to resolve the current tab
     * @param {*} params { fileType: 'csv', content: 'field1,field2\ndata1,data2', filename?: 'report' }
     * @returns > { success: true }
     * @throws {Error} when params is not an object with a "content" property,
     *                 or when the export fails in the fallback tab as well
     */
    async execute(context, params) {
        if (typeof params !== 'object' || params === null || !('content' in params)) {
            throw new Error('Invalid parameters. Expected an object with a "content" property.');
        }
        // fileType is declared as required but was never validated; fall back
        // to plain text instead of producing a ".undefined" extension.
        const fileType = params.fileType || 'txt';
        const mimeTypes = {
            csv: 'text/csv',
            md: 'text/markdown',
            html: 'text/html',
            js: 'application/javascript',
            xml: 'text/xml',
            json: 'application/json',
        };
        const type = Object.prototype.hasOwnProperty.call(mimeTypes, fileType)
            ? mimeTypes[fileType]
            : 'text/plain';
        // Compare against ".ext", not "ext": a name such as "mytxt" merely
        // ends with the letters of the extension and still needs ".txt".
        const extension = '.' + fileType;
        let filename;
        if (!params.filename) {
            filename = new Date().getTime() + extension;
        }
        else {
            const requested = params.filename + '';
            filename = requested.endsWith(extension) ? requested : requested + extension;
        }
        const inject = (targetTabId) => chrome.scripting.executeScript({
            target: { tabId: targetTabId },
            func: exportFile,
            args: [filename, type, params.content],
        });
        const tabId = await getTabId(context);
        try {
            await inject(tabId);
        }
        catch (e) {
            // The current tab refused the injection; retry in a throwaway tab.
            console.log(e);
            const tab = await open_new_tab('https://www.google.com', true);
            try {
                await inject(tab.id);
                // Give the page a moment to start the export before the tab goes away.
                await sleep(1000);
            }
            finally {
                // Always close the temporary tab, even when the retry failed.
                await chrome.tabs.remove(tab.id);
            }
        }
        return { success: true };
    }
}
1026
+
1027
/**
 * Extract page content tool.
 *
 * Returns the title, URL and extracted text of the current tab.
 */
class ExtractContent {
    constructor() {
        Object.assign(this, {
            name: 'extract_content',
            description: 'Extract the text content of the current webpage',
            input_schema: {
                type: 'object',
                properties: {},
            },
        });
    }
    /**
     * Extract the content of the current tab.
     *
     * @param {*} context execution context used to resolve the current tab
     * @param {*} params {} (unused)
     * @returns > { tabId, result: { title, url, content } }
     */
    async execute(context, params) {
        const tabId = await getTabId(context);
        const { title, url } = await chrome.tabs.get(tabId);
        await injectScript(tabId);
        // Short pause after injection before calling into the page-side `eko` object.
        await sleep(500);
        const content = await executeScript(tabId, () => eko.extractHtmlContent(), []);
        const result = { title, url, content };
        return { tabId, result };
    }
}
1063
+
1064
/**
 * Find element position tool.
 *
 * Locates the element described by a natural-language prompt. The HTML-based
 * strategy is tried first; when it throws or yields a falsy value, the
 * screenshot-based strategy is used instead.
 */
class FindElementPosition {
    constructor() {
        const promptProperty = {
            type: 'string',
            description: 'Task prompt, eg: find the search input box',
        };
        this.name = 'find_element_position';
        this.description = 'Locate Element Coordinates through Task Prompts';
        this.input_schema = {
            type: 'object',
            properties: {
                task_prompt: promptProperty,
            },
            required: ['task_prompt'],
        };
    }
    /**
     * Locate the element matching the prompt.
     *
     * @param {*} context execution context forwarded to both strategies
     * @param {*} params { task_prompt: 'find the search input box' }
     * @returns whatever the strategy that located the element resolved with
     * @throws {Error} when params is not an object carrying a task_prompt
     */
    async execute(context, params) {
        const isObject = typeof params === 'object' && params !== null;
        if (!isObject || !params.task_prompt) {
            throw new Error('Invalid parameters. Expected an object with a "task_prompt" property.');
        }
        const prompt = params.task_prompt;
        let position;
        try {
            position = await executeWithHtmlElement(context, prompt);
        }
        catch (err) {
            // A failed HTML lookup is not fatal: log it and fall through.
            console.log(err);
            position = null;
        }
        if (position) {
            return position;
        }
        return await executeWithBrowserUse(context, prompt);
    }
}
1101
/**
 * HTML-based lookup strategy.
 *
 * Collects the page's operable elements, asks the LLM which element id the
 * prompt refers to, and fetches that element's rectangle.
 *
 * @param {*} context execution context; must expose llmProvider.generateText
 * @param {string} task_prompt natural-language description of the element
 * @returns the result of the rect script, or null when the LLM names no element
 */
async function executeWithHtmlElement(context, task_prompt) {
    const tabId = await getTabId(context);
    const pseudoHtml = await executeScript(tabId, extractOperableElements, []);
    const prompt = `# Task
Find the element ID that the user needs to operate on in the webpage HTML, and if the element does not exist, do nothing.
Output JSON format, no explanation required.

# User input
${task_prompt}

# Output example (when the element exists)
{"elementId": "1"}

# Output example (when the element does not exist)
{"elementId": null}

# HTML
${pseudoHtml}
`;
    const response = await context.llmProvider.generateText([{ role: 'user', content: prompt }], { maxTokens: 1024 });
    const raw = response.content;
    const text = typeof raw == 'string' ? raw : raw[0].text;
    // Keep only the span from the first "{" to the first "}" so that any
    // explanation around the JSON object is ignored.
    const start = text.indexOf('{');
    const end = text.indexOf('}') + 1;
    const { elementId } = JSON.parse(text.substring(start, end));
    if (!elementId) {
        return null;
    }
    return await executeScript(tabId, getOperableElementRect, [elementId]);
}
1135
/**
 * Screenshot-based lookup strategy.
 *
 * Sends a screenshot of the window plus the prompt to the LLM, forces it to
 * answer through the `get_element_by_coordinate` tool, and reports the
 * coordinate it chose.
 *
 * @param {*} context execution context; must expose llmProvider.generateText
 * @param {string} task_prompt natural-language description of the element
 * @returns > { left, top } taken from the first tool call's coordinate
 */
async function executeWithBrowserUse(context, task_prompt) {
    // The tab id itself is not needed here; the call is kept for parity
    // with the original sequence of context lookups.
    await getTabId(context);
    const windowId = await getWindowId(context);
    const shot = await screenshot(windowId);
    const locateTool = {
        name: 'get_element_by_coordinate',
        description: 'Retrieve element information based on coordinate',
        input_schema: {
            type: 'object',
            properties: {
                coordinate: {
                    type: 'array',
                    description: '(x, y): The x (pixels from the left edge) and y (pixels from the top edge) coordinates.',
                },
            },
            required: ['coordinate'],
        },
    };
    const userMessage = {
        role: 'user',
        content: [
            { type: 'image', source: shot.image },
            { type: 'text', text: 'Find the element: ' + task_prompt },
        ],
    };
    const llm_params = {
        maxTokens: 1024,
        toolChoice: { type: 'tool', name: 'get_element_by_coordinate' },
        tools: [locateTool],
    };
    const response = await context.llmProvider.generateText([userMessage], llm_params);
    const point = response.toolCalls[0].input.coordinate;
    return { left: point[0], top: point[1] };
}
1185
+
1186
/**
 * Form autofill tool.
 *
 * Placeholder: the tool is registered with an empty input schema but its
 * execution is not implemented yet.
 */
class FormAutofill {
    constructor() {
        Object.assign(this, {
            name: 'form_autofill',
            description: 'Automatically fill in form data on web pages',
            input_schema: {
                type: 'object',
                properties: {},
            },
        });
    }
    /**
     * Not implemented; always rejects.
     *
     * @throws {Error} 'Not implemented'
     */
    async execute(context, params) {
        // Planned scope: form -> input, textarea, select ...
        throw new Error('Not implemented');
    }
}
1203
+
1204
/**
 * Open URL tool.
 *
 * Opens a URL either in a new window or in the window already tracked by the
 * context, and records the resulting tab and window ids in the context
 * variables.
 */
class OpenUrl {
    constructor() {
        this.name = 'open_url';
        this.description = 'Open the specified URL link in browser window';
        this.input_schema = {
            type: 'object',
            properties: {
                url: {
                    type: 'string',
                    description: 'URL link address',
                },
                newWindow: {
                    type: 'boolean',
                    description: 'true: Open in a new window; false: Open in the current window.',
                },
            },
            required: ['url'],
        };
    }
    /**
     * Open Url
     *
     * @param {*} context execution context; its `variables` store is read and updated
     * @param {*} params { url: 'https://www.google.com', newWindow: true }
     * @returns > { tabId, windowId, title }
     * @throws {Error} when params is not an object carrying a url
     */
    async execute(context, params) {
        const isObject = typeof params === 'object' && params !== null;
        if (!isObject || !params.url) {
            throw new Error('Invalid parameters. Expected an object with a "url" property.');
        }
        const vars = context.variables;
        // The very first open (no window or tab tracked yet) always gets its own window.
        const useNewWindow = params.newWindow || !(vars.get('windowId') || vars.get('tabId'));
        let tab;
        if (useNewWindow) {
            tab = await open_new_tab(params.url, true);
        }
        else {
            const currentWindowId = await getWindowId(context);
            tab = await open_new_tab(params.url, false, currentWindowId);
        }
        const tabId = tab.id;
        const windowId = tab.windowId;
        vars.set('windowId', windowId);
        vars.set('tabId', tabId);
        if (useNewWindow) {
            // Remember every window this tool created under the 'windowIds' variable.
            const created = vars.get('windowIds');
            if (created) {
                created.push(windowId);
            }
            else {
                vars.set('windowIds', [windowId]);
            }
        }
        return { tabId, windowId, title: tab.title };
    }
}
1270
+
1271
/**
 * Current page screenshot tool.
 *
 * Captures the window tracked by the context.
 */
class Screenshot {
    constructor() {
        Object.assign(this, {
            name: 'screenshot',
            description: 'Screenshot the current webpage window',
            input_schema: {
                type: 'object',
                properties: {},
            },
        });
    }
    /**
     * Current Page Screenshot
     *
     * @param {*} context execution context used to resolve the window
     * @param {*} params {} (unused)
     * @returns > { image: { type: 'base64', media_type: 'image/png', data } }
     */
    async execute(context, params) {
        const targetWindow = await getWindowId(context);
        const capture = await screenshot(targetWindow);
        return capture;
    }
}
1294
+
1295
/**
 * Browser tab management tool.
 *
 * Interprets a small textual command language (`tab_all`, `current_tab`,
 * `go_back`, `change_url [url]`, `close_tab`, `switch_tab [tabId]`,
 * `new_tab [url]`) and keeps the context's `tabId` / `windowId` variables in
 * step with the tab that ends up active.
 */
class TabManagement {
    constructor() {
        this.name = 'tab_management';
        this.description = 'Browser tab management, view and operate tabs';
        this.input_schema = {
            type: 'object',
            properties: {
                commond: {
                    type: 'string',
                    description: `The commond to perform. The available commonds are:
* \`tab_all\`: View all tabs and return the tabId and title.
* \`current_tab\`: Get current tab information (tabId, url, title).
* \`go_back\`: Go back to the previous page in the current tab.
* \`change_url [url]\`: open URL in the current tab, eg: \`change_url https://www.google.com\`.
* \`close_tab\`: Close the current tab.
* \`switch_tab [tabId]\`: Switch to the specified tab using tabId, eg: \`switch_tab 1000\`.
* \`new_tab [url]\`: Open a new tab window and open the URL, eg: \`new_tab https://www.google.com\``,
                },
            },
            required: ['commond'],
        };
    }
    /**
     * Tab management
     *
     * @param {*} context execution context; its `variables` store is read and updated
     * @param {*} params { commond: `new_tab [url]` | 'tab_all' | 'current_tab' | 'go_back' | 'close_tab' | 'switch_tab [tabId]' | `change_url [url]` }
     * @returns tab information for the command (an array for `tab_all`,
     *          { closedTabId, newTabId, newTabTitle } for `close_tab`,
     *          { tabId, windowId, title, url } otherwise)
     * @throws {Error} on invalid params, an unparsable tab id, or an unknown command
     */
    async execute(context, params) {
        // Same guard as the sibling tools, so undefined or primitive params
        // produce the descriptive error instead of a TypeError.
        if (typeof params !== 'object' || params === null || !params.commond) {
            throw new Error('Invalid parameters. Expected an object with a "commond" property.');
        }
        let windowId = await getWindowId(context);
        let commond = params.commond.trim();
        // Models sometimes echo the backticks used in the command description.
        if (commond.startsWith('`')) {
            commond = commond.substring(1);
        }
        if (commond.endsWith('`')) {
            commond = commond.substring(0, commond.length - 1);
        }
        // Strips the command word and the literal "[" / "]" from the usage examples.
        const argumentOf = (name) => commond.substring(name.length).replace('[', '').replace(']', '').trim();
        const describe = (tabId, tab) => ({ tabId, windowId: tab.windowId, title: tab.title, url: tab.url });
        let result;
        if (commond == 'tab_all') {
            result = [];
            let tabs = await chrome.tabs.query({ windowId: windowId });
            for (let i = 0; i < tabs.length; i++) {
                let tab = tabs[i];
                let tabInfo = describe(tab.id, tab);
                if (tab.active) {
                    tabInfo.active = true;
                }
                result.push(tabInfo);
            }
        }
        else if (commond == 'current_tab') {
            let tabId = await getTabId(context);
            let tab = await chrome.tabs.get(tabId);
            result = describe(tabId, tab);
        }
        else if (commond == 'go_back') {
            let tabId = await getTabId(context);
            await chrome.tabs.goBack(tabId);
            let tab = await chrome.tabs.get(tabId);
            result = describe(tabId, tab);
        }
        else if (commond == 'close_tab') {
            let closedTabId = await getTabId(context);
            await chrome.tabs.remove(closedTabId);
            await sleep(100);
            let tabs = await chrome.tabs.query({ active: true, currentWindow: true });
            if (tabs.length == 0) {
                tabs = await chrome.tabs.query({ status: 'complete', currentWindow: true });
            }
            let tab = tabs[tabs.length - 1];
            if (!tab.active) {
                await chrome.tabs.update(tab.id, { active: true });
            }
            let newTabId = tab.id;
            context.variables.set('tabId', tab.id);
            context.variables.set('windowId', tab.windowId);
            result = { closedTabId, newTabId, newTabTitle: tab.title };
        }
        else if (commond.startsWith('switch_tab')) {
            let tabId = parseInt(argumentOf('switch_tab'), 10);
            if (Number.isNaN(tabId)) {
                throw new Error('Invalid tabId in commond: ' + commond);
            }
            let tab = await chrome.tabs.update(tabId, { active: true });
            context.variables.set('tabId', tab.id);
            context.variables.set('windowId', tab.windowId);
            result = describe(tabId, tab);
        }
        else if (commond.startsWith('change_url')) {
            let url = argumentOf('change_url');
            let tabId = await getTabId(context);
            // The URL is passed through `args` instead of being captured by the
            // callback: chrome.scripting serializes injected functions, so
            // closure variables do not survive the trip into the page.
            // NOTE(review): assumes executeScript forwards `args` to the
            // injected function, as its other call sites suggest - confirm.
            await executeScript(tabId, (targetUrl) => {
                location.href = targetUrl;
            }, [url]);
            let tab = await waitForTabComplete(tabId);
            result = describe(tabId, tab);
        }
        else if (commond.startsWith('new_tab')) {
            // Trimmed like change_url so the space after the command word
            // does not end up in the URL.
            let url = argumentOf('new_tab').replace(/"/g, '').trim();
            // First mandatory opening of a new window
            let newWindow = !context.variables.get('windowId') && !context.variables.get('tabId');
            let tab;
            if (newWindow) {
                tab = await open_new_tab(url, true);
            }
            else {
                let currentWindowId = await getWindowId(context);
                tab = await open_new_tab(url, false, currentWindowId);
            }
            context.variables.set('windowId', tab.windowId);
            context.variables.set('tabId', tab.id);
            if (newWindow) {
                // Track windows created here so destroy() can close them.
                let windowIds = context.variables.get('windowIds');
                if (windowIds) {
                    windowIds.push(tab.windowId);
                }
                else {
                    context.variables.set('windowIds', [tab.windowId]);
                }
            }
            result = describe(tab.id, tab);
        }
        else {
            throw Error('Unknown commond: ' + commond);
        }
        return result;
    }
    /**
     * Close every window recorded under the `windowIds` context variable.
     *
     * @param {*} context execution context whose `variables` store is read
     */
    destroy(context) {
        let windowIds = context.variables.get('windowIds');
        if (windowIds) {
            for (let i = 0; i < windowIds.length; i++) {
                chrome.windows.remove(windowIds[i]);
            }
        }
    }
}
1453
+
1454
/**
 * Web search tool.
 *
 * Runs a single deep search and returns the result links that ended up with
 * crawled content.
 */
class WebSearch {
    constructor() {
        this.name = 'web_search';
        this.description = 'Use web search to return search results';
        this.input_schema = {
            type: 'object',
            properties: {
                query: {
                    type: 'string',
                    description: 'search for keywords',
                },
                maxResults: {
                    type: 'integer',
                    description: 'Maximum search results, default 5',
                },
            },
            required: ['query'],
        };
    }
    /**
     * search
     *
     * @param {*} context execution context (unused)
     * @param {*} params { url: 'https://www.google.com', query: 'ai agent', maxResults: 5 }
     * @returns > [{ title, url, content }]
     * @throws {Error} when params is not an object carrying a query
     */
    async execute(context, params) {
        const isObject = typeof params === 'object' && params !== null;
        if (!isObject || !params.query) {
            throw new Error('Invalid parameters. Expected an object with a "query" property.');
        }
        // `url` is not part of the schema; Google is used when it is absent.
        const searchUrl = params.url ? params.url : 'https://www.google.com';
        const limit = params.maxResults || 5;
        const taskId = String(new Date().getTime());
        const request = [{ url: searchUrl, keyword: params.query }];
        const searchInfo = await deepSearch(taskId, request, limit);
        const firstGroup = searchInfo.result[0];
        const hasGroup = firstGroup !== null && firstGroup !== undefined;
        const links = (hasGroup ? firstGroup.links : undefined) || [];
        // Links whose page could not be crawled carry no content; drop them.
        return links.filter((link) => link.content);
    }
}
1500
/**
 * Per-search-engine settings for deep search, keyed by domain.
 *
 * Each entry names the content script to inject (`filename`) and knows how to
 * build the results-page URL for a keyword (`buildSearchUrl(url, keyword)`).
 * The `default` entry handles any other site by running a Google `site:`
 * search restricted to that site's host.
 *
 * Keywords are escaped with encodeURIComponent: encodeURI leaves characters
 * such as `&`, `#`, `+` and `=` untouched, which would truncate or split the
 * `q` parameter for queries like "C# & C++".
 */
const deepSearchInjects = {
    'bing.com': {
        filename: 'bing.js',
        buildSearchUrl: function (url, keyword) {
            return 'https://bing.com/search?q=' + encodeURIComponent(keyword);
        },
    },
    'duckduckgo.com': {
        filename: 'duckduckgo.js',
        buildSearchUrl: function (url, keyword) {
            return 'https://duckduckgo.com/?q=' + encodeURIComponent(keyword);
        },
    },
    'google.com': {
        filename: 'google.js',
        buildSearchUrl: function (url, keyword) {
            return 'https://www.google.com/search?q=' + encodeURIComponent(keyword);
        },
    },
    default: {
        filename: 'google.js',
        buildSearchUrl: function (url, keyword) {
            // Reduce the URL to its host: drop the scheme, then everything
            // from the first "/" of the path onwards.
            url = url.trim();
            let idx = url.indexOf('//');
            if (idx > -1) {
                url = url.substring(idx + 2);
            }
            idx = url.indexOf('/', 2);
            if (idx > -1) {
                url = url.substring(0, idx);
            }
            keyword = 'site:' + url + ' ' + keyword;
            return 'https://www.google.com/search?q=' + encodeURIComponent(keyword);
        },
    },
};
1536
/**
 * Pick the search-engine settings for a URL and build its search URL.
 *
 * The URL is cut at the first "/" after the scheme separator; the remaining
 * prefix is matched against the keys of deepSearchInjects (exact match, or
 * ending in ".<domain>" or "/<domain>"). Unmatched URLs use the `default`
 * entry.
 *
 * @param {string} url site the search should target
 * @param {string} keyword search keyword
 * @returns > { filename, url } content script name and results-page URL
 */
function buildDeepSearchUrl(url, keyword) {
    const pathStart = url.indexOf('/', url.indexOf('//') + 2);
    const baseUrl = pathStart > -1 ? url.substring(0, pathStart) : url;
    const matchesDomain = (domain) => baseUrl == domain
        || baseUrl.endsWith('.' + domain)
        || baseUrl.endsWith('/' + domain);
    const matchedDomain = Object.keys(deepSearchInjects).find(matchesDomain);
    let inject = matchedDomain === undefined ? null : deepSearchInjects[matchedDomain];
    if (!inject) {
        inject = deepSearchInjects['default'];
    }
    const { filename } = inject;
    return { filename, url: inject.buildSearchUrl(url, keyword) };
}
1556
// Event
// Every chrome.tabs.onUpdated notification is republished on tabsUpdateEvent
// as a single { tabId, changeInfo, tab } object for the crawler listeners.
const tabsUpdateEvent = new MsgEvent();
chrome.tabs.onUpdated.addListener(async (tabId, changeInfo, tab) => {
    const update = { tabId, changeInfo, tab };
    await tabsUpdateEvent.publish(update);
});
1561
/**
 * deep search
 *
 * Collects the result links for every search, then crawls the content of
 * each linked page. When no window is supplied, a temporary one is created
 * and closed again once crawling finishes, whether it succeeded or failed.
 *
 * @param {string} taskId task id
 * @param {array} searchs search list => [{ url: 'https://bing.com', keyword: 'ai' }]
 * @param {number} detailsMaxNum Maximum crawling quantity per search detail page
 * @param {*} [window] window to crawl in; a new one is opened when omitted
 * @returns search info produced by doPageContent
 */
async function deepSearch(taskId, searchs, detailsMaxNum, window) {
    let closeWindow = false;
    if (!window) {
        // open new window
        window = await chrome.windows.create({
            type: 'normal',
            state: 'maximized',
            url: null,
        });
        closeWindow = true;
    }
    try {
        // crawler the search page details page link
        // [{ links: [{ title, url }] }]
        let detailLinkGroups = await doDetailLinkGroups(taskId, searchs, detailsMaxNum, window);
        // crawler all details page content and comments
        let searchInfo = await doPageContent(taskId, detailLinkGroups, window);
        console.log('searchInfo: ', searchInfo);
        return searchInfo;
    }
    finally {
        // Close the window we opened even when crawling threw, so a failed
        // search does not leave a stray window behind. A caller-supplied
        // window is left alone.
        closeWindow && chrome.windows.remove(window.id);
    }
}
1589
/**
 * crawler the search page details page link
 *
 * Opens one tab per search, waits for it to finish loading, injects the
 * matching content script and asks it for the result links. Groups are
 * appended in the order the tabs complete. A search whose tab cannot be
 * opened, or whose crawl throws, contributes no group.
 *
 * @param {string} taskId task id
 * @param {array} searchs search list => [{ url: 'https://bing.com', keyword: 'ai' }]
 * @param {number} detailsMaxNum Maximum crawling quantity per search detail page
 * @param {*} window window in which the search tabs are opened
 * @returns [{ url, links: [{ title, url }], filename }]
 */
async function doDetailLinkGroups(taskId, searchs, detailsMaxNum, window) {
    let detailLinkGroups = [];
    let countDownLatch = new CountDownLatch(searchs.length);
    for (let i = 0; i < searchs.length; i++) {
        try {
            // script name & build search URL
            const { filename, url } = buildDeepSearchUrl(searchs[i].url, searchs[i].keyword);
            // open new Tab
            let tab = await chrome.tabs.create({
                url: url,
                windowId: window.id,
            });
            let eventId = taskId + '_' + i;
            // monitor Tab status
            tabsUpdateEvent.addListener(async function (obj) {
                if (obj.tabId != tab.id) {
                    return;
                }
                let status = obj.changeInfo.status;
                if (status !== 'complete' && status !== 'unloaded') {
                    return;
                }
                tabsUpdateEvent.removeListener(eventId);
                try {
                    if (status === 'complete') {
                        // inject js
                        await injectScript(tab.id, filename);
                        await sleep(1000);
                        // crawler the search page details page
                        // { links: [{ title, url }] }
                        let detailLinks = await chrome.tabs.sendMessage(tab.id, {
                            type: 'page:getDetailLinks',
                            keyword: searchs[i].keyword,
                        });
                        if (!detailLinks || !detailLinks.links) {
                            // TODO error
                            detailLinks = { links: [] };
                        }
                        console.log('detailLinks: ', detailLinks);
                        let links = detailLinks.links.slice(0, detailsMaxNum);
                        detailLinkGroups.push({ url, links, filename });
                    }
                }
                catch (e) {
                    console.error(e);
                }
                finally {
                    // Always release the latch and close the tab. Without
                    // this, a failed injection or message left the caller
                    // waiting for the full latch timeout with the tab open.
                    countDownLatch.countDown();
                    chrome.tabs.remove(tab.id);
                }
            }, eventId);
        }
        catch (e) {
            console.error(e);
            countDownLatch.countDown();
        }
    }
    await countDownLatch.await(30000);
    return detailLinkGroups;
}
1652
/**
 * page content
 *
 * Opens every detail link in its own tab, injects the group's content
 * script once the tab has loaded, and stores the crawled `content` and
 * `page_title` on the link object itself. Links that cannot be opened or
 * crawled are counted in `failed` and listed in `failedLinks`.
 *
 * @param {string} taskId task id
 * @param {array} detailLinkGroups details page group => [{ filename, links: [{ title, url }] }]
 * @param {*} window window in which the detail tabs are opened
 * @returns search info => { total, running, succeed, failed, failedLinks, result }
 */
async function doPageContent(taskId, detailLinkGroups, window) {
    const searchInfo = {
        total: 0,
        running: 0,
        succeed: 0,
        failed: 0,
        failedLinks: [],
        result: detailLinkGroups,
    };
    for (let i = 0; i < detailLinkGroups.length; i++) {
        let links = detailLinkGroups[i].links;
        searchInfo.total += links.length;
    }
    let countDownLatch = new CountDownLatch(searchInfo.total);
    for (let i = 0; i < detailLinkGroups.length; i++) {
        let filename = detailLinkGroups[i].filename;
        let links = detailLinkGroups[i].links;
        for (let j = 0; j < links.length; j++) {
            let link = links[j];
            let tab;
            try {
                // open new tab
                tab = await chrome.tabs.create({
                    url: link.url,
                    windowId: window.id,
                });
            }
            catch (e) {
                // One link that cannot be opened must not abort the crawl
                // of the remaining links; record it as failed.
                searchInfo.failed++;
                searchInfo.failedLinks.push(link);
                console.error(link.title + ' crawler error', link.url, e);
                countDownLatch.countDown();
                continue;
            }
            searchInfo.running++;
            let eventId = taskId + '_' + i + '_' + j;
            // monitor Tab status
            tabsUpdateEvent.addListener(async function (obj) {
                if (obj.tabId != tab.id) {
                    return;
                }
                if (obj.changeInfo.status === 'complete') {
                    try {
                        tabsUpdateEvent.removeListener(eventId);
                        // inject js
                        await injectScript(tab.id, filename);
                        await sleep(1000);
                        // crawler content and comments
                        // { title, content }
                        let result = await chrome.tabs.sendMessage(tab.id, {
                            type: 'page:getContent',
                        });
                        if (!result) {
                            throw Error('No Result');
                        }
                        link.content = result.content;
                        link.page_title = result.title;
                        searchInfo.succeed++;
                    }
                    catch (e) {
                        searchInfo.failed++;
                        searchInfo.failedLinks.push(link);
                        console.error(link.title + ' crawler error', link.url, e);
                    }
                    finally {
                        searchInfo.running--;
                        countDownLatch.countDown();
                        chrome.tabs.remove(tab.id);
                        tabsUpdateEvent.removeListener(eventId);
                    }
                }
                else if (obj.changeInfo.status === 'unloaded') {
                    searchInfo.running--;
                    countDownLatch.countDown();
                    chrome.tabs.remove(tab.id);
                    tabsUpdateEvent.removeListener(eventId);
                }
            }, eventId);
        }
    }
    await countDownLatch.await(60000);
    return searchInfo;
}
1733
+
1734
// Frozen, prototype-less namespace of every tool class in this bundle;
// getAllTools() instantiates the entries that expose an `execute` method.
var tools = /*#__PURE__*/Object.freeze({
    __proto__: null,
    BrowserUse,
    ElementClick,
    ExportFile,
    ExtractContent,
    FindElementPosition,
    FormAutofill,
    OpenUrl,
    Screenshot,
    TabManagement,
    WebSearch,
});
1747
+
1748
/**
 * Send an `eko:message` to a tab via chrome.tabs.sendMessage.
 *
 * @param {number} tabId target tab
 * @param {*} event event name placed in the message
 * @param {*} params payload placed in the message
 * @returns whatever chrome.tabs.sendMessage resolves with
 */
async function pub(tabId, event, params) {
    const message = { type: 'eko:message', event, params };
    const reply = await chrome.tabs.sendMessage(tabId, message);
    return reply;
}
1755
/**
 * Read one entry from chrome.storage.sync.
 *
 * @param {string} [name='llmConfig'] storage key to read
 * @returns the value stored under that key (undefined when absent)
 */
async function getLLMConfig(name = 'llmConfig') {
    const { [name]: config } = await chrome.storage.sync.get([name]);
    return config;
}
1759
/**
 * Instantiate every tool class found in `tools`.
 *
 * An entry counts as a tool when it is a function whose prototype exposes
 * `execute`. Each instance is stored under its own `name`, falling back to
 * the export key. A constructor that throws is logged and skipped.
 *
 * @returns {Map} tool name => tool instance
 */
function getAllTools() {
    const registry = new Map();
    const isToolClass = (candidate) => typeof candidate === 'function'
        && candidate.prototype
        && 'execute' in candidate.prototype;
    for (const [key, ToolClass] of Object.entries(tools)) {
        if (!isToolClass(ToolClass)) {
            continue;
        }
        try {
            const instance = new ToolClass();
            registry.set(instance.name || key, instance);
        }
        catch (e) {
            console.error(`Failed to instantiate ${key}:`, e);
        }
    }
    return registry;
}
1775
+
1776
+ export { browser, getAllTools, getLLMConfig, pub, tools, utils };