@eko-ai/eko 3.1.0 → 3.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.esm.js CHANGED
@@ -1,5 +1,6 @@
1
1
  const config$1 = {
2
2
  name: "Eko",
3
+ mode: "normal",
3
4
  platform: "mac",
4
5
  maxReactNum: 500,
5
6
  maxTokens: 16000,
@@ -12,6 +13,7 @@ const config$1 = {
12
13
  maxDialogueImgFileNum: 1,
13
14
  toolResultMultimodal: true,
14
15
  parallelToolCalls: true,
16
+ markImageMode: "dom",
15
17
  expertMode: false,
16
18
  expertModeTodoLoopNum: 10,
17
19
  };
@@ -31937,7 +31939,7 @@ class Eko {
31937
31939
  results.push(agent_results.join("\n\n"));
31938
31940
  }
31939
31941
  context.conversation.splice(0, context.conversation.length);
31940
- if (config$1.expertMode &&
31942
+ if ((config$1.mode == "expert" || config$1.expertMode) &&
31941
31943
  !workflow.modified &&
31942
31944
  agentTree.nextAgent &&
31943
31945
  lastAgent?.AgentContext &&
@@ -34184,12 +34186,12 @@ class Agent {
34184
34186
  const finalResult = await this.handleCallResult(agentContext, messages, agentTools, results);
34185
34187
  loopNum++;
34186
34188
  if (!finalResult) {
34187
- if (config$1.expertMode && loopNum % config$1.expertModeTodoLoopNum == 0) {
34189
+ if ((config$1.mode == "expert" || config$1.expertMode) && loopNum % config$1.expertModeTodoLoopNum == 0) {
34188
34190
  await doTodoListManager(agentContext, rlm, messages, llm_tools);
34189
34191
  }
34190
34192
  continue;
34191
34193
  }
34192
- if (config$1.expertMode && checkNum == 0) {
34194
+ if ((config$1.mode == "expert" || config$1.expertMode) && checkNum == 0) {
34193
34195
  checkNum++;
34194
34196
  const { completionStatus } = await doTaskResultCheck(agentContext, rlm, messages, llm_tools);
34195
34197
  if (completionStatus == "incomplete") {
@@ -35081,10 +35083,140 @@ function extract_page_content(max_url_length = 200, max_content_length = 50000)
35081
35083
  result = result.replace(/\s*\n/g, "\n").replace(/\n+/g, "\n").trim();
35082
35084
  if (result.length > max_content_length) {
35083
35085
  // result = result.slice(0, max_content_length) + "...";
35084
- result = Array.from(result).slice(0, max_content_length).join('') + "...";
35086
+ result = Array.from(result).slice(0, max_content_length).join("") + "...";
35085
35087
  }
35086
35088
  return result;
35087
35089
  }
35090
+ function mark_screenshot_highlight_elements(screenshot, area_map, client_rect) {
35091
+ return new Promise(async (resolve, reject) => {
35092
+ try {
35093
+ // Convert base64 to Blob
35094
+ const base64Data = screenshot.imageBase64;
35095
+ const binaryString = atob(base64Data);
35096
+ const bytes = new Uint8Array(binaryString.length);
35097
+ for (let i = 0; i < binaryString.length; i++) {
35098
+ bytes[i] = binaryString.charCodeAt(i);
35099
+ }
35100
+ const blob = new Blob([bytes], { type: screenshot.imageType });
35101
+ const imageBitmap = await createImageBitmap(blob, {
35102
+ resizeQuality: "high",
35103
+ resizeWidth: client_rect.width,
35104
+ resizeHeight: client_rect.height,
35105
+ });
35106
+ const canvas = new OffscreenCanvas(imageBitmap.width, imageBitmap.height);
35107
+ const ctx = canvas.getContext("2d");
35108
+ if (!ctx) {
35109
+ reject(new Error("Failed to get canvas context"));
35110
+ return;
35111
+ }
35112
+ ctx.imageSmoothingEnabled = true;
35113
+ ctx.imageSmoothingQuality = "high";
35114
+ ctx.drawImage(imageBitmap, 0, 0);
35115
+ const sortedEntries = Object.entries(area_map)
35116
+ .filter(([id, area]) => area.width > 0 && area.height > 0)
35117
+ .sort((a, b) => {
35118
+ const areaA = a[1].width * a[1].height;
35119
+ const areaB = b[1].width * b[1].height;
35120
+ return areaA - areaB;
35121
+ });
35122
+ const colors = [
35123
+ "#FF0000",
35124
+ "#00FF00",
35125
+ "#0000FF",
35126
+ "#FFA500",
35127
+ "#800080",
35128
+ "#008080",
35129
+ "#FF69B4",
35130
+ "#4B0082",
35131
+ "#FF4500",
35132
+ "#2E8B57",
35133
+ "#DC143C",
35134
+ "#4682B4",
35135
+ ];
35136
+ sortedEntries.forEach(([id, area], index) => {
35137
+ const color = colors[index % colors.length];
35138
+ // Draw a border
35139
+ ctx.strokeStyle = color;
35140
+ ctx.lineWidth = 2;
35141
+ ctx.strokeRect(area.x, area.y, area.width, area.height);
35142
+ // Draw ID tag background
35143
+ const fontSize = Math.min(12, Math.max(8, area.height / 2));
35144
+ ctx.font = `${fontSize}px sans-serif`;
35145
+ const textMetrics = ctx.measureText(id);
35146
+ const padding = 4;
35147
+ const labelWidth = textMetrics.width + padding * 2;
35148
+ const labelHeight = fontSize + padding * 2;
35149
+ // The tag position is in the upper right corner.
35150
+ const labelX = area.x + area.width - labelWidth;
35151
+ const labelY = area.y;
35152
+ // Draw label background
35153
+ ctx.fillStyle = color;
35154
+ ctx.fillRect(labelX, labelY, labelWidth, labelHeight);
35155
+ // Draw ID text
35156
+ ctx.fillStyle = "#FFFFFF";
35157
+ ctx.textBaseline = "top";
35158
+ ctx.fillText(id, labelX + padding, labelY + padding);
35159
+ });
35160
+ // Convert OffscreenCanvas to Blob, then to base64
35161
+ const resultBlob = await canvas.convertToBlob({
35162
+ type: screenshot.imageType,
35163
+ });
35164
+ const reader = new FileReader();
35165
+ reader.onloadend = () => {
35166
+ const resultBase64 = reader.result;
35167
+ resolve(resultBase64);
35168
+ };
35169
+ reader.onerror = () => {
35170
+ reject(new Error("Failed to convert blob to base64"));
35171
+ };
35172
+ reader.readAsDataURL(resultBlob);
35173
+ }
35174
+ catch (error) {
35175
+ reject(error);
35176
+ }
35177
+ });
35178
+ }
35179
+ async function compress_image(imageBase64, imageType, compress, quality = 1) {
35180
+ const base64Data = imageBase64;
35181
+ const binaryString = atob(base64Data);
35182
+ const bytes = new Uint8Array(binaryString.length);
35183
+ for (let i = 0; i < binaryString.length; i++) {
35184
+ bytes[i] = binaryString.charCodeAt(i);
35185
+ }
35186
+ const blob = new Blob([bytes], { type: imageType });
35187
+ const bitmap = await createImageBitmap(blob);
35188
+ const width = compress.scale
35189
+ ? bitmap.width * compress.scale
35190
+ : compress.resizeWidth;
35191
+ const height = compress.scale
35192
+ ? bitmap.height * compress.scale
35193
+ : compress.resizeHeight;
35194
+ if (bitmap.width == width && bitmap.height == height && quality == 1) {
35195
+ return {
35196
+ imageBase64: imageBase64,
35197
+ imageType: imageType,
35198
+ };
35199
+ }
35200
+ const canvas = new OffscreenCanvas(width, height);
35201
+ const ctx = canvas.getContext("2d");
35202
+ ctx.drawImage(bitmap, 0, 0, width, height);
35203
+ const resultBlob = await canvas.convertToBlob({
35204
+ type: "image/jpeg",
35205
+ quality: quality,
35206
+ });
35207
+ return new Promise((resolve) => {
35208
+ const reader = new FileReader();
35209
+ reader.onloadend = () => {
35210
+ let imageDataUrl = reader.result;
35211
+ let imageBase64 = imageDataUrl.substring(imageDataUrl.indexOf("base64,") + 7);
35212
+ resolve({
35213
+ imageBase64: imageBase64,
35214
+ imageType: "image/jpeg",
35215
+ });
35216
+ };
35217
+ reader.readAsDataURL(resultBlob);
35218
+ });
35219
+ }
35088
35220
 
35089
35221
  const AGENT_NAME = "Browser";
35090
35222
  class BaseBrowserAgent extends Agent {
@@ -35266,19 +35398,30 @@ function run_build_dom_tree() {
35266
35398
  /**
35267
35399
  * Get clickable elements on the page
35268
35400
  *
35269
- * @param {*} doHighlightElements Is highlighted
35401
+ * @param {*} markHighlightElements Is mark highlighted
35270
35402
  * @param {*} includeAttributes [attr_names...]
35271
- * @returns { element_str, selector_map }
35403
+ * @returns { element_str, client_rect, selector_map, area_map }
35272
35404
  */
35273
- function get_clickable_elements(doHighlightElements = true, includeAttributes) {
35405
+ function get_clickable_elements(markHighlightElements = true, includeAttributes) {
35274
35406
  window.clickable_elements = {};
35275
35407
  computedStyleCache = new WeakMap();
35276
35408
  document.querySelectorAll("[eko-user-highlight-id]").forEach(ele => ele.removeAttribute("eko-user-highlight-id"));
35277
- let page_tree = build_dom_tree(doHighlightElements);
35409
+ let page_tree = build_dom_tree(markHighlightElements);
35278
35410
  let element_tree = parse_node(page_tree);
35279
- let selector_map = create_selector_map(element_tree);
35280
35411
  let element_str = clickable_elements_to_string(element_tree, includeAttributes);
35281
- return { element_str, selector_map };
35412
+ let client_rect = {
35413
+ width: window.innerWidth || document.documentElement.clientWidth,
35414
+ height: window.innerHeight || document.documentElement.clientHeight,
35415
+ };
35416
+ if (markHighlightElements) {
35417
+ let selector_map = {};
35418
+ // selector_map = create_selector_map(element_tree);
35419
+ return { element_str, client_rect, selector_map };
35420
+ }
35421
+ else {
35422
+ let area_map = create_area_map(element_tree);
35423
+ return { element_str, client_rect, area_map };
35424
+ }
35282
35425
  }
35283
35426
  function get_highlight_element(highlightIndex) {
35284
35427
  let element = document.querySelector(`[eko-user-highlight-id="eko-highlight-${highlightIndex}"]`);
@@ -35377,12 +35520,13 @@ function run_build_dom_tree() {
35377
35520
  process_node(element_tree);
35378
35521
  return formatted_text.join('\n');
35379
35522
  }
35380
- function create_selector_map(element_tree) {
35381
- let selector_map = {};
35523
+ function create_area_map(element_tree) {
35524
+ let area_map = {};
35382
35525
  function process_node(node) {
35383
35526
  if (node.tagName) {
35384
35527
  if (node.highlightIndex != null) {
35385
- selector_map[node.highlightIndex] = node;
35528
+ const element = window.clickable_elements[node.highlightIndex];
35529
+ area_map[node.highlightIndex] = get_element_real_bounding_rect(element);
35386
35530
  }
35387
35531
  for (let i = 0; i < node.children.length; i++) {
35388
35532
  process_node(node.children[i]);
@@ -35390,7 +35534,38 @@ function run_build_dom_tree() {
35390
35534
  }
35391
35535
  }
35392
35536
  process_node(element_tree);
35393
- return selector_map;
35537
+ return area_map;
35538
+ }
35539
+ function get_element_real_bounding_rect(element) {
35540
+ if (!element || !(element instanceof Element)) {
35541
+ return { x: 0, y: 0, width: 0, height: 0 };
35542
+ }
35543
+ let rect = element.getBoundingClientRect();
35544
+ let x = rect.left;
35545
+ let y = rect.top;
35546
+ let width = rect.width;
35547
+ let height = rect.height;
35548
+ let win = element.ownerDocument.defaultView;
35549
+ let maxDepth = 10;
35550
+ let depth = 0;
35551
+ while (win && win !== win.parent && depth < maxDepth) {
35552
+ depth++;
35553
+ const frameElement = win.frameElement;
35554
+ if (!frameElement) {
35555
+ break;
35556
+ }
35557
+ const frameRect = frameElement.getBoundingClientRect();
35558
+ x += frameRect.left;
35559
+ y += frameRect.top;
35560
+ // Consider the border and padding of the iframe.
35561
+ const frameStyle = getCachedComputedStyle(frameElement);
35562
+ x += parseFloat(frameStyle.borderLeftWidth) || 0;
35563
+ y += parseFloat(frameStyle.borderTopWidth) || 0;
35564
+ x += parseFloat(frameStyle.paddingLeft) || 0;
35565
+ y += parseFloat(frameStyle.paddingTop) || 0;
35566
+ win = win.parent;
35567
+ }
35568
+ return { x, y, width, height };
35394
35569
  }
35395
35570
  function parse_node(node_data, parent) {
35396
35571
  if (!node_data) {
@@ -35430,7 +35605,7 @@ function run_build_dom_tree() {
35430
35605
  }
35431
35606
  return element_node;
35432
35607
  }
35433
- function build_dom_tree(doHighlightElements) {
35608
+ function build_dom_tree(markHighlightElements) {
35434
35609
  let highlightIndex = 0; // Reset highlight index
35435
35610
  function highlightElement(element, index, parentIframe = null) {
35436
35611
  // Create or get highlight container
@@ -35821,7 +35996,7 @@ function run_build_dom_tree() {
35821
35996
  if (shouldHighlight) {
35822
35997
  nodeData.highlightIndex = highlightIndex++;
35823
35998
  window.clickable_elements[nodeData.highlightIndex] = node;
35824
- if (doHighlightElements) {
35999
+ if (markHighlightElements) {
35825
36000
  highlightElement(node, nodeData.highlightIndex, parentIframe);
35826
36001
  }
35827
36002
  }
@@ -35982,26 +36157,37 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
35982
36157
  }
35983
36158
  async screenshot_and_html(agentContext) {
35984
36159
  try {
35985
- let element_result = null;
36160
+ let element_result;
36161
+ let double_screenshots;
35986
36162
  for (let i = 0; i < 5; i++) {
35987
36163
  await sleep(200);
35988
36164
  await this.execute_script(agentContext, run_build_dom_tree, []);
35989
36165
  await sleep(50);
35990
- element_result = (await this.execute_script(agentContext, () => {
35991
- return window.get_clickable_elements(true);
35992
- }, []));
36166
+ element_result = (await this.execute_script(agentContext, (markHighlightElements) => {
36167
+ return window.get_clickable_elements(markHighlightElements);
36168
+ }, [config$1.mode != "fast" && config$1.markImageMode == "dom"]));
35993
36169
  if (element_result) {
35994
36170
  break;
35995
36171
  }
35996
36172
  }
35997
36173
  await sleep(100);
35998
- let screenshot = await this.screenshot(agentContext);
35999
- // agentContext.variables.set("selector_map", element_result.selector_map);
36000
- let pseudoHtml = element_result?.element_str || "";
36174
+ const screenshot = config$1.mode == "fast"
36175
+ ? undefined
36176
+ : await this.screenshot_and_compress(agentContext, element_result.client_rect);
36177
+ if (config$1.markImageMode == "draw" &&
36178
+ screenshot?.imageBase64 &&
36179
+ element_result.area_map) {
36180
+ double_screenshots = { ...screenshot };
36181
+ const markImageBase64 = await mark_screenshot_highlight_elements(screenshot, element_result.area_map, element_result.client_rect);
36182
+ screenshot.imageBase64 = markImageBase64;
36183
+ }
36184
+ const pseudoHtml = element_result.element_str || "";
36001
36185
  return {
36002
- imageBase64: screenshot.imageBase64,
36003
- imageType: screenshot.imageType,
36186
+ double_screenshots: double_screenshots,
36187
+ imageBase64: screenshot?.imageBase64,
36188
+ imageType: screenshot?.imageType,
36004
36189
  pseudoHtml: pseudoHtml,
36190
+ client_rect: element_result.client_rect,
36005
36191
  };
36006
36192
  }
36007
36193
  finally {
@@ -36013,6 +36199,20 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
36013
36199
  catch (e) { }
36014
36200
  }
36015
36201
  }
36202
+ async screenshot_and_compress(agentContext, client_rect) {
36203
+ const screenshot = await this.screenshot(agentContext);
36204
+ if (!client_rect || !screenshot) {
36205
+ return screenshot;
36206
+ }
36207
+ const compressedImage = await compress_image(screenshot.imageBase64, screenshot.imageType, {
36208
+ resizeWidth: client_rect.width,
36209
+ resizeHeight: client_rect.height,
36210
+ });
36211
+ return {
36212
+ imageBase64: compressedImage.imageBase64,
36213
+ imageType: compressedImage.imageType,
36214
+ };
36215
+ }
36016
36216
  get_element_script(index) {
36017
36217
  return `window.get_highlight_element(${index});`;
36018
36218
  }
@@ -36272,7 +36472,7 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
36272
36472
  ];
36273
36473
  }
36274
36474
  async double_screenshots(agentContext, messages, tools) {
36275
- return true;
36475
+ return config$1.mode != "fast";
36276
36476
  }
36277
36477
  async handleMessages(agentContext, messages, tools) {
36278
36478
  const pseudoHtmlDescription = "This is the environmental information after the operation, including the latest browser screenshot and page elements. Please perform the next operation based on the environmental information. Do not output the following elements and index information in your response.\n\nIndex and elements:\n";
@@ -36282,23 +36482,27 @@ class BaseBrowserLabelsAgent extends BaseBrowserAgent {
36282
36482
  lastTool.toolName !== "get_all_tabs" &&
36283
36483
  lastTool.toolName !== "variable_storage") {
36284
36484
  await sleep(300);
36285
- let image_contents = [];
36485
+ const image_contents = [];
36486
+ const result = await this.screenshot_and_html(agentContext);
36286
36487
  if (await this.double_screenshots(agentContext, messages, tools)) {
36287
- let imageResult = await this.screenshot(agentContext);
36288
- let image = toImage(imageResult.imageBase64);
36488
+ const imageResult = result.double_screenshots
36489
+ ? result.double_screenshots
36490
+ : await this.screenshot_and_compress(agentContext, result.client_rect);
36491
+ const image = toImage(imageResult.imageBase64);
36289
36492
  image_contents.push({
36290
36493
  type: "file",
36291
36494
  data: image,
36292
36495
  mediaType: imageResult.imageType,
36293
36496
  });
36294
36497
  }
36295
- let result = await this.screenshot_and_html(agentContext);
36296
- let image = toImage(result.imageBase64);
36297
- image_contents.push({
36298
- type: "file",
36299
- data: image,
36300
- mediaType: result.imageType,
36301
- });
36498
+ if (result.imageBase64) {
36499
+ const image = toImage(result.imageBase64);
36500
+ image_contents.push({
36501
+ type: "file",
36502
+ data: image,
36503
+ mediaType: result.imageType || "image/png",
36504
+ });
36505
+ }
36302
36506
  messages.push({
36303
36507
  role: "user",
36304
36508
  content: [
@@ -36447,7 +36651,7 @@ function do_click(params) {
36447
36651
  cancelable: true,
36448
36652
  button, // 0 left; 1 middle; 2 right
36449
36653
  });
36450
- if (eventType === 'click' && element.click) {
36654
+ if (eventType === "click" && element.click) {
36451
36655
  // support shadow dom element
36452
36656
  element.click();
36453
36657
  }