misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1753,44 +1753,70 @@ var TaskCache = class {
1753
1753
  this.cache = cacheContent;
1754
1754
  this.cacheOriginalLength = this.cache.caches.length;
1755
1755
  }
1756
- matchCache(prompt, type) {
1756
+ matchCache(prompt, type, contextData) {
1757
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1757
1758
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1758
1759
  const item = this.cache.caches[i];
1759
1760
  const key = `${type}:${prompt}:${i}`;
1760
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1761
- this.matchedCacheIndices.add(key);
1762
- debug3(
1763
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1764
- type,
1765
- prompt,
1766
- i
1767
- );
1768
- return {
1769
- cacheContent: item,
1770
- updateFn: (cb) => {
1771
- debug3(
1772
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1773
- type,
1774
- prompt,
1775
- i
1776
- );
1777
- cb(item);
1778
- debug3(
1779
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1780
- type,
1781
- prompt,
1782
- i
1783
- );
1784
- this.flushCacheToFile();
1761
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1762
+ continue;
1763
+ }
1764
+ if (type === "plan" && item.type === "plan") {
1765
+ const planItem = item;
1766
+ if (contextHash && planItem.contextHash) {
1767
+ if (contextHash !== planItem.contextHash) {
1768
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1769
+ continue;
1785
1770
  }
1786
- };
1771
+ } else if (contextHash || planItem.contextHash) {
1772
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1773
+ continue;
1774
+ }
1787
1775
  }
1776
+ this.matchedCacheIndices.add(key);
1777
+ debug3(
1778
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1779
+ type,
1780
+ prompt,
1781
+ i,
1782
+ contextHash ? "yes" : "no-context"
1783
+ );
1784
+ return {
1785
+ cacheContent: item,
1786
+ updateFn: (cb) => {
1787
+ debug3(
1788
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1789
+ type,
1790
+ prompt,
1791
+ i
1792
+ );
1793
+ cb(item);
1794
+ debug3(
1795
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1796
+ type,
1797
+ prompt,
1798
+ i
1799
+ );
1800
+ this.flushCacheToFile();
1801
+ }
1802
+ };
1788
1803
  }
1789
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1804
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1790
1805
  return void 0;
1791
1806
  }
1792
- matchPlanCache(prompt) {
1793
- return this.matchCache(prompt, "plan");
1807
+ generateContextHash(contextData) {
1808
+ const sortedKeys = Object.keys(contextData).sort();
1809
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1810
+ let hash = 0;
1811
+ for (let i = 0; i < stableString.length; i++) {
1812
+ const char = stableString.charCodeAt(i);
1813
+ hash = (hash << 5) - hash + char;
1814
+ hash = hash & hash;
1815
+ }
1816
+ return hash.toString(36);
1817
+ }
1818
+ matchPlanCache(prompt, contextData) {
1819
+ return this.matchCache(prompt, "plan", contextData);
1794
1820
  }
1795
1821
  matchLocateCache(prompt) {
1796
1822
  return this.matchCache(prompt, "locate");
@@ -1866,11 +1892,16 @@ cache file: ${cacheFile}`
1866
1892
  );
1867
1893
  }
1868
1894
  }
1869
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1895
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1870
1896
  if (cachedRecord) {
1871
1897
  if (newRecord.type === "plan") {
1872
1898
  cachedRecord.updateFn((cache) => {
1873
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1899
+ const planCache = cache;
1900
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1901
+ if (contextData) {
1902
+ planCache.contextHash = this.generateContextHash(contextData);
1903
+ planCache.contextData = { ...contextData };
1904
+ }
1874
1905
  });
1875
1906
  } else {
1876
1907
  cachedRecord.updateFn((cache) => {
@@ -1878,6 +1909,11 @@ cache file: ${cacheFile}`
1878
1909
  });
1879
1910
  }
1880
1911
  } else {
1912
+ if (newRecord.type === "plan" && contextData) {
1913
+ const planRecord = newRecord;
1914
+ planRecord.contextHash = this.generateContextHash(contextData);
1915
+ planRecord.contextData = { ...contextData };
1916
+ }
1881
1917
  this.appendCache(newRecord);
1882
1918
  }
1883
1919
  }
@@ -2238,34 +2274,35 @@ var PageAgent = class {
2238
2274
  };
2239
2275
  }
2240
2276
  async aiAction(taskPrompt, opt) {
2277
+ const originalPrompt = taskPrompt;
2278
+ let processedPrompt = taskPrompt;
2241
2279
  if (this.opts.enableCumulativeContext && this.contextStore) {
2242
2280
  try {
2243
- const originalPrompt = taskPrompt;
2244
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2245
- if (originalPrompt !== processedPrompt) {
2246
- debug4("Context replacement in aiAction:", {
2247
- original: originalPrompt,
2248
- processed: processedPrompt,
2249
- storedData: this.contextStore.getAllData()
2281
+ const storedData = this.contextStore.getAllData();
2282
+ if (Object.keys(storedData).length > 0) {
2283
+ debug4("Available data for aiAction:", {
2284
+ prompt: taskPrompt,
2285
+ availableData: storedData
2250
2286
  });
2251
2287
  }
2252
- this.contextStore.addStep({
2253
- type: "action",
2254
- summary: `Action: ${processedPrompt}`,
2255
- prompt: processedPrompt
2256
- });
2257
- debug4("Added action step to context store:", {
2258
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2259
- totalSteps: this.contextStore.getRecentSteps(100).length
2260
- });
2261
- taskPrompt = processedPrompt;
2262
2288
  } catch (error) {
2263
2289
  debug4("Context store operation failed:", error);
2264
2290
  }
2265
2291
  }
2266
2292
  const cacheable = opt?.cacheable;
2267
2293
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2268
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2294
+ let contextData;
2295
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2296
+ try {
2297
+ contextData = this.contextStore.getAllData();
2298
+ if (contextData && Object.keys(contextData).length === 0) {
2299
+ contextData = void 0;
2300
+ }
2301
+ } catch (error) {
2302
+ debug4("Failed to get context data for cache:", error);
2303
+ }
2304
+ }
2305
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2269
2306
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2270
2307
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2271
2308
  taskPrompt,
@@ -2275,6 +2312,28 @@ var PageAgent = class {
2275
2312
  debug4("matched cache, will call .runYaml to run the action");
2276
2313
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2277
2314
  const result = await this.runYaml(yaml5);
2315
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2316
+ try {
2317
+ const executionResult = {
2318
+ success: true,
2319
+ actionType: "cached",
2320
+ description: `Executed cached action: ${processedPrompt}`,
2321
+ timing: result.metadata?.totalTime
2322
+ };
2323
+ this.contextStore.addStep({
2324
+ type: "action",
2325
+ summary: `Action: ${processedPrompt} (cached)`,
2326
+ prompt: processedPrompt,
2327
+ executionResult
2328
+ });
2329
+ debug4("Added cached action step to context store:", {
2330
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2331
+ totalSteps: this.contextStore.getRecentSteps(100).length
2332
+ });
2333
+ } catch (error) {
2334
+ debug4("Failed to add cached action step:", error);
2335
+ }
2336
+ }
2278
2337
  return {
2279
2338
  result: result.result,
2280
2339
  metadata: metadata2
@@ -2299,10 +2358,39 @@ var PageAgent = class {
2299
2358
  prompt: taskPrompt,
2300
2359
  yamlWorkflow: yamlFlowStr
2301
2360
  },
2302
- matchedCache
2361
+ matchedCache,
2362
+ contextData
2363
+ // Pass context data for cache creation
2303
2364
  );
2304
2365
  }
2305
2366
  const metadata = this.afterTaskRunning(executor);
2367
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2368
+ try {
2369
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2370
+ this.contextStore.addStep({
2371
+ type: "action",
2372
+ summary: `Action: ${processedPrompt}`,
2373
+ prompt: processedPrompt,
2374
+ executionResult
2375
+ });
2376
+ debug4("Added action step with execution result to context store:", {
2377
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2378
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2379
+ executionResult
2380
+ });
2381
+ } catch (error) {
2382
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2383
+ try {
2384
+ this.contextStore.addStep({
2385
+ type: "action",
2386
+ summary: `Action: ${processedPrompt}`,
2387
+ prompt: processedPrompt
2388
+ });
2389
+ } catch (stepError) {
2390
+ debug4("Failed to add action step:", stepError);
2391
+ }
2392
+ }
2393
+ }
2306
2394
  return {
2307
2395
  result: output,
2308
2396
  metadata
@@ -2487,23 +2575,40 @@ var PageAgent = class {
2487
2575
  };
2488
2576
  }
2489
2577
  async aiAssert(assertion, msg, opt) {
2490
- let processedAssertion = assertion;
2578
+ let executionContext = "";
2491
2579
  if (this.opts.enableCumulativeContext && this.contextStore) {
2492
2580
  try {
2493
- const originalAssertion = assertion;
2494
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2495
- if (originalAssertion !== processedAssertion) {
2496
- debug4("Context replacement in aiAssert:", {
2497
- original: originalAssertion,
2498
- processed: processedAssertion,
2499
- context: "assertion",
2500
- storedData: this.contextStore.getAllData()
2581
+ const recentSteps = this.contextStore.getRecentSteps(3);
2582
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2583
+ const storedData = this.contextStore.getAllData();
2584
+ if (stepsWithExecutionResults.length > 0) {
2585
+ const recentActions = stepsWithExecutionResults.map((step) => {
2586
+ const result = step.executionResult;
2587
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2588
+ }).join("\n");
2589
+ executionContext = `
2590
+
2591
+ Recent actions performed:
2592
+ ${recentActions}
2593
+
2594
+ This context may help verify the assertion.`;
2595
+ }
2596
+ if (storedData && Object.keys(storedData).length > 0) {
2597
+ executionContext += `
2598
+
2599
+ Available data for reference:
2600
+ ${JSON.stringify(storedData, null, 2)}
2601
+
2602
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2603
+ debug4("Available data for aiAssert:", {
2604
+ assertion,
2605
+ availableData: storedData
2501
2606
  });
2502
2607
  }
2503
2608
  this.contextStore.addStep({
2504
2609
  type: "assertion",
2505
- summary: `Assertion: ${processedAssertion}`,
2506
- prompt: processedAssertion
2610
+ summary: `Assertion: ${assertion}`,
2611
+ prompt: assertion
2507
2612
  });
2508
2613
  debug4("Added assertion step to context store:", {
2509
2614
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2519,7 +2624,13 @@ var PageAgent = class {
2519
2624
  } catch (e) {
2520
2625
  }
2521
2626
  }
2522
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2627
+ let assertionWithContext = assertion;
2628
+ if (currentUrl) {
2629
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2630
+ }
2631
+ if (executionContext) {
2632
+ assertionWithContext += executionContext;
2633
+ }
2523
2634
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2524
2635
  const metadata = this.afterTaskRunning(executor, true);
2525
2636
  if (output && opt?.keepRawResponse) {
@@ -2733,6 +2844,79 @@ ${errors}`);
2733
2844
  async destroy() {
2734
2845
  await this.page.destroy();
2735
2846
  }
2847
+ /**
2848
+ * Analyze execution results from executor to generate meaningful descriptions
2849
+ */
2850
+ analyzeExecutionResults(executor, originalPrompt) {
2851
+ const tasks = executor.tasks;
2852
+ const success = !executor.isInErrorState();
2853
+ if (!success) {
2854
+ const errorTask = executor.latestErrorTask();
2855
+ return {
2856
+ success: false,
2857
+ actionType: "error",
2858
+ description: `Failed to execute: ${originalPrompt}`,
2859
+ error: errorTask?.error
2860
+ };
2861
+ }
2862
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2863
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2864
+ const lastAction = actionTasks[actionTasks.length - 1];
2865
+ const lastLocate = locateTasks[locateTasks.length - 1];
2866
+ if (!lastAction) {
2867
+ return {
2868
+ success: true,
2869
+ actionType: "unknown",
2870
+ description: `Completed: ${originalPrompt}`
2871
+ };
2872
+ }
2873
+ const actionType = lastAction.subType || "unknown";
2874
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2875
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2876
+ return {
2877
+ success: true,
2878
+ actionType,
2879
+ description,
2880
+ elementInfo,
2881
+ timing: lastAction.timing?.cost
2882
+ };
2883
+ }
2884
+ /**
2885
+ * Extract element information from locate task
2886
+ */
2887
+ extractElementInfo(locateTask, _actionTask) {
2888
+ if (!locateTask?.output?.element)
2889
+ return void 0;
2890
+ const element = locateTask.output.element;
2891
+ return {
2892
+ type: element.attributes?.nodeType || "unknown",
2893
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2894
+ location: `(${element.center[0]}, ${element.center[1]})`
2895
+ };
2896
+ }
2897
+ /**
2898
+ * Generate natural language description for actions
2899
+ */
2900
+ generateActionDescription(actionType, param, elementInfo) {
2901
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2902
+ switch (actionType) {
2903
+ case "Tap":
2904
+ return `Clicked on ${elementDesc}`;
2905
+ case "Input":
2906
+ const inputValue = param?.value || "";
2907
+ return `Entered "${inputValue}" into ${elementDesc}`;
2908
+ case "KeyboardPress":
2909
+ return `Pressed ${param?.value || "key"}`;
2910
+ case "Scroll":
2911
+ return `Scrolled ${param?.direction || "on page"}`;
2912
+ case "Hover":
2913
+ return `Hovered over ${elementDesc}`;
2914
+ case "Drag":
2915
+ return `Dragged ${elementDesc}`;
2916
+ default:
2917
+ return `Performed ${actionType} action on ${elementDesc}`;
2918
+ }
2919
+ }
2736
2920
  };
2737
2921
 
2738
2922
  // src/chrome-extension/agent.ts