misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
package/dist/es/index.js CHANGED
@@ -1716,44 +1716,70 @@ var TaskCache = class {
1716
1716
  this.cache = cacheContent;
1717
1717
  this.cacheOriginalLength = this.cache.caches.length;
1718
1718
  }
1719
- matchCache(prompt, type) {
1719
+ matchCache(prompt, type, contextData) {
1720
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1720
1721
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1721
1722
  const item = this.cache.caches[i];
1722
1723
  const key = `${type}:${prompt}:${i}`;
1723
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1724
- this.matchedCacheIndices.add(key);
1725
- debug3(
1726
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1727
- type,
1728
- prompt,
1729
- i
1730
- );
1731
- return {
1732
- cacheContent: item,
1733
- updateFn: (cb) => {
1734
- debug3(
1735
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1736
- type,
1737
- prompt,
1738
- i
1739
- );
1740
- cb(item);
1741
- debug3(
1742
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1743
- type,
1744
- prompt,
1745
- i
1746
- );
1747
- this.flushCacheToFile();
1724
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1725
+ continue;
1726
+ }
1727
+ if (type === "plan" && item.type === "plan") {
1728
+ const planItem = item;
1729
+ if (contextHash && planItem.contextHash) {
1730
+ if (contextHash !== planItem.contextHash) {
1731
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1732
+ continue;
1748
1733
  }
1749
- };
1734
+ } else if (contextHash || planItem.contextHash) {
1735
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1736
+ continue;
1737
+ }
1750
1738
  }
1739
+ this.matchedCacheIndices.add(key);
1740
+ debug3(
1741
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1742
+ type,
1743
+ prompt,
1744
+ i,
1745
+ contextHash ? "yes" : "no-context"
1746
+ );
1747
+ return {
1748
+ cacheContent: item,
1749
+ updateFn: (cb) => {
1750
+ debug3(
1751
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1752
+ type,
1753
+ prompt,
1754
+ i
1755
+ );
1756
+ cb(item);
1757
+ debug3(
1758
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1759
+ type,
1760
+ prompt,
1761
+ i
1762
+ );
1763
+ this.flushCacheToFile();
1764
+ }
1765
+ };
1751
1766
  }
1752
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1767
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1753
1768
  return void 0;
1754
1769
  }
1755
- matchPlanCache(prompt) {
1756
- return this.matchCache(prompt, "plan");
1770
+ generateContextHash(contextData) {
1771
+ const sortedKeys = Object.keys(contextData).sort();
1772
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1773
+ let hash = 0;
1774
+ for (let i = 0; i < stableString.length; i++) {
1775
+ const char = stableString.charCodeAt(i);
1776
+ hash = (hash << 5) - hash + char;
1777
+ hash = hash & hash;
1778
+ }
1779
+ return hash.toString(36);
1780
+ }
1781
+ matchPlanCache(prompt, contextData) {
1782
+ return this.matchCache(prompt, "plan", contextData);
1757
1783
  }
1758
1784
  matchLocateCache(prompt) {
1759
1785
  return this.matchCache(prompt, "locate");
@@ -1829,11 +1855,16 @@ cache file: ${cacheFile}`
1829
1855
  );
1830
1856
  }
1831
1857
  }
1832
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1858
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1833
1859
  if (cachedRecord) {
1834
1860
  if (newRecord.type === "plan") {
1835
1861
  cachedRecord.updateFn((cache) => {
1836
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1862
+ const planCache = cache;
1863
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1864
+ if (contextData) {
1865
+ planCache.contextHash = this.generateContextHash(contextData);
1866
+ planCache.contextData = { ...contextData };
1867
+ }
1837
1868
  });
1838
1869
  } else {
1839
1870
  cachedRecord.updateFn((cache) => {
@@ -1841,6 +1872,11 @@ cache file: ${cacheFile}`
1841
1872
  });
1842
1873
  }
1843
1874
  } else {
1875
+ if (newRecord.type === "plan" && contextData) {
1876
+ const planRecord = newRecord;
1877
+ planRecord.contextHash = this.generateContextHash(contextData);
1878
+ planRecord.contextData = { ...contextData };
1879
+ }
1844
1880
  this.appendCache(newRecord);
1845
1881
  }
1846
1882
  }
@@ -2201,34 +2237,35 @@ var PageAgent = class {
2201
2237
  };
2202
2238
  }
2203
2239
  async aiAction(taskPrompt, opt) {
2240
+ const originalPrompt = taskPrompt;
2241
+ let processedPrompt = taskPrompt;
2204
2242
  if (this.opts.enableCumulativeContext && this.contextStore) {
2205
2243
  try {
2206
- const originalPrompt = taskPrompt;
2207
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2208
- if (originalPrompt !== processedPrompt) {
2209
- debug4("Context replacement in aiAction:", {
2210
- original: originalPrompt,
2211
- processed: processedPrompt,
2212
- storedData: this.contextStore.getAllData()
2244
+ const storedData = this.contextStore.getAllData();
2245
+ if (Object.keys(storedData).length > 0) {
2246
+ debug4("Available data for aiAction:", {
2247
+ prompt: taskPrompt,
2248
+ availableData: storedData
2213
2249
  });
2214
2250
  }
2215
- this.contextStore.addStep({
2216
- type: "action",
2217
- summary: `Action: ${processedPrompt}`,
2218
- prompt: processedPrompt
2219
- });
2220
- debug4("Added action step to context store:", {
2221
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2222
- totalSteps: this.contextStore.getRecentSteps(100).length
2223
- });
2224
- taskPrompt = processedPrompt;
2225
2251
  } catch (error) {
2226
2252
  debug4("Context store operation failed:", error);
2227
2253
  }
2228
2254
  }
2229
2255
  const cacheable = opt?.cacheable;
2230
2256
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2231
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2257
+ let contextData;
2258
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2259
+ try {
2260
+ contextData = this.contextStore.getAllData();
2261
+ if (contextData && Object.keys(contextData).length === 0) {
2262
+ contextData = void 0;
2263
+ }
2264
+ } catch (error) {
2265
+ debug4("Failed to get context data for cache:", error);
2266
+ }
2267
+ }
2268
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2232
2269
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2233
2270
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2234
2271
  taskPrompt,
@@ -2238,6 +2275,28 @@ var PageAgent = class {
2238
2275
  debug4("matched cache, will call .runYaml to run the action");
2239
2276
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2240
2277
  const result = await this.runYaml(yaml5);
2278
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2279
+ try {
2280
+ const executionResult = {
2281
+ success: true,
2282
+ actionType: "cached",
2283
+ description: `Executed cached action: ${processedPrompt}`,
2284
+ timing: result.metadata?.totalTime
2285
+ };
2286
+ this.contextStore.addStep({
2287
+ type: "action",
2288
+ summary: `Action: ${processedPrompt} (cached)`,
2289
+ prompt: processedPrompt,
2290
+ executionResult
2291
+ });
2292
+ debug4("Added cached action step to context store:", {
2293
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2294
+ totalSteps: this.contextStore.getRecentSteps(100).length
2295
+ });
2296
+ } catch (error) {
2297
+ debug4("Failed to add cached action step:", error);
2298
+ }
2299
+ }
2241
2300
  return {
2242
2301
  result: result.result,
2243
2302
  metadata: metadata2
@@ -2262,10 +2321,39 @@ var PageAgent = class {
2262
2321
  prompt: taskPrompt,
2263
2322
  yamlWorkflow: yamlFlowStr
2264
2323
  },
2265
- matchedCache
2324
+ matchedCache,
2325
+ contextData
2326
+ // Pass context data for cache creation
2266
2327
  );
2267
2328
  }
2268
2329
  const metadata = this.afterTaskRunning(executor);
2330
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2331
+ try {
2332
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2333
+ this.contextStore.addStep({
2334
+ type: "action",
2335
+ summary: `Action: ${processedPrompt}`,
2336
+ prompt: processedPrompt,
2337
+ executionResult
2338
+ });
2339
+ debug4("Added action step with execution result to context store:", {
2340
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2341
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2342
+ executionResult
2343
+ });
2344
+ } catch (error) {
2345
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2346
+ try {
2347
+ this.contextStore.addStep({
2348
+ type: "action",
2349
+ summary: `Action: ${processedPrompt}`,
2350
+ prompt: processedPrompt
2351
+ });
2352
+ } catch (stepError) {
2353
+ debug4("Failed to add action step:", stepError);
2354
+ }
2355
+ }
2356
+ }
2269
2357
  return {
2270
2358
  result: output,
2271
2359
  metadata
@@ -2450,23 +2538,40 @@ var PageAgent = class {
2450
2538
  };
2451
2539
  }
2452
2540
  async aiAssert(assertion, msg, opt) {
2453
- let processedAssertion = assertion;
2541
+ let executionContext = "";
2454
2542
  if (this.opts.enableCumulativeContext && this.contextStore) {
2455
2543
  try {
2456
- const originalAssertion = assertion;
2457
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2458
- if (originalAssertion !== processedAssertion) {
2459
- debug4("Context replacement in aiAssert:", {
2460
- original: originalAssertion,
2461
- processed: processedAssertion,
2462
- context: "assertion",
2463
- storedData: this.contextStore.getAllData()
2544
+ const recentSteps = this.contextStore.getRecentSteps(3);
2545
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2546
+ const storedData = this.contextStore.getAllData();
2547
+ if (stepsWithExecutionResults.length > 0) {
2548
+ const recentActions = stepsWithExecutionResults.map((step) => {
2549
+ const result = step.executionResult;
2550
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2551
+ }).join("\n");
2552
+ executionContext = `
2553
+
2554
+ Recent actions performed:
2555
+ ${recentActions}
2556
+
2557
+ This context may help verify the assertion.`;
2558
+ }
2559
+ if (storedData && Object.keys(storedData).length > 0) {
2560
+ executionContext += `
2561
+
2562
+ Available data for reference:
2563
+ ${JSON.stringify(storedData, null, 2)}
2564
+
2565
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2566
+ debug4("Available data for aiAssert:", {
2567
+ assertion,
2568
+ availableData: storedData
2464
2569
  });
2465
2570
  }
2466
2571
  this.contextStore.addStep({
2467
2572
  type: "assertion",
2468
- summary: `Assertion: ${processedAssertion}`,
2469
- prompt: processedAssertion
2573
+ summary: `Assertion: ${assertion}`,
2574
+ prompt: assertion
2470
2575
  });
2471
2576
  debug4("Added assertion step to context store:", {
2472
2577
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2482,7 +2587,13 @@ var PageAgent = class {
2482
2587
  } catch (e) {
2483
2588
  }
2484
2589
  }
2485
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2590
+ let assertionWithContext = assertion;
2591
+ if (currentUrl) {
2592
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2593
+ }
2594
+ if (executionContext) {
2595
+ assertionWithContext += executionContext;
2596
+ }
2486
2597
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2487
2598
  const metadata = this.afterTaskRunning(executor, true);
2488
2599
  if (output && opt?.keepRawResponse) {
@@ -2696,6 +2807,79 @@ ${errors}`);
2696
2807
  async destroy() {
2697
2808
  await this.page.destroy();
2698
2809
  }
2810
+ /**
2811
+ * Analyze execution results from executor to generate meaningful descriptions
2812
+ */
2813
+ analyzeExecutionResults(executor, originalPrompt) {
2814
+ const tasks = executor.tasks;
2815
+ const success = !executor.isInErrorState();
2816
+ if (!success) {
2817
+ const errorTask = executor.latestErrorTask();
2818
+ return {
2819
+ success: false,
2820
+ actionType: "error",
2821
+ description: `Failed to execute: ${originalPrompt}`,
2822
+ error: errorTask?.error
2823
+ };
2824
+ }
2825
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2826
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2827
+ const lastAction = actionTasks[actionTasks.length - 1];
2828
+ const lastLocate = locateTasks[locateTasks.length - 1];
2829
+ if (!lastAction) {
2830
+ return {
2831
+ success: true,
2832
+ actionType: "unknown",
2833
+ description: `Completed: ${originalPrompt}`
2834
+ };
2835
+ }
2836
+ const actionType = lastAction.subType || "unknown";
2837
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2838
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2839
+ return {
2840
+ success: true,
2841
+ actionType,
2842
+ description,
2843
+ elementInfo,
2844
+ timing: lastAction.timing?.cost
2845
+ };
2846
+ }
2847
+ /**
2848
+ * Extract element information from locate task
2849
+ */
2850
+ extractElementInfo(locateTask, _actionTask) {
2851
+ if (!locateTask?.output?.element)
2852
+ return void 0;
2853
+ const element = locateTask.output.element;
2854
+ return {
2855
+ type: element.attributes?.nodeType || "unknown",
2856
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2857
+ location: `(${element.center[0]}, ${element.center[1]})`
2858
+ };
2859
+ }
2860
+ /**
2861
+ * Generate natural language description for actions
2862
+ */
2863
+ generateActionDescription(actionType, param, elementInfo) {
2864
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2865
+ switch (actionType) {
2866
+ case "Tap":
2867
+ return `Clicked on ${elementDesc}`;
2868
+ case "Input":
2869
+ const inputValue = param?.value || "";
2870
+ return `Entered "${inputValue}" into ${elementDesc}`;
2871
+ case "KeyboardPress":
2872
+ return `Pressed ${param?.value || "key"}`;
2873
+ case "Scroll":
2874
+ return `Scrolled ${param?.direction || "on page"}`;
2875
+ case "Hover":
2876
+ return `Hovered over ${elementDesc}`;
2877
+ case "Drag":
2878
+ return `Dragged ${elementDesc}`;
2879
+ default:
2880
+ return `Performed ${actionType} action on ${elementDesc}`;
2881
+ }
2882
+ }
2699
2883
  };
2700
2884
 
2701
2885
  // src/puppeteer/base-page.ts