misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1721,44 +1721,70 @@ var TaskCache = class {
1721
1721
  this.cache = cacheContent;
1722
1722
  this.cacheOriginalLength = this.cache.caches.length;
1723
1723
  }
1724
- matchCache(prompt, type) {
1724
+ matchCache(prompt, type, contextData) {
1725
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1725
1726
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1726
1727
  const item = this.cache.caches[i];
1727
1728
  const key = `${type}:${prompt}:${i}`;
1728
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1729
- this.matchedCacheIndices.add(key);
1730
- debug3(
1731
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1732
- type,
1733
- prompt,
1734
- i
1735
- );
1736
- return {
1737
- cacheContent: item,
1738
- updateFn: (cb) => {
1739
- debug3(
1740
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1741
- type,
1742
- prompt,
1743
- i
1744
- );
1745
- cb(item);
1746
- debug3(
1747
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1748
- type,
1749
- prompt,
1750
- i
1751
- );
1752
- this.flushCacheToFile();
1729
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1730
+ continue;
1731
+ }
1732
+ if (type === "plan" && item.type === "plan") {
1733
+ const planItem = item;
1734
+ if (contextHash && planItem.contextHash) {
1735
+ if (contextHash !== planItem.contextHash) {
1736
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1737
+ continue;
1753
1738
  }
1754
- };
1739
+ } else if (contextHash || planItem.contextHash) {
1740
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1741
+ continue;
1742
+ }
1755
1743
  }
1744
+ this.matchedCacheIndices.add(key);
1745
+ debug3(
1746
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1747
+ type,
1748
+ prompt,
1749
+ i,
1750
+ contextHash ? "yes" : "no-context"
1751
+ );
1752
+ return {
1753
+ cacheContent: item,
1754
+ updateFn: (cb) => {
1755
+ debug3(
1756
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1757
+ type,
1758
+ prompt,
1759
+ i
1760
+ );
1761
+ cb(item);
1762
+ debug3(
1763
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1764
+ type,
1765
+ prompt,
1766
+ i
1767
+ );
1768
+ this.flushCacheToFile();
1769
+ }
1770
+ };
1756
1771
  }
1757
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1772
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1758
1773
  return void 0;
1759
1774
  }
1760
- matchPlanCache(prompt) {
1761
- return this.matchCache(prompt, "plan");
1775
+ generateContextHash(contextData) {
1776
+ const sortedKeys = Object.keys(contextData).sort();
1777
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1778
+ let hash = 0;
1779
+ for (let i = 0; i < stableString.length; i++) {
1780
+ const char = stableString.charCodeAt(i);
1781
+ hash = (hash << 5) - hash + char;
1782
+ hash = hash & hash;
1783
+ }
1784
+ return hash.toString(36);
1785
+ }
1786
+ matchPlanCache(prompt, contextData) {
1787
+ return this.matchCache(prompt, "plan", contextData);
1762
1788
  }
1763
1789
  matchLocateCache(prompt) {
1764
1790
  return this.matchCache(prompt, "locate");
@@ -1834,11 +1860,16 @@ cache file: ${cacheFile}`
1834
1860
  );
1835
1861
  }
1836
1862
  }
1837
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1863
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1838
1864
  if (cachedRecord) {
1839
1865
  if (newRecord.type === "plan") {
1840
1866
  cachedRecord.updateFn((cache) => {
1841
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1867
+ const planCache = cache;
1868
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1869
+ if (contextData) {
1870
+ planCache.contextHash = this.generateContextHash(contextData);
1871
+ planCache.contextData = { ...contextData };
1872
+ }
1842
1873
  });
1843
1874
  } else {
1844
1875
  cachedRecord.updateFn((cache) => {
@@ -1846,6 +1877,11 @@ cache file: ${cacheFile}`
1846
1877
  });
1847
1878
  }
1848
1879
  } else {
1880
+ if (newRecord.type === "plan" && contextData) {
1881
+ const planRecord = newRecord;
1882
+ planRecord.contextHash = this.generateContextHash(contextData);
1883
+ planRecord.contextData = { ...contextData };
1884
+ }
1849
1885
  this.appendCache(newRecord);
1850
1886
  }
1851
1887
  }
@@ -2206,34 +2242,35 @@ var PageAgent = class {
2206
2242
  };
2207
2243
  }
2208
2244
  async aiAction(taskPrompt, opt) {
2245
+ const originalPrompt = taskPrompt;
2246
+ let processedPrompt = taskPrompt;
2209
2247
  if (this.opts.enableCumulativeContext && this.contextStore) {
2210
2248
  try {
2211
- const originalPrompt = taskPrompt;
2212
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2213
- if (originalPrompt !== processedPrompt) {
2214
- debug4("Context replacement in aiAction:", {
2215
- original: originalPrompt,
2216
- processed: processedPrompt,
2217
- storedData: this.contextStore.getAllData()
2249
+ const storedData = this.contextStore.getAllData();
2250
+ if (Object.keys(storedData).length > 0) {
2251
+ debug4("Available data for aiAction:", {
2252
+ prompt: taskPrompt,
2253
+ availableData: storedData
2218
2254
  });
2219
2255
  }
2220
- this.contextStore.addStep({
2221
- type: "action",
2222
- summary: `Action: ${processedPrompt}`,
2223
- prompt: processedPrompt
2224
- });
2225
- debug4("Added action step to context store:", {
2226
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2227
- totalSteps: this.contextStore.getRecentSteps(100).length
2228
- });
2229
- taskPrompt = processedPrompt;
2230
2256
  } catch (error) {
2231
2257
  debug4("Context store operation failed:", error);
2232
2258
  }
2233
2259
  }
2234
2260
  const cacheable = opt?.cacheable;
2235
2261
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2236
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2262
+ let contextData;
2263
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2264
+ try {
2265
+ contextData = this.contextStore.getAllData();
2266
+ if (contextData && Object.keys(contextData).length === 0) {
2267
+ contextData = void 0;
2268
+ }
2269
+ } catch (error) {
2270
+ debug4("Failed to get context data for cache:", error);
2271
+ }
2272
+ }
2273
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2237
2274
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2238
2275
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2239
2276
  taskPrompt,
@@ -2243,6 +2280,28 @@ var PageAgent = class {
2243
2280
  debug4("matched cache, will call .runYaml to run the action");
2244
2281
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2245
2282
  const result = await this.runYaml(yaml5);
2283
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2284
+ try {
2285
+ const executionResult = {
2286
+ success: true,
2287
+ actionType: "cached",
2288
+ description: `Executed cached action: ${processedPrompt}`,
2289
+ timing: result.metadata?.totalTime
2290
+ };
2291
+ this.contextStore.addStep({
2292
+ type: "action",
2293
+ summary: `Action: ${processedPrompt} (cached)`,
2294
+ prompt: processedPrompt,
2295
+ executionResult
2296
+ });
2297
+ debug4("Added cached action step to context store:", {
2298
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2299
+ totalSteps: this.contextStore.getRecentSteps(100).length
2300
+ });
2301
+ } catch (error) {
2302
+ debug4("Failed to add cached action step:", error);
2303
+ }
2304
+ }
2246
2305
  return {
2247
2306
  result: result.result,
2248
2307
  metadata: metadata2
@@ -2267,10 +2326,39 @@ var PageAgent = class {
2267
2326
  prompt: taskPrompt,
2268
2327
  yamlWorkflow: yamlFlowStr
2269
2328
  },
2270
- matchedCache
2329
+ matchedCache,
2330
+ contextData
2331
+ // Pass context data for cache creation
2271
2332
  );
2272
2333
  }
2273
2334
  const metadata = this.afterTaskRunning(executor);
2335
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2336
+ try {
2337
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2338
+ this.contextStore.addStep({
2339
+ type: "action",
2340
+ summary: `Action: ${processedPrompt}`,
2341
+ prompt: processedPrompt,
2342
+ executionResult
2343
+ });
2344
+ debug4("Added action step with execution result to context store:", {
2345
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2346
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2347
+ executionResult
2348
+ });
2349
+ } catch (error) {
2350
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2351
+ try {
2352
+ this.contextStore.addStep({
2353
+ type: "action",
2354
+ summary: `Action: ${processedPrompt}`,
2355
+ prompt: processedPrompt
2356
+ });
2357
+ } catch (stepError) {
2358
+ debug4("Failed to add action step:", stepError);
2359
+ }
2360
+ }
2361
+ }
2274
2362
  return {
2275
2363
  result: output,
2276
2364
  metadata
@@ -2455,23 +2543,40 @@ var PageAgent = class {
2455
2543
  };
2456
2544
  }
2457
2545
  async aiAssert(assertion, msg, opt) {
2458
- let processedAssertion = assertion;
2546
+ let executionContext = "";
2459
2547
  if (this.opts.enableCumulativeContext && this.contextStore) {
2460
2548
  try {
2461
- const originalAssertion = assertion;
2462
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2463
- if (originalAssertion !== processedAssertion) {
2464
- debug4("Context replacement in aiAssert:", {
2465
- original: originalAssertion,
2466
- processed: processedAssertion,
2467
- context: "assertion",
2468
- storedData: this.contextStore.getAllData()
2549
+ const recentSteps = this.contextStore.getRecentSteps(3);
2550
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2551
+ const storedData = this.contextStore.getAllData();
2552
+ if (stepsWithExecutionResults.length > 0) {
2553
+ const recentActions = stepsWithExecutionResults.map((step) => {
2554
+ const result = step.executionResult;
2555
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2556
+ }).join("\n");
2557
+ executionContext = `
2558
+
2559
+ Recent actions performed:
2560
+ ${recentActions}
2561
+
2562
+ This context may help verify the assertion.`;
2563
+ }
2564
+ if (storedData && Object.keys(storedData).length > 0) {
2565
+ executionContext += `
2566
+
2567
+ Available data for reference:
2568
+ ${JSON.stringify(storedData, null, 2)}
2569
+
2570
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2571
+ debug4("Available data for aiAssert:", {
2572
+ assertion,
2573
+ availableData: storedData
2469
2574
  });
2470
2575
  }
2471
2576
  this.contextStore.addStep({
2472
2577
  type: "assertion",
2473
- summary: `Assertion: ${processedAssertion}`,
2474
- prompt: processedAssertion
2578
+ summary: `Assertion: ${assertion}`,
2579
+ prompt: assertion
2475
2580
  });
2476
2581
  debug4("Added assertion step to context store:", {
2477
2582
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2487,7 +2592,13 @@ var PageAgent = class {
2487
2592
  } catch (e) {
2488
2593
  }
2489
2594
  }
2490
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2595
+ let assertionWithContext = assertion;
2596
+ if (currentUrl) {
2597
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2598
+ }
2599
+ if (executionContext) {
2600
+ assertionWithContext += executionContext;
2601
+ }
2491
2602
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2492
2603
  const metadata = this.afterTaskRunning(executor, true);
2493
2604
  if (output && opt?.keepRawResponse) {
@@ -2701,6 +2812,79 @@ ${errors}`);
2701
2812
  async destroy() {
2702
2813
  await this.page.destroy();
2703
2814
  }
2815
+ /**
2816
+ * Analyze execution results from executor to generate meaningful descriptions
2817
+ */
2818
+ analyzeExecutionResults(executor, originalPrompt) {
2819
+ const tasks = executor.tasks;
2820
+ const success = !executor.isInErrorState();
2821
+ if (!success) {
2822
+ const errorTask = executor.latestErrorTask();
2823
+ return {
2824
+ success: false,
2825
+ actionType: "error",
2826
+ description: `Failed to execute: ${originalPrompt}`,
2827
+ error: errorTask?.error
2828
+ };
2829
+ }
2830
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2831
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2832
+ const lastAction = actionTasks[actionTasks.length - 1];
2833
+ const lastLocate = locateTasks[locateTasks.length - 1];
2834
+ if (!lastAction) {
2835
+ return {
2836
+ success: true,
2837
+ actionType: "unknown",
2838
+ description: `Completed: ${originalPrompt}`
2839
+ };
2840
+ }
2841
+ const actionType = lastAction.subType || "unknown";
2842
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2843
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2844
+ return {
2845
+ success: true,
2846
+ actionType,
2847
+ description,
2848
+ elementInfo,
2849
+ timing: lastAction.timing?.cost
2850
+ };
2851
+ }
2852
+ /**
2853
+ * Extract element information from locate task
2854
+ */
2855
+ extractElementInfo(locateTask, _actionTask) {
2856
+ if (!locateTask?.output?.element)
2857
+ return void 0;
2858
+ const element = locateTask.output.element;
2859
+ return {
2860
+ type: element.attributes?.nodeType || "unknown",
2861
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2862
+ location: `(${element.center[0]}, ${element.center[1]})`
2863
+ };
2864
+ }
2865
+ /**
2866
+ * Generate natural language description for actions
2867
+ */
2868
+ generateActionDescription(actionType, param, elementInfo) {
2869
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2870
+ switch (actionType) {
2871
+ case "Tap":
2872
+ return `Clicked on ${elementDesc}`;
2873
+ case "Input":
2874
+ const inputValue = param?.value || "";
2875
+ return `Entered "${inputValue}" into ${elementDesc}`;
2876
+ case "KeyboardPress":
2877
+ return `Pressed ${param?.value || "key"}`;
2878
+ case "Scroll":
2879
+ return `Scrolled ${param?.direction || "on page"}`;
2880
+ case "Hover":
2881
+ return `Hovered over ${elementDesc}`;
2882
+ case "Drag":
2883
+ return `Dragged ${elementDesc}`;
2884
+ default:
2885
+ return `Performed ${actionType} action on ${elementDesc}`;
2886
+ }
2887
+ }
2704
2888
  };
2705
2889
 
2706
2890
  // src/puppeteer/index.ts