misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1734,44 +1734,70 @@ var TaskCache = class {
1734
1734
  this.cache = cacheContent;
1735
1735
  this.cacheOriginalLength = this.cache.caches.length;
1736
1736
  }
1737
- matchCache(prompt, type) {
1737
+ matchCache(prompt, type, contextData) {
1738
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1738
1739
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1739
1740
  const item = this.cache.caches[i];
1740
1741
  const key = `${type}:${prompt}:${i}`;
1741
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1742
- this.matchedCacheIndices.add(key);
1743
- debug3(
1744
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1745
- type,
1746
- prompt,
1747
- i
1748
- );
1749
- return {
1750
- cacheContent: item,
1751
- updateFn: (cb) => {
1752
- debug3(
1753
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1754
- type,
1755
- prompt,
1756
- i
1757
- );
1758
- cb(item);
1759
- debug3(
1760
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1761
- type,
1762
- prompt,
1763
- i
1764
- );
1765
- this.flushCacheToFile();
1742
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1743
+ continue;
1744
+ }
1745
+ if (type === "plan" && item.type === "plan") {
1746
+ const planItem = item;
1747
+ if (contextHash && planItem.contextHash) {
1748
+ if (contextHash !== planItem.contextHash) {
1749
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1750
+ continue;
1766
1751
  }
1767
- };
1752
+ } else if (contextHash || planItem.contextHash) {
1753
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1754
+ continue;
1755
+ }
1768
1756
  }
1757
+ this.matchedCacheIndices.add(key);
1758
+ debug3(
1759
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1760
+ type,
1761
+ prompt,
1762
+ i,
1763
+ contextHash ? "yes" : "no-context"
1764
+ );
1765
+ return {
1766
+ cacheContent: item,
1767
+ updateFn: (cb) => {
1768
+ debug3(
1769
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1770
+ type,
1771
+ prompt,
1772
+ i
1773
+ );
1774
+ cb(item);
1775
+ debug3(
1776
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1777
+ type,
1778
+ prompt,
1779
+ i
1780
+ );
1781
+ this.flushCacheToFile();
1782
+ }
1783
+ };
1769
1784
  }
1770
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1785
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1771
1786
  return void 0;
1772
1787
  }
1773
- matchPlanCache(prompt) {
1774
- return this.matchCache(prompt, "plan");
1788
+ generateContextHash(contextData) {
1789
+ const sortedKeys = Object.keys(contextData).sort();
1790
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1791
+ let hash = 0;
1792
+ for (let i = 0; i < stableString.length; i++) {
1793
+ const char = stableString.charCodeAt(i);
1794
+ hash = (hash << 5) - hash + char;
1795
+ hash = hash & hash;
1796
+ }
1797
+ return hash.toString(36);
1798
+ }
1799
+ matchPlanCache(prompt, contextData) {
1800
+ return this.matchCache(prompt, "plan", contextData);
1775
1801
  }
1776
1802
  matchLocateCache(prompt) {
1777
1803
  return this.matchCache(prompt, "locate");
@@ -1847,11 +1873,16 @@ cache file: ${cacheFile}`
1847
1873
  );
1848
1874
  }
1849
1875
  }
1850
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1876
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1851
1877
  if (cachedRecord) {
1852
1878
  if (newRecord.type === "plan") {
1853
1879
  cachedRecord.updateFn((cache) => {
1854
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1880
+ const planCache = cache;
1881
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1882
+ if (contextData) {
1883
+ planCache.contextHash = this.generateContextHash(contextData);
1884
+ planCache.contextData = { ...contextData };
1885
+ }
1855
1886
  });
1856
1887
  } else {
1857
1888
  cachedRecord.updateFn((cache) => {
@@ -1859,6 +1890,11 @@ cache file: ${cacheFile}`
1859
1890
  });
1860
1891
  }
1861
1892
  } else {
1893
+ if (newRecord.type === "plan" && contextData) {
1894
+ const planRecord = newRecord;
1895
+ planRecord.contextHash = this.generateContextHash(contextData);
1896
+ planRecord.contextData = { ...contextData };
1897
+ }
1862
1898
  this.appendCache(newRecord);
1863
1899
  }
1864
1900
  }
@@ -2219,34 +2255,35 @@ var PageAgent = class {
2219
2255
  };
2220
2256
  }
2221
2257
  async aiAction(taskPrompt, opt) {
2258
+ const originalPrompt = taskPrompt;
2259
+ let processedPrompt = taskPrompt;
2222
2260
  if (this.opts.enableCumulativeContext && this.contextStore) {
2223
2261
  try {
2224
- const originalPrompt = taskPrompt;
2225
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2226
- if (originalPrompt !== processedPrompt) {
2227
- debug4("Context replacement in aiAction:", {
2228
- original: originalPrompt,
2229
- processed: processedPrompt,
2230
- storedData: this.contextStore.getAllData()
2262
+ const storedData = this.contextStore.getAllData();
2263
+ if (Object.keys(storedData).length > 0) {
2264
+ debug4("Available data for aiAction:", {
2265
+ prompt: taskPrompt,
2266
+ availableData: storedData
2231
2267
  });
2232
2268
  }
2233
- this.contextStore.addStep({
2234
- type: "action",
2235
- summary: `Action: ${processedPrompt}`,
2236
- prompt: processedPrompt
2237
- });
2238
- debug4("Added action step to context store:", {
2239
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2240
- totalSteps: this.contextStore.getRecentSteps(100).length
2241
- });
2242
- taskPrompt = processedPrompt;
2243
2269
  } catch (error) {
2244
2270
  debug4("Context store operation failed:", error);
2245
2271
  }
2246
2272
  }
2247
2273
  const cacheable = opt?.cacheable;
2248
2274
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2249
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2275
+ let contextData;
2276
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2277
+ try {
2278
+ contextData = this.contextStore.getAllData();
2279
+ if (contextData && Object.keys(contextData).length === 0) {
2280
+ contextData = void 0;
2281
+ }
2282
+ } catch (error) {
2283
+ debug4("Failed to get context data for cache:", error);
2284
+ }
2285
+ }
2286
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2250
2287
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2251
2288
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2252
2289
  taskPrompt,
@@ -2256,6 +2293,28 @@ var PageAgent = class {
2256
2293
  debug4("matched cache, will call .runYaml to run the action");
2257
2294
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2258
2295
  const result = await this.runYaml(yaml5);
2296
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2297
+ try {
2298
+ const executionResult = {
2299
+ success: true,
2300
+ actionType: "cached",
2301
+ description: `Executed cached action: ${processedPrompt}`,
2302
+ timing: result.metadata?.totalTime
2303
+ };
2304
+ this.contextStore.addStep({
2305
+ type: "action",
2306
+ summary: `Action: ${processedPrompt} (cached)`,
2307
+ prompt: processedPrompt,
2308
+ executionResult
2309
+ });
2310
+ debug4("Added cached action step to context store:", {
2311
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2312
+ totalSteps: this.contextStore.getRecentSteps(100).length
2313
+ });
2314
+ } catch (error) {
2315
+ debug4("Failed to add cached action step:", error);
2316
+ }
2317
+ }
2259
2318
  return {
2260
2319
  result: result.result,
2261
2320
  metadata: metadata2
@@ -2280,10 +2339,39 @@ var PageAgent = class {
2280
2339
  prompt: taskPrompt,
2281
2340
  yamlWorkflow: yamlFlowStr
2282
2341
  },
2283
- matchedCache
2342
+ matchedCache,
2343
+ contextData
2344
+ // Pass context data for cache creation
2284
2345
  );
2285
2346
  }
2286
2347
  const metadata = this.afterTaskRunning(executor);
2348
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2349
+ try {
2350
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2351
+ this.contextStore.addStep({
2352
+ type: "action",
2353
+ summary: `Action: ${processedPrompt}`,
2354
+ prompt: processedPrompt,
2355
+ executionResult
2356
+ });
2357
+ debug4("Added action step with execution result to context store:", {
2358
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2359
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2360
+ executionResult
2361
+ });
2362
+ } catch (error) {
2363
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2364
+ try {
2365
+ this.contextStore.addStep({
2366
+ type: "action",
2367
+ summary: `Action: ${processedPrompt}`,
2368
+ prompt: processedPrompt
2369
+ });
2370
+ } catch (stepError) {
2371
+ debug4("Failed to add action step:", stepError);
2372
+ }
2373
+ }
2374
+ }
2287
2375
  return {
2288
2376
  result: output,
2289
2377
  metadata
@@ -2468,23 +2556,40 @@ var PageAgent = class {
2468
2556
  };
2469
2557
  }
2470
2558
  async aiAssert(assertion, msg, opt) {
2471
- let processedAssertion = assertion;
2559
+ let executionContext = "";
2472
2560
  if (this.opts.enableCumulativeContext && this.contextStore) {
2473
2561
  try {
2474
- const originalAssertion = assertion;
2475
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2476
- if (originalAssertion !== processedAssertion) {
2477
- debug4("Context replacement in aiAssert:", {
2478
- original: originalAssertion,
2479
- processed: processedAssertion,
2480
- context: "assertion",
2481
- storedData: this.contextStore.getAllData()
2562
+ const recentSteps = this.contextStore.getRecentSteps(3);
2563
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2564
+ const storedData = this.contextStore.getAllData();
2565
+ if (stepsWithExecutionResults.length > 0) {
2566
+ const recentActions = stepsWithExecutionResults.map((step) => {
2567
+ const result = step.executionResult;
2568
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2569
+ }).join("\n");
2570
+ executionContext = `
2571
+
2572
+ Recent actions performed:
2573
+ ${recentActions}
2574
+
2575
+ This context may help verify the assertion.`;
2576
+ }
2577
+ if (storedData && Object.keys(storedData).length > 0) {
2578
+ executionContext += `
2579
+
2580
+ Available data for reference:
2581
+ ${JSON.stringify(storedData, null, 2)}
2582
+
2583
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2584
+ debug4("Available data for aiAssert:", {
2585
+ assertion,
2586
+ availableData: storedData
2482
2587
  });
2483
2588
  }
2484
2589
  this.contextStore.addStep({
2485
2590
  type: "assertion",
2486
- summary: `Assertion: ${processedAssertion}`,
2487
- prompt: processedAssertion
2591
+ summary: `Assertion: ${assertion}`,
2592
+ prompt: assertion
2488
2593
  });
2489
2594
  debug4("Added assertion step to context store:", {
2490
2595
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2500,7 +2605,13 @@ var PageAgent = class {
2500
2605
  } catch (e) {
2501
2606
  }
2502
2607
  }
2503
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2608
+ let assertionWithContext = assertion;
2609
+ if (currentUrl) {
2610
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2611
+ }
2612
+ if (executionContext) {
2613
+ assertionWithContext += executionContext;
2614
+ }
2504
2615
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2505
2616
  const metadata = this.afterTaskRunning(executor, true);
2506
2617
  if (output && opt?.keepRawResponse) {
@@ -2714,6 +2825,79 @@ ${errors}`);
2714
2825
  async destroy() {
2715
2826
  await this.page.destroy();
2716
2827
  }
2828
+ /**
2829
+ * Analyze execution results from executor to generate meaningful descriptions
2830
+ */
2831
+ analyzeExecutionResults(executor, originalPrompt) {
2832
+ const tasks = executor.tasks;
2833
+ const success = !executor.isInErrorState();
2834
+ if (!success) {
2835
+ const errorTask = executor.latestErrorTask();
2836
+ return {
2837
+ success: false,
2838
+ actionType: "error",
2839
+ description: `Failed to execute: ${originalPrompt}`,
2840
+ error: errorTask?.error
2841
+ };
2842
+ }
2843
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2844
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2845
+ const lastAction = actionTasks[actionTasks.length - 1];
2846
+ const lastLocate = locateTasks[locateTasks.length - 1];
2847
+ if (!lastAction) {
2848
+ return {
2849
+ success: true,
2850
+ actionType: "unknown",
2851
+ description: `Completed: ${originalPrompt}`
2852
+ };
2853
+ }
2854
+ const actionType = lastAction.subType || "unknown";
2855
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2856
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2857
+ return {
2858
+ success: true,
2859
+ actionType,
2860
+ description,
2861
+ elementInfo,
2862
+ timing: lastAction.timing?.cost
2863
+ };
2864
+ }
2865
+ /**
2866
+ * Extract element information from locate task
2867
+ */
2868
+ extractElementInfo(locateTask, _actionTask) {
2869
+ if (!locateTask?.output?.element)
2870
+ return void 0;
2871
+ const element = locateTask.output.element;
2872
+ return {
2873
+ type: element.attributes?.nodeType || "unknown",
2874
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2875
+ location: `(${element.center[0]}, ${element.center[1]})`
2876
+ };
2877
+ }
2878
+ /**
2879
+ * Generate natural language description for actions
2880
+ */
2881
+ generateActionDescription(actionType, param, elementInfo) {
2882
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2883
+ switch (actionType) {
2884
+ case "Tap":
2885
+ return `Clicked on ${elementDesc}`;
2886
+ case "Input":
2887
+ const inputValue = param?.value || "";
2888
+ return `Entered "${inputValue}" into ${elementDesc}`;
2889
+ case "KeyboardPress":
2890
+ return `Pressed ${param?.value || "key"}`;
2891
+ case "Scroll":
2892
+ return `Scrolled ${param?.direction || "on page"}`;
2893
+ case "Hover":
2894
+ return `Hovered over ${elementDesc}`;
2895
+ case "Drag":
2896
+ return `Dragged ${elementDesc}`;
2897
+ default:
2898
+ return `Performed ${actionType} action on ${elementDesc}`;
2899
+ }
2900
+ }
2717
2901
  };
2718
2902
 
2719
2903
  // src/puppeteer/index.ts