misoai-web 1.0.5 → 1.0.6

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1735,44 +1735,70 @@ var TaskCache = class {
1735
1735
  this.cache = cacheContent;
1736
1736
  this.cacheOriginalLength = this.cache.caches.length;
1737
1737
  }
1738
- matchCache(prompt, type) {
1738
+ matchCache(prompt, type, contextData) {
1739
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1739
1740
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1740
1741
  const item = this.cache.caches[i];
1741
1742
  const key = `${type}:${prompt}:${i}`;
1742
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1743
- this.matchedCacheIndices.add(key);
1744
- debug3(
1745
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1746
- type,
1747
- prompt,
1748
- i
1749
- );
1750
- return {
1751
- cacheContent: item,
1752
- updateFn: (cb) => {
1753
- debug3(
1754
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1755
- type,
1756
- prompt,
1757
- i
1758
- );
1759
- cb(item);
1760
- debug3(
1761
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1762
- type,
1763
- prompt,
1764
- i
1765
- );
1766
- this.flushCacheToFile();
1743
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1744
+ continue;
1745
+ }
1746
+ if (type === "plan" && item.type === "plan") {
1747
+ const planItem = item;
1748
+ if (contextHash && planItem.contextHash) {
1749
+ if (contextHash !== planItem.contextHash) {
1750
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1751
+ continue;
1767
1752
  }
1768
- };
1753
+ } else if (contextHash || planItem.contextHash) {
1754
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1755
+ continue;
1756
+ }
1769
1757
  }
1758
+ this.matchedCacheIndices.add(key);
1759
+ debug3(
1760
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1761
+ type,
1762
+ prompt,
1763
+ i,
1764
+ contextHash ? "yes" : "no-context"
1765
+ );
1766
+ return {
1767
+ cacheContent: item,
1768
+ updateFn: (cb) => {
1769
+ debug3(
1770
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1771
+ type,
1772
+ prompt,
1773
+ i
1774
+ );
1775
+ cb(item);
1776
+ debug3(
1777
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1778
+ type,
1779
+ prompt,
1780
+ i
1781
+ );
1782
+ this.flushCacheToFile();
1783
+ }
1784
+ };
1770
1785
  }
1771
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1786
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1772
1787
  return void 0;
1773
1788
  }
1774
- matchPlanCache(prompt) {
1775
- return this.matchCache(prompt, "plan");
1789
+ generateContextHash(contextData) {
1790
+ const sortedKeys = Object.keys(contextData).sort();
1791
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1792
+ let hash = 0;
1793
+ for (let i = 0; i < stableString.length; i++) {
1794
+ const char = stableString.charCodeAt(i);
1795
+ hash = (hash << 5) - hash + char;
1796
+ hash = hash & hash;
1797
+ }
1798
+ return hash.toString(36);
1799
+ }
1800
+ matchPlanCache(prompt, contextData) {
1801
+ return this.matchCache(prompt, "plan", contextData);
1776
1802
  }
1777
1803
  matchLocateCache(prompt) {
1778
1804
  return this.matchCache(prompt, "locate");
@@ -1848,11 +1874,16 @@ cache file: ${cacheFile}`
1848
1874
  );
1849
1875
  }
1850
1876
  }
1851
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1877
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1852
1878
  if (cachedRecord) {
1853
1879
  if (newRecord.type === "plan") {
1854
1880
  cachedRecord.updateFn((cache) => {
1855
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1881
+ const planCache = cache;
1882
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1883
+ if (contextData) {
1884
+ planCache.contextHash = this.generateContextHash(contextData);
1885
+ planCache.contextData = { ...contextData };
1886
+ }
1856
1887
  });
1857
1888
  } else {
1858
1889
  cachedRecord.updateFn((cache) => {
@@ -1860,6 +1891,11 @@ cache file: ${cacheFile}`
1860
1891
  });
1861
1892
  }
1862
1893
  } else {
1894
+ if (newRecord.type === "plan" && contextData) {
1895
+ const planRecord = newRecord;
1896
+ planRecord.contextHash = this.generateContextHash(contextData);
1897
+ planRecord.contextData = { ...contextData };
1898
+ }
1863
1899
  this.appendCache(newRecord);
1864
1900
  }
1865
1901
  }
@@ -2220,34 +2256,35 @@ var PageAgent = class {
2220
2256
  };
2221
2257
  }
2222
2258
  async aiAction(taskPrompt, opt) {
2259
+ const originalPrompt = taskPrompt;
2260
+ let processedPrompt = taskPrompt;
2223
2261
  if (this.opts.enableCumulativeContext && this.contextStore) {
2224
2262
  try {
2225
- const originalPrompt = taskPrompt;
2226
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2227
- if (originalPrompt !== processedPrompt) {
2228
- debug4("Context replacement in aiAction:", {
2229
- original: originalPrompt,
2230
- processed: processedPrompt,
2231
- storedData: this.contextStore.getAllData()
2263
+ const storedData = this.contextStore.getAllData();
2264
+ if (Object.keys(storedData).length > 0) {
2265
+ debug4("Available data for aiAction:", {
2266
+ prompt: taskPrompt,
2267
+ availableData: storedData
2232
2268
  });
2233
2269
  }
2234
- this.contextStore.addStep({
2235
- type: "action",
2236
- summary: `Action: ${processedPrompt}`,
2237
- prompt: processedPrompt
2238
- });
2239
- debug4("Added action step to context store:", {
2240
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2241
- totalSteps: this.contextStore.getRecentSteps(100).length
2242
- });
2243
- taskPrompt = processedPrompt;
2244
2270
  } catch (error) {
2245
2271
  debug4("Context store operation failed:", error);
2246
2272
  }
2247
2273
  }
2248
2274
  const cacheable = opt?.cacheable;
2249
2275
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2250
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2276
+ let contextData;
2277
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2278
+ try {
2279
+ contextData = this.contextStore.getAllData();
2280
+ if (contextData && Object.keys(contextData).length === 0) {
2281
+ contextData = void 0;
2282
+ }
2283
+ } catch (error) {
2284
+ debug4("Failed to get context data for cache:", error);
2285
+ }
2286
+ }
2287
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2251
2288
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2252
2289
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2253
2290
  taskPrompt,
@@ -2257,6 +2294,28 @@ var PageAgent = class {
2257
2294
  debug4("matched cache, will call .runYaml to run the action");
2258
2295
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2259
2296
  const result = await this.runYaml(yaml5);
2297
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2298
+ try {
2299
+ const executionResult = {
2300
+ success: true,
2301
+ actionType: "cached",
2302
+ description: `Executed cached action: ${processedPrompt}`,
2303
+ timing: result.metadata?.totalTime
2304
+ };
2305
+ this.contextStore.addStep({
2306
+ type: "action",
2307
+ summary: `Action: ${processedPrompt} (cached)`,
2308
+ prompt: processedPrompt,
2309
+ executionResult
2310
+ });
2311
+ debug4("Added cached action step to context store:", {
2312
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2313
+ totalSteps: this.contextStore.getRecentSteps(100).length
2314
+ });
2315
+ } catch (error) {
2316
+ debug4("Failed to add cached action step:", error);
2317
+ }
2318
+ }
2260
2319
  return {
2261
2320
  result: result.result,
2262
2321
  metadata: metadata2
@@ -2281,10 +2340,39 @@ var PageAgent = class {
2281
2340
  prompt: taskPrompt,
2282
2341
  yamlWorkflow: yamlFlowStr
2283
2342
  },
2284
- matchedCache
2343
+ matchedCache,
2344
+ contextData
2345
+ // Pass context data for cache creation
2285
2346
  );
2286
2347
  }
2287
2348
  const metadata = this.afterTaskRunning(executor);
2349
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2350
+ try {
2351
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2352
+ this.contextStore.addStep({
2353
+ type: "action",
2354
+ summary: `Action: ${processedPrompt}`,
2355
+ prompt: processedPrompt,
2356
+ executionResult
2357
+ });
2358
+ debug4("Added action step with execution result to context store:", {
2359
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2360
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2361
+ executionResult
2362
+ });
2363
+ } catch (error) {
2364
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2365
+ try {
2366
+ this.contextStore.addStep({
2367
+ type: "action",
2368
+ summary: `Action: ${processedPrompt}`,
2369
+ prompt: processedPrompt
2370
+ });
2371
+ } catch (stepError) {
2372
+ debug4("Failed to add action step:", stepError);
2373
+ }
2374
+ }
2375
+ }
2288
2376
  return {
2289
2377
  result: output,
2290
2378
  metadata
@@ -2469,23 +2557,40 @@ var PageAgent = class {
2469
2557
  };
2470
2558
  }
2471
2559
  async aiAssert(assertion, msg, opt) {
2472
- let processedAssertion = assertion;
2560
+ let executionContext = "";
2473
2561
  if (this.opts.enableCumulativeContext && this.contextStore) {
2474
2562
  try {
2475
- const originalAssertion = assertion;
2476
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2477
- if (originalAssertion !== processedAssertion) {
2478
- debug4("Context replacement in aiAssert:", {
2479
- original: originalAssertion,
2480
- processed: processedAssertion,
2481
- context: "assertion",
2482
- storedData: this.contextStore.getAllData()
2563
+ const recentSteps = this.contextStore.getRecentSteps(3);
2564
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2565
+ const storedData = this.contextStore.getAllData();
2566
+ if (stepsWithExecutionResults.length > 0) {
2567
+ const recentActions = stepsWithExecutionResults.map((step) => {
2568
+ const result = step.executionResult;
2569
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2570
+ }).join("\n");
2571
+ executionContext = `
2572
+
2573
+ Recent actions performed:
2574
+ ${recentActions}
2575
+
2576
+ This context may help verify the assertion.`;
2577
+ }
2578
+ if (storedData && Object.keys(storedData).length > 0) {
2579
+ executionContext += `
2580
+
2581
+ Available data for reference:
2582
+ ${JSON.stringify(storedData, null, 2)}
2583
+
2584
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2585
+ debug4("Available data for aiAssert:", {
2586
+ assertion,
2587
+ availableData: storedData
2483
2588
  });
2484
2589
  }
2485
2590
  this.contextStore.addStep({
2486
2591
  type: "assertion",
2487
- summary: `Assertion: ${processedAssertion}`,
2488
- prompt: processedAssertion
2592
+ summary: `Assertion: ${assertion}`,
2593
+ prompt: assertion
2489
2594
  });
2490
2595
  debug4("Added assertion step to context store:", {
2491
2596
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2501,7 +2606,13 @@ var PageAgent = class {
2501
2606
  } catch (e) {
2502
2607
  }
2503
2608
  }
2504
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2609
+ let assertionWithContext = assertion;
2610
+ if (currentUrl) {
2611
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2612
+ }
2613
+ if (executionContext) {
2614
+ assertionWithContext += executionContext;
2615
+ }
2505
2616
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2506
2617
  const metadata = this.afterTaskRunning(executor, true);
2507
2618
  if (output && opt?.keepRawResponse) {
@@ -2715,6 +2826,79 @@ ${errors}`);
2715
2826
  async destroy() {
2716
2827
  await this.page.destroy();
2717
2828
  }
2829
+ /**
2830
+ * Analyze execution results from executor to generate meaningful descriptions
2831
+ */
2832
+ analyzeExecutionResults(executor, originalPrompt) {
2833
+ const tasks = executor.tasks;
2834
+ const success = !executor.isInErrorState();
2835
+ if (!success) {
2836
+ const errorTask = executor.latestErrorTask();
2837
+ return {
2838
+ success: false,
2839
+ actionType: "error",
2840
+ description: `Failed to execute: ${originalPrompt}`,
2841
+ error: errorTask?.error
2842
+ };
2843
+ }
2844
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2845
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2846
+ const lastAction = actionTasks[actionTasks.length - 1];
2847
+ const lastLocate = locateTasks[locateTasks.length - 1];
2848
+ if (!lastAction) {
2849
+ return {
2850
+ success: true,
2851
+ actionType: "unknown",
2852
+ description: `Completed: ${originalPrompt}`
2853
+ };
2854
+ }
2855
+ const actionType = lastAction.subType || "unknown";
2856
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2857
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2858
+ return {
2859
+ success: true,
2860
+ actionType,
2861
+ description,
2862
+ elementInfo,
2863
+ timing: lastAction.timing?.cost
2864
+ };
2865
+ }
2866
+ /**
2867
+ * Extract element information from locate task
2868
+ */
2869
+ extractElementInfo(locateTask, _actionTask) {
2870
+ if (!locateTask?.output?.element)
2871
+ return void 0;
2872
+ const element = locateTask.output.element;
2873
+ return {
2874
+ type: element.attributes?.nodeType || "unknown",
2875
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2876
+ location: `(${element.center[0]}, ${element.center[1]})`
2877
+ };
2878
+ }
2879
+ /**
2880
+ * Generate natural language description for actions
2881
+ */
2882
+ generateActionDescription(actionType, param, elementInfo) {
2883
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2884
+ switch (actionType) {
2885
+ case "Tap":
2886
+ return `Clicked on ${elementDesc}`;
2887
+ case "Input":
2888
+ const inputValue = param?.value || "";
2889
+ return `Entered "${inputValue}" into ${elementDesc}`;
2890
+ case "KeyboardPress":
2891
+ return `Pressed ${param?.value || "key"}`;
2892
+ case "Scroll":
2893
+ return `Scrolled ${param?.direction || "on page"}`;
2894
+ case "Hover":
2895
+ return `Hovered over ${elementDesc}`;
2896
+ case "Drag":
2897
+ return `Dragged ${elementDesc}`;
2898
+ default:
2899
+ return `Performed ${actionType} action on ${elementDesc}`;
2900
+ }
2901
+ }
2718
2902
  };
2719
2903
 
2720
2904
  // src/chrome-extension/agent.ts