misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1689,44 +1689,70 @@ var TaskCache = class {
1689
1689
  this.cache = cacheContent;
1690
1690
  this.cacheOriginalLength = this.cache.caches.length;
1691
1691
  }
1692
- matchCache(prompt, type) {
1692
+ matchCache(prompt, type, contextData) {
1693
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1693
1694
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1694
1695
  const item = this.cache.caches[i];
1695
1696
  const key = `${type}:${prompt}:${i}`;
1696
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1697
- this.matchedCacheIndices.add(key);
1698
- debug3(
1699
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1700
- type,
1701
- prompt,
1702
- i
1703
- );
1704
- return {
1705
- cacheContent: item,
1706
- updateFn: (cb) => {
1707
- debug3(
1708
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1709
- type,
1710
- prompt,
1711
- i
1712
- );
1713
- cb(item);
1714
- debug3(
1715
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1716
- type,
1717
- prompt,
1718
- i
1719
- );
1720
- this.flushCacheToFile();
1697
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1698
+ continue;
1699
+ }
1700
+ if (type === "plan" && item.type === "plan") {
1701
+ const planItem = item;
1702
+ if (contextHash && planItem.contextHash) {
1703
+ if (contextHash !== planItem.contextHash) {
1704
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1705
+ continue;
1721
1706
  }
1722
- };
1707
+ } else if (contextHash || planItem.contextHash) {
1708
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1709
+ continue;
1710
+ }
1723
1711
  }
1712
+ this.matchedCacheIndices.add(key);
1713
+ debug3(
1714
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1715
+ type,
1716
+ prompt,
1717
+ i,
1718
+ contextHash ? "yes" : "no-context"
1719
+ );
1720
+ return {
1721
+ cacheContent: item,
1722
+ updateFn: (cb) => {
1723
+ debug3(
1724
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1725
+ type,
1726
+ prompt,
1727
+ i
1728
+ );
1729
+ cb(item);
1730
+ debug3(
1731
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1732
+ type,
1733
+ prompt,
1734
+ i
1735
+ );
1736
+ this.flushCacheToFile();
1737
+ }
1738
+ };
1724
1739
  }
1725
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1740
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1726
1741
  return void 0;
1727
1742
  }
1728
- matchPlanCache(prompt) {
1729
- return this.matchCache(prompt, "plan");
1743
+ generateContextHash(contextData) {
1744
+ const sortedKeys = Object.keys(contextData).sort();
1745
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1746
+ let hash = 0;
1747
+ for (let i = 0; i < stableString.length; i++) {
1748
+ const char = stableString.charCodeAt(i);
1749
+ hash = (hash << 5) - hash + char;
1750
+ hash = hash & hash;
1751
+ }
1752
+ return hash.toString(36);
1753
+ }
1754
+ matchPlanCache(prompt, contextData) {
1755
+ return this.matchCache(prompt, "plan", contextData);
1730
1756
  }
1731
1757
  matchLocateCache(prompt) {
1732
1758
  return this.matchCache(prompt, "locate");
@@ -1802,11 +1828,16 @@ cache file: ${cacheFile}`
1802
1828
  );
1803
1829
  }
1804
1830
  }
1805
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1831
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1806
1832
  if (cachedRecord) {
1807
1833
  if (newRecord.type === "plan") {
1808
1834
  cachedRecord.updateFn((cache) => {
1809
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1835
+ const planCache = cache;
1836
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1837
+ if (contextData) {
1838
+ planCache.contextHash = this.generateContextHash(contextData);
1839
+ planCache.contextData = { ...contextData };
1840
+ }
1810
1841
  });
1811
1842
  } else {
1812
1843
  cachedRecord.updateFn((cache) => {
@@ -1814,6 +1845,11 @@ cache file: ${cacheFile}`
1814
1845
  });
1815
1846
  }
1816
1847
  } else {
1848
+ if (newRecord.type === "plan" && contextData) {
1849
+ const planRecord = newRecord;
1850
+ planRecord.contextHash = this.generateContextHash(contextData);
1851
+ planRecord.contextData = { ...contextData };
1852
+ }
1817
1853
  this.appendCache(newRecord);
1818
1854
  }
1819
1855
  }
@@ -2174,34 +2210,35 @@ var PageAgent = class {
2174
2210
  };
2175
2211
  }
2176
2212
  async aiAction(taskPrompt, opt) {
2213
+ const originalPrompt = taskPrompt;
2214
+ let processedPrompt = taskPrompt;
2177
2215
  if (this.opts.enableCumulativeContext && this.contextStore) {
2178
2216
  try {
2179
- const originalPrompt = taskPrompt;
2180
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2181
- if (originalPrompt !== processedPrompt) {
2182
- debug4("Context replacement in aiAction:", {
2183
- original: originalPrompt,
2184
- processed: processedPrompt,
2185
- storedData: this.contextStore.getAllData()
2217
+ const storedData = this.contextStore.getAllData();
2218
+ if (Object.keys(storedData).length > 0) {
2219
+ debug4("Available data for aiAction:", {
2220
+ prompt: taskPrompt,
2221
+ availableData: storedData
2186
2222
  });
2187
2223
  }
2188
- this.contextStore.addStep({
2189
- type: "action",
2190
- summary: `Action: ${processedPrompt}`,
2191
- prompt: processedPrompt
2192
- });
2193
- debug4("Added action step to context store:", {
2194
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2195
- totalSteps: this.contextStore.getRecentSteps(100).length
2196
- });
2197
- taskPrompt = processedPrompt;
2198
2224
  } catch (error) {
2199
2225
  debug4("Context store operation failed:", error);
2200
2226
  }
2201
2227
  }
2202
2228
  const cacheable = opt?.cacheable;
2203
2229
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2204
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2230
+ let contextData;
2231
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2232
+ try {
2233
+ contextData = this.contextStore.getAllData();
2234
+ if (contextData && Object.keys(contextData).length === 0) {
2235
+ contextData = void 0;
2236
+ }
2237
+ } catch (error) {
2238
+ debug4("Failed to get context data for cache:", error);
2239
+ }
2240
+ }
2241
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2205
2242
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2206
2243
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2207
2244
  taskPrompt,
@@ -2211,6 +2248,28 @@ var PageAgent = class {
2211
2248
  debug4("matched cache, will call .runYaml to run the action");
2212
2249
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2213
2250
  const result = await this.runYaml(yaml5);
2251
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2252
+ try {
2253
+ const executionResult = {
2254
+ success: true,
2255
+ actionType: "cached",
2256
+ description: `Executed cached action: ${processedPrompt}`,
2257
+ timing: result.metadata?.totalTime
2258
+ };
2259
+ this.contextStore.addStep({
2260
+ type: "action",
2261
+ summary: `Action: ${processedPrompt} (cached)`,
2262
+ prompt: processedPrompt,
2263
+ executionResult
2264
+ });
2265
+ debug4("Added cached action step to context store:", {
2266
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2267
+ totalSteps: this.contextStore.getRecentSteps(100).length
2268
+ });
2269
+ } catch (error) {
2270
+ debug4("Failed to add cached action step:", error);
2271
+ }
2272
+ }
2214
2273
  return {
2215
2274
  result: result.result,
2216
2275
  metadata: metadata2
@@ -2235,10 +2294,39 @@ var PageAgent = class {
2235
2294
  prompt: taskPrompt,
2236
2295
  yamlWorkflow: yamlFlowStr
2237
2296
  },
2238
- matchedCache
2297
+ matchedCache,
2298
+ contextData
2299
+ // Pass context data for cache creation
2239
2300
  );
2240
2301
  }
2241
2302
  const metadata = this.afterTaskRunning(executor);
2303
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2304
+ try {
2305
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2306
+ this.contextStore.addStep({
2307
+ type: "action",
2308
+ summary: `Action: ${processedPrompt}`,
2309
+ prompt: processedPrompt,
2310
+ executionResult
2311
+ });
2312
+ debug4("Added action step with execution result to context store:", {
2313
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2314
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2315
+ executionResult
2316
+ });
2317
+ } catch (error) {
2318
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2319
+ try {
2320
+ this.contextStore.addStep({
2321
+ type: "action",
2322
+ summary: `Action: ${processedPrompt}`,
2323
+ prompt: processedPrompt
2324
+ });
2325
+ } catch (stepError) {
2326
+ debug4("Failed to add action step:", stepError);
2327
+ }
2328
+ }
2329
+ }
2242
2330
  return {
2243
2331
  result: output,
2244
2332
  metadata
@@ -2423,23 +2511,40 @@ var PageAgent = class {
2423
2511
  };
2424
2512
  }
2425
2513
  async aiAssert(assertion, msg, opt) {
2426
- let processedAssertion = assertion;
2514
+ let executionContext = "";
2427
2515
  if (this.opts.enableCumulativeContext && this.contextStore) {
2428
2516
  try {
2429
- const originalAssertion = assertion;
2430
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2431
- if (originalAssertion !== processedAssertion) {
2432
- debug4("Context replacement in aiAssert:", {
2433
- original: originalAssertion,
2434
- processed: processedAssertion,
2435
- context: "assertion",
2436
- storedData: this.contextStore.getAllData()
2517
+ const recentSteps = this.contextStore.getRecentSteps(3);
2518
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2519
+ const storedData = this.contextStore.getAllData();
2520
+ if (stepsWithExecutionResults.length > 0) {
2521
+ const recentActions = stepsWithExecutionResults.map((step) => {
2522
+ const result = step.executionResult;
2523
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2524
+ }).join("\n");
2525
+ executionContext = `
2526
+
2527
+ Recent actions performed:
2528
+ ${recentActions}
2529
+
2530
+ This context may help verify the assertion.`;
2531
+ }
2532
+ if (storedData && Object.keys(storedData).length > 0) {
2533
+ executionContext += `
2534
+
2535
+ Available data for reference:
2536
+ ${JSON.stringify(storedData, null, 2)}
2537
+
2538
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2539
+ debug4("Available data for aiAssert:", {
2540
+ assertion,
2541
+ availableData: storedData
2437
2542
  });
2438
2543
  }
2439
2544
  this.contextStore.addStep({
2440
2545
  type: "assertion",
2441
- summary: `Assertion: ${processedAssertion}`,
2442
- prompt: processedAssertion
2546
+ summary: `Assertion: ${assertion}`,
2547
+ prompt: assertion
2443
2548
  });
2444
2549
  debug4("Added assertion step to context store:", {
2445
2550
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2455,7 +2560,13 @@ var PageAgent = class {
2455
2560
  } catch (e) {
2456
2561
  }
2457
2562
  }
2458
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2563
+ let assertionWithContext = assertion;
2564
+ if (currentUrl) {
2565
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2566
+ }
2567
+ if (executionContext) {
2568
+ assertionWithContext += executionContext;
2569
+ }
2459
2570
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2460
2571
  const metadata = this.afterTaskRunning(executor, true);
2461
2572
  if (output && opt?.keepRawResponse) {
@@ -2669,6 +2780,79 @@ ${errors}`);
2669
2780
  async destroy() {
2670
2781
  await this.page.destroy();
2671
2782
  }
2783
+ /**
2784
+ * Analyze execution results from executor to generate meaningful descriptions
2785
+ */
2786
+ analyzeExecutionResults(executor, originalPrompt) {
2787
+ const tasks = executor.tasks;
2788
+ const success = !executor.isInErrorState();
2789
+ if (!success) {
2790
+ const errorTask = executor.latestErrorTask();
2791
+ return {
2792
+ success: false,
2793
+ actionType: "error",
2794
+ description: `Failed to execute: ${originalPrompt}`,
2795
+ error: errorTask?.error
2796
+ };
2797
+ }
2798
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2799
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2800
+ const lastAction = actionTasks[actionTasks.length - 1];
2801
+ const lastLocate = locateTasks[locateTasks.length - 1];
2802
+ if (!lastAction) {
2803
+ return {
2804
+ success: true,
2805
+ actionType: "unknown",
2806
+ description: `Completed: ${originalPrompt}`
2807
+ };
2808
+ }
2809
+ const actionType = lastAction.subType || "unknown";
2810
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2811
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2812
+ return {
2813
+ success: true,
2814
+ actionType,
2815
+ description,
2816
+ elementInfo,
2817
+ timing: lastAction.timing?.cost
2818
+ };
2819
+ }
2820
+ /**
2821
+ * Extract element information from locate task
2822
+ */
2823
+ extractElementInfo(locateTask, _actionTask) {
2824
+ if (!locateTask?.output?.element)
2825
+ return void 0;
2826
+ const element = locateTask.output.element;
2827
+ return {
2828
+ type: element.attributes?.nodeType || "unknown",
2829
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2830
+ location: `(${element.center[0]}, ${element.center[1]})`
2831
+ };
2832
+ }
2833
+ /**
2834
+ * Generate natural language description for actions
2835
+ */
2836
+ generateActionDescription(actionType, param, elementInfo) {
2837
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2838
+ switch (actionType) {
2839
+ case "Tap":
2840
+ return `Clicked on ${elementDesc}`;
2841
+ case "Input":
2842
+ const inputValue = param?.value || "";
2843
+ return `Entered "${inputValue}" into ${elementDesc}`;
2844
+ case "KeyboardPress":
2845
+ return `Pressed ${param?.value || "key"}`;
2846
+ case "Scroll":
2847
+ return `Scrolled ${param?.direction || "on page"}`;
2848
+ case "Hover":
2849
+ return `Hovered over ${elementDesc}`;
2850
+ case "Drag":
2851
+ return `Dragged ${elementDesc}`;
2852
+ default:
2853
+ return `Performed ${actionType} action on ${elementDesc}`;
2854
+ }
2855
+ }
2672
2856
  };
2673
2857
 
2674
2858
  // src/playground/agent.ts