misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1693,44 +1693,70 @@ var TaskCache = class {
1693
1693
  this.cache = cacheContent;
1694
1694
  this.cacheOriginalLength = this.cache.caches.length;
1695
1695
  }
1696
- matchCache(prompt, type) {
1696
+ matchCache(prompt, type, contextData) {
1697
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1697
1698
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1698
1699
  const item = this.cache.caches[i];
1699
1700
  const key = `${type}:${prompt}:${i}`;
1700
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1701
- this.matchedCacheIndices.add(key);
1702
- debug3(
1703
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1704
- type,
1705
- prompt,
1706
- i
1707
- );
1708
- return {
1709
- cacheContent: item,
1710
- updateFn: (cb) => {
1711
- debug3(
1712
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1713
- type,
1714
- prompt,
1715
- i
1716
- );
1717
- cb(item);
1718
- debug3(
1719
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1720
- type,
1721
- prompt,
1722
- i
1723
- );
1724
- this.flushCacheToFile();
1701
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1702
+ continue;
1703
+ }
1704
+ if (type === "plan" && item.type === "plan") {
1705
+ const planItem = item;
1706
+ if (contextHash && planItem.contextHash) {
1707
+ if (contextHash !== planItem.contextHash) {
1708
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1709
+ continue;
1725
1710
  }
1726
- };
1711
+ } else if (contextHash || planItem.contextHash) {
1712
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1713
+ continue;
1714
+ }
1727
1715
  }
1716
+ this.matchedCacheIndices.add(key);
1717
+ debug3(
1718
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1719
+ type,
1720
+ prompt,
1721
+ i,
1722
+ contextHash ? "yes" : "no-context"
1723
+ );
1724
+ return {
1725
+ cacheContent: item,
1726
+ updateFn: (cb) => {
1727
+ debug3(
1728
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1729
+ type,
1730
+ prompt,
1731
+ i
1732
+ );
1733
+ cb(item);
1734
+ debug3(
1735
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1736
+ type,
1737
+ prompt,
1738
+ i
1739
+ );
1740
+ this.flushCacheToFile();
1741
+ }
1742
+ };
1728
1743
  }
1729
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1744
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1730
1745
  return void 0;
1731
1746
  }
1732
- matchPlanCache(prompt) {
1733
- return this.matchCache(prompt, "plan");
1747
+ generateContextHash(contextData) {
1748
+ const sortedKeys = Object.keys(contextData).sort();
1749
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1750
+ let hash = 0;
1751
+ for (let i = 0; i < stableString.length; i++) {
1752
+ const char = stableString.charCodeAt(i);
1753
+ hash = (hash << 5) - hash + char;
1754
+ hash = hash & hash;
1755
+ }
1756
+ return hash.toString(36);
1757
+ }
1758
+ matchPlanCache(prompt, contextData) {
1759
+ return this.matchCache(prompt, "plan", contextData);
1734
1760
  }
1735
1761
  matchLocateCache(prompt) {
1736
1762
  return this.matchCache(prompt, "locate");
@@ -1806,11 +1832,16 @@ cache file: ${cacheFile}`
1806
1832
  );
1807
1833
  }
1808
1834
  }
1809
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1835
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1810
1836
  if (cachedRecord) {
1811
1837
  if (newRecord.type === "plan") {
1812
1838
  cachedRecord.updateFn((cache) => {
1813
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1839
+ const planCache = cache;
1840
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1841
+ if (contextData) {
1842
+ planCache.contextHash = this.generateContextHash(contextData);
1843
+ planCache.contextData = { ...contextData };
1844
+ }
1814
1845
  });
1815
1846
  } else {
1816
1847
  cachedRecord.updateFn((cache) => {
@@ -1818,6 +1849,11 @@ cache file: ${cacheFile}`
1818
1849
  });
1819
1850
  }
1820
1851
  } else {
1852
+ if (newRecord.type === "plan" && contextData) {
1853
+ const planRecord = newRecord;
1854
+ planRecord.contextHash = this.generateContextHash(contextData);
1855
+ planRecord.contextData = { ...contextData };
1856
+ }
1821
1857
  this.appendCache(newRecord);
1822
1858
  }
1823
1859
  }
@@ -2178,34 +2214,35 @@ var PageAgent = class {
2178
2214
  };
2179
2215
  }
2180
2216
  async aiAction(taskPrompt, opt) {
2217
+ const originalPrompt = taskPrompt;
2218
+ let processedPrompt = taskPrompt;
2181
2219
  if (this.opts.enableCumulativeContext && this.contextStore) {
2182
2220
  try {
2183
- const originalPrompt = taskPrompt;
2184
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2185
- if (originalPrompt !== processedPrompt) {
2186
- debug4("Context replacement in aiAction:", {
2187
- original: originalPrompt,
2188
- processed: processedPrompt,
2189
- storedData: this.contextStore.getAllData()
2221
+ const storedData = this.contextStore.getAllData();
2222
+ if (Object.keys(storedData).length > 0) {
2223
+ debug4("Available data for aiAction:", {
2224
+ prompt: taskPrompt,
2225
+ availableData: storedData
2190
2226
  });
2191
2227
  }
2192
- this.contextStore.addStep({
2193
- type: "action",
2194
- summary: `Action: ${processedPrompt}`,
2195
- prompt: processedPrompt
2196
- });
2197
- debug4("Added action step to context store:", {
2198
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2199
- totalSteps: this.contextStore.getRecentSteps(100).length
2200
- });
2201
- taskPrompt = processedPrompt;
2202
2228
  } catch (error) {
2203
2229
  debug4("Context store operation failed:", error);
2204
2230
  }
2205
2231
  }
2206
2232
  const cacheable = opt?.cacheable;
2207
2233
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2208
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2234
+ let contextData;
2235
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2236
+ try {
2237
+ contextData = this.contextStore.getAllData();
2238
+ if (contextData && Object.keys(contextData).length === 0) {
2239
+ contextData = void 0;
2240
+ }
2241
+ } catch (error) {
2242
+ debug4("Failed to get context data for cache:", error);
2243
+ }
2244
+ }
2245
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2209
2246
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2210
2247
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2211
2248
  taskPrompt,
@@ -2215,6 +2252,28 @@ var PageAgent = class {
2215
2252
  debug4("matched cache, will call .runYaml to run the action");
2216
2253
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2217
2254
  const result = await this.runYaml(yaml5);
2255
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2256
+ try {
2257
+ const executionResult = {
2258
+ success: true,
2259
+ actionType: "cached",
2260
+ description: `Executed cached action: ${processedPrompt}`,
2261
+ timing: result.metadata?.totalTime
2262
+ };
2263
+ this.contextStore.addStep({
2264
+ type: "action",
2265
+ summary: `Action: ${processedPrompt} (cached)`,
2266
+ prompt: processedPrompt,
2267
+ executionResult
2268
+ });
2269
+ debug4("Added cached action step to context store:", {
2270
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2271
+ totalSteps: this.contextStore.getRecentSteps(100).length
2272
+ });
2273
+ } catch (error) {
2274
+ debug4("Failed to add cached action step:", error);
2275
+ }
2276
+ }
2218
2277
  return {
2219
2278
  result: result.result,
2220
2279
  metadata: metadata2
@@ -2239,10 +2298,39 @@ var PageAgent = class {
2239
2298
  prompt: taskPrompt,
2240
2299
  yamlWorkflow: yamlFlowStr
2241
2300
  },
2242
- matchedCache
2301
+ matchedCache,
2302
+ contextData
2303
+ // Pass context data for cache creation
2243
2304
  );
2244
2305
  }
2245
2306
  const metadata = this.afterTaskRunning(executor);
2307
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2308
+ try {
2309
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2310
+ this.contextStore.addStep({
2311
+ type: "action",
2312
+ summary: `Action: ${processedPrompt}`,
2313
+ prompt: processedPrompt,
2314
+ executionResult
2315
+ });
2316
+ debug4("Added action step with execution result to context store:", {
2317
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2318
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2319
+ executionResult
2320
+ });
2321
+ } catch (error) {
2322
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2323
+ try {
2324
+ this.contextStore.addStep({
2325
+ type: "action",
2326
+ summary: `Action: ${processedPrompt}`,
2327
+ prompt: processedPrompt
2328
+ });
2329
+ } catch (stepError) {
2330
+ debug4("Failed to add action step:", stepError);
2331
+ }
2332
+ }
2333
+ }
2246
2334
  return {
2247
2335
  result: output,
2248
2336
  metadata
@@ -2427,23 +2515,40 @@ var PageAgent = class {
2427
2515
  };
2428
2516
  }
2429
2517
  async aiAssert(assertion, msg, opt) {
2430
- let processedAssertion = assertion;
2518
+ let executionContext = "";
2431
2519
  if (this.opts.enableCumulativeContext && this.contextStore) {
2432
2520
  try {
2433
- const originalAssertion = assertion;
2434
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2435
- if (originalAssertion !== processedAssertion) {
2436
- debug4("Context replacement in aiAssert:", {
2437
- original: originalAssertion,
2438
- processed: processedAssertion,
2439
- context: "assertion",
2440
- storedData: this.contextStore.getAllData()
2521
+ const recentSteps = this.contextStore.getRecentSteps(3);
2522
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2523
+ const storedData = this.contextStore.getAllData();
2524
+ if (stepsWithExecutionResults.length > 0) {
2525
+ const recentActions = stepsWithExecutionResults.map((step) => {
2526
+ const result = step.executionResult;
2527
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2528
+ }).join("\n");
2529
+ executionContext = `
2530
+
2531
+ Recent actions performed:
2532
+ ${recentActions}
2533
+
2534
+ This context may help verify the assertion.`;
2535
+ }
2536
+ if (storedData && Object.keys(storedData).length > 0) {
2537
+ executionContext += `
2538
+
2539
+ Available data for reference:
2540
+ ${JSON.stringify(storedData, null, 2)}
2541
+
2542
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2543
+ debug4("Available data for aiAssert:", {
2544
+ assertion,
2545
+ availableData: storedData
2441
2546
  });
2442
2547
  }
2443
2548
  this.contextStore.addStep({
2444
2549
  type: "assertion",
2445
- summary: `Assertion: ${processedAssertion}`,
2446
- prompt: processedAssertion
2550
+ summary: `Assertion: ${assertion}`,
2551
+ prompt: assertion
2447
2552
  });
2448
2553
  debug4("Added assertion step to context store:", {
2449
2554
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2459,7 +2564,13 @@ var PageAgent = class {
2459
2564
  } catch (e) {
2460
2565
  }
2461
2566
  }
2462
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2567
+ let assertionWithContext = assertion;
2568
+ if (currentUrl) {
2569
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2570
+ }
2571
+ if (executionContext) {
2572
+ assertionWithContext += executionContext;
2573
+ }
2463
2574
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2464
2575
  const metadata = this.afterTaskRunning(executor, true);
2465
2576
  if (output && opt?.keepRawResponse) {
@@ -2673,6 +2784,79 @@ ${errors}`);
2673
2784
  async destroy() {
2674
2785
  await this.page.destroy();
2675
2786
  }
2787
+ /**
2788
+ * Analyze execution results from executor to generate meaningful descriptions
2789
+ */
2790
+ analyzeExecutionResults(executor, originalPrompt) {
2791
+ const tasks = executor.tasks;
2792
+ const success = !executor.isInErrorState();
2793
+ if (!success) {
2794
+ const errorTask = executor.latestErrorTask();
2795
+ return {
2796
+ success: false,
2797
+ actionType: "error",
2798
+ description: `Failed to execute: ${originalPrompt}`,
2799
+ error: errorTask?.error
2800
+ };
2801
+ }
2802
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2803
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2804
+ const lastAction = actionTasks[actionTasks.length - 1];
2805
+ const lastLocate = locateTasks[locateTasks.length - 1];
2806
+ if (!lastAction) {
2807
+ return {
2808
+ success: true,
2809
+ actionType: "unknown",
2810
+ description: `Completed: ${originalPrompt}`
2811
+ };
2812
+ }
2813
+ const actionType = lastAction.subType || "unknown";
2814
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2815
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2816
+ return {
2817
+ success: true,
2818
+ actionType,
2819
+ description,
2820
+ elementInfo,
2821
+ timing: lastAction.timing?.cost
2822
+ };
2823
+ }
2824
+ /**
2825
+ * Extract element information from locate task
2826
+ */
2827
+ extractElementInfo(locateTask, _actionTask) {
2828
+ if (!locateTask?.output?.element)
2829
+ return void 0;
2830
+ const element = locateTask.output.element;
2831
+ return {
2832
+ type: element.attributes?.nodeType || "unknown",
2833
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2834
+ location: `(${element.center[0]}, ${element.center[1]})`
2835
+ };
2836
+ }
2837
+ /**
2838
+ * Generate natural language description for actions
2839
+ */
2840
+ generateActionDescription(actionType, param, elementInfo) {
2841
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2842
+ switch (actionType) {
2843
+ case "Tap":
2844
+ return `Clicked on ${elementDesc}`;
2845
+ case "Input":
2846
+ const inputValue = param?.value || "";
2847
+ return `Entered "${inputValue}" into ${elementDesc}`;
2848
+ case "KeyboardPress":
2849
+ return `Pressed ${param?.value || "key"}`;
2850
+ case "Scroll":
2851
+ return `Scrolled ${param?.direction || "on page"}`;
2852
+ case "Hover":
2853
+ return `Hovered over ${elementDesc}`;
2854
+ case "Drag":
2855
+ return `Dragged ${elementDesc}`;
2856
+ default:
2857
+ return `Performed ${actionType} action on ${elementDesc}`;
2858
+ }
2859
+ }
2676
2860
  };
2677
2861
 
2678
2862
  // src/playground/agent.ts