misoai-web 1.0.5 → 1.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1707,44 +1707,70 @@ var TaskCache = class {
1707
1707
  this.cache = cacheContent;
1708
1708
  this.cacheOriginalLength = this.cache.caches.length;
1709
1709
  }
1710
- matchCache(prompt, type) {
1710
+ matchCache(prompt, type, contextData) {
1711
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1711
1712
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1712
1713
  const item = this.cache.caches[i];
1713
1714
  const key = `${type}:${prompt}:${i}`;
1714
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1715
- this.matchedCacheIndices.add(key);
1716
- debug3(
1717
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1718
- type,
1719
- prompt,
1720
- i
1721
- );
1722
- return {
1723
- cacheContent: item,
1724
- updateFn: (cb) => {
1725
- debug3(
1726
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1727
- type,
1728
- prompt,
1729
- i
1730
- );
1731
- cb(item);
1732
- debug3(
1733
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1734
- type,
1735
- prompt,
1736
- i
1737
- );
1738
- this.flushCacheToFile();
1715
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1716
+ continue;
1717
+ }
1718
+ if (type === "plan" && item.type === "plan") {
1719
+ const planItem = item;
1720
+ if (contextHash && planItem.contextHash) {
1721
+ if (contextHash !== planItem.contextHash) {
1722
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1723
+ continue;
1739
1724
  }
1740
- };
1725
+ } else if (contextHash || planItem.contextHash) {
1726
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1727
+ continue;
1728
+ }
1741
1729
  }
1730
+ this.matchedCacheIndices.add(key);
1731
+ debug3(
1732
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1733
+ type,
1734
+ prompt,
1735
+ i,
1736
+ contextHash ? "yes" : "no-context"
1737
+ );
1738
+ return {
1739
+ cacheContent: item,
1740
+ updateFn: (cb) => {
1741
+ debug3(
1742
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1743
+ type,
1744
+ prompt,
1745
+ i
1746
+ );
1747
+ cb(item);
1748
+ debug3(
1749
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1750
+ type,
1751
+ prompt,
1752
+ i
1753
+ );
1754
+ this.flushCacheToFile();
1755
+ }
1756
+ };
1742
1757
  }
1743
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1758
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1744
1759
  return void 0;
1745
1760
  }
1746
- matchPlanCache(prompt) {
1747
- return this.matchCache(prompt, "plan");
1761
+ generateContextHash(contextData) {
1762
+ const sortedKeys = Object.keys(contextData).sort();
1763
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1764
+ let hash = 0;
1765
+ for (let i = 0; i < stableString.length; i++) {
1766
+ const char = stableString.charCodeAt(i);
1767
+ hash = (hash << 5) - hash + char;
1768
+ hash = hash & hash;
1769
+ }
1770
+ return hash.toString(36);
1771
+ }
1772
+ matchPlanCache(prompt, contextData) {
1773
+ return this.matchCache(prompt, "plan", contextData);
1748
1774
  }
1749
1775
  matchLocateCache(prompt) {
1750
1776
  return this.matchCache(prompt, "locate");
@@ -1820,11 +1846,16 @@ cache file: ${cacheFile}`
1820
1846
  );
1821
1847
  }
1822
1848
  }
1823
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1849
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1824
1850
  if (cachedRecord) {
1825
1851
  if (newRecord.type === "plan") {
1826
1852
  cachedRecord.updateFn((cache) => {
1827
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1853
+ const planCache = cache;
1854
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1855
+ if (contextData) {
1856
+ planCache.contextHash = this.generateContextHash(contextData);
1857
+ planCache.contextData = { ...contextData };
1858
+ }
1828
1859
  });
1829
1860
  } else {
1830
1861
  cachedRecord.updateFn((cache) => {
@@ -1832,6 +1863,11 @@ cache file: ${cacheFile}`
1832
1863
  });
1833
1864
  }
1834
1865
  } else {
1866
+ if (newRecord.type === "plan" && contextData) {
1867
+ const planRecord = newRecord;
1868
+ planRecord.contextHash = this.generateContextHash(contextData);
1869
+ planRecord.contextData = { ...contextData };
1870
+ }
1835
1871
  this.appendCache(newRecord);
1836
1872
  }
1837
1873
  }
@@ -2192,34 +2228,35 @@ var PageAgent = class {
2192
2228
  };
2193
2229
  }
2194
2230
  async aiAction(taskPrompt, opt) {
2231
+ const originalPrompt = taskPrompt;
2232
+ let processedPrompt = taskPrompt;
2195
2233
  if (this.opts.enableCumulativeContext && this.contextStore) {
2196
2234
  try {
2197
- const originalPrompt = taskPrompt;
2198
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2199
- if (originalPrompt !== processedPrompt) {
2200
- debug4("Context replacement in aiAction:", {
2201
- original: originalPrompt,
2202
- processed: processedPrompt,
2203
- storedData: this.contextStore.getAllData()
2235
+ const storedData = this.contextStore.getAllData();
2236
+ if (Object.keys(storedData).length > 0) {
2237
+ debug4("Available data for aiAction:", {
2238
+ prompt: taskPrompt,
2239
+ availableData: storedData
2204
2240
  });
2205
2241
  }
2206
- this.contextStore.addStep({
2207
- type: "action",
2208
- summary: `Action: ${processedPrompt}`,
2209
- prompt: processedPrompt
2210
- });
2211
- debug4("Added action step to context store:", {
2212
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2213
- totalSteps: this.contextStore.getRecentSteps(100).length
2214
- });
2215
- taskPrompt = processedPrompt;
2216
2242
  } catch (error) {
2217
2243
  debug4("Context store operation failed:", error);
2218
2244
  }
2219
2245
  }
2220
2246
  const cacheable = opt?.cacheable;
2221
2247
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2222
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2248
+ let contextData;
2249
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2250
+ try {
2251
+ contextData = this.contextStore.getAllData();
2252
+ if (contextData && Object.keys(contextData).length === 0) {
2253
+ contextData = void 0;
2254
+ }
2255
+ } catch (error) {
2256
+ debug4("Failed to get context data for cache:", error);
2257
+ }
2258
+ }
2259
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2223
2260
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2224
2261
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2225
2262
  taskPrompt,
@@ -2229,6 +2266,28 @@ var PageAgent = class {
2229
2266
  debug4("matched cache, will call .runYaml to run the action");
2230
2267
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2231
2268
  const result = await this.runYaml(yaml5);
2269
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2270
+ try {
2271
+ const executionResult = {
2272
+ success: true,
2273
+ actionType: "cached",
2274
+ description: `Executed cached action: ${processedPrompt}`,
2275
+ timing: result.metadata?.totalTime
2276
+ };
2277
+ this.contextStore.addStep({
2278
+ type: "action",
2279
+ summary: `Action: ${processedPrompt} (cached)`,
2280
+ prompt: processedPrompt,
2281
+ executionResult
2282
+ });
2283
+ debug4("Added cached action step to context store:", {
2284
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2285
+ totalSteps: this.contextStore.getRecentSteps(100).length
2286
+ });
2287
+ } catch (error) {
2288
+ debug4("Failed to add cached action step:", error);
2289
+ }
2290
+ }
2232
2291
  return {
2233
2292
  result: result.result,
2234
2293
  metadata: metadata2
@@ -2253,10 +2312,39 @@ var PageAgent = class {
2253
2312
  prompt: taskPrompt,
2254
2313
  yamlWorkflow: yamlFlowStr
2255
2314
  },
2256
- matchedCache
2315
+ matchedCache,
2316
+ contextData
2317
+ // Pass context data for cache creation
2257
2318
  );
2258
2319
  }
2259
2320
  const metadata = this.afterTaskRunning(executor);
2321
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2322
+ try {
2323
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2324
+ this.contextStore.addStep({
2325
+ type: "action",
2326
+ summary: `Action: ${processedPrompt}`,
2327
+ prompt: processedPrompt,
2328
+ executionResult
2329
+ });
2330
+ debug4("Added action step with execution result to context store:", {
2331
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2332
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2333
+ executionResult
2334
+ });
2335
+ } catch (error) {
2336
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2337
+ try {
2338
+ this.contextStore.addStep({
2339
+ type: "action",
2340
+ summary: `Action: ${processedPrompt}`,
2341
+ prompt: processedPrompt
2342
+ });
2343
+ } catch (stepError) {
2344
+ debug4("Failed to add action step:", stepError);
2345
+ }
2346
+ }
2347
+ }
2260
2348
  return {
2261
2349
  result: output,
2262
2350
  metadata
@@ -2441,23 +2529,40 @@ var PageAgent = class {
2441
2529
  };
2442
2530
  }
2443
2531
  async aiAssert(assertion, msg, opt) {
2444
- let processedAssertion = assertion;
2532
+ let executionContext = "";
2445
2533
  if (this.opts.enableCumulativeContext && this.contextStore) {
2446
2534
  try {
2447
- const originalAssertion = assertion;
2448
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2449
- if (originalAssertion !== processedAssertion) {
2450
- debug4("Context replacement in aiAssert:", {
2451
- original: originalAssertion,
2452
- processed: processedAssertion,
2453
- context: "assertion",
2454
- storedData: this.contextStore.getAllData()
2535
+ const recentSteps = this.contextStore.getRecentSteps(3);
2536
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2537
+ const storedData = this.contextStore.getAllData();
2538
+ if (stepsWithExecutionResults.length > 0) {
2539
+ const recentActions = stepsWithExecutionResults.map((step) => {
2540
+ const result = step.executionResult;
2541
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2542
+ }).join("\n");
2543
+ executionContext = `
2544
+
2545
+ Recent actions performed:
2546
+ ${recentActions}
2547
+
2548
+ This context may help verify the assertion.`;
2549
+ }
2550
+ if (storedData && Object.keys(storedData).length > 0) {
2551
+ executionContext += `
2552
+
2553
+ Available data for reference:
2554
+ ${JSON.stringify(storedData, null, 2)}
2555
+
2556
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2557
+ debug4("Available data for aiAssert:", {
2558
+ assertion,
2559
+ availableData: storedData
2455
2560
  });
2456
2561
  }
2457
2562
  this.contextStore.addStep({
2458
2563
  type: "assertion",
2459
- summary: `Assertion: ${processedAssertion}`,
2460
- prompt: processedAssertion
2564
+ summary: `Assertion: ${assertion}`,
2565
+ prompt: assertion
2461
2566
  });
2462
2567
  debug4("Added assertion step to context store:", {
2463
2568
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2473,7 +2578,13 @@ var PageAgent = class {
2473
2578
  } catch (e) {
2474
2579
  }
2475
2580
  }
2476
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2581
+ let assertionWithContext = assertion;
2582
+ if (currentUrl) {
2583
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2584
+ }
2585
+ if (executionContext) {
2586
+ assertionWithContext += executionContext;
2587
+ }
2477
2588
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2478
2589
  const metadata = this.afterTaskRunning(executor, true);
2479
2590
  if (output && opt?.keepRawResponse) {
@@ -2687,6 +2798,79 @@ ${errors}`);
2687
2798
  async destroy() {
2688
2799
  await this.page.destroy();
2689
2800
  }
2801
+ /**
2802
+ * Analyze execution results from executor to generate meaningful descriptions
2803
+ */
2804
+ analyzeExecutionResults(executor, originalPrompt) {
2805
+ const tasks = executor.tasks;
2806
+ const success = !executor.isInErrorState();
2807
+ if (!success) {
2808
+ const errorTask = executor.latestErrorTask();
2809
+ return {
2810
+ success: false,
2811
+ actionType: "error",
2812
+ description: `Failed to execute: ${originalPrompt}`,
2813
+ error: errorTask?.error
2814
+ };
2815
+ }
2816
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2817
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2818
+ const lastAction = actionTasks[actionTasks.length - 1];
2819
+ const lastLocate = locateTasks[locateTasks.length - 1];
2820
+ if (!lastAction) {
2821
+ return {
2822
+ success: true,
2823
+ actionType: "unknown",
2824
+ description: `Completed: ${originalPrompt}`
2825
+ };
2826
+ }
2827
+ const actionType = lastAction.subType || "unknown";
2828
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2829
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2830
+ return {
2831
+ success: true,
2832
+ actionType,
2833
+ description,
2834
+ elementInfo,
2835
+ timing: lastAction.timing?.cost
2836
+ };
2837
+ }
2838
+ /**
2839
+ * Extract element information from locate task
2840
+ */
2841
+ extractElementInfo(locateTask, _actionTask) {
2842
+ if (!locateTask?.output?.element)
2843
+ return void 0;
2844
+ const element = locateTask.output.element;
2845
+ return {
2846
+ type: element.attributes?.nodeType || "unknown",
2847
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2848
+ location: `(${element.center[0]}, ${element.center[1]})`
2849
+ };
2850
+ }
2851
+ /**
2852
+ * Generate natural language description for actions
2853
+ */
2854
+ generateActionDescription(actionType, param, elementInfo) {
2855
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2856
+ switch (actionType) {
2857
+ case "Tap":
2858
+ return `Clicked on ${elementDesc}`;
2859
+ case "Input":
2860
+ const inputValue = param?.value || "";
2861
+ return `Entered "${inputValue}" into ${elementDesc}`;
2862
+ case "KeyboardPress":
2863
+ return `Pressed ${param?.value || "key"}`;
2864
+ case "Scroll":
2865
+ return `Scrolled ${param?.direction || "on page"}`;
2866
+ case "Hover":
2867
+ return `Hovered over ${elementDesc}`;
2868
+ case "Drag":
2869
+ return `Dragged ${elementDesc}`;
2870
+ default:
2871
+ return `Performed ${actionType} action on ${elementDesc}`;
2872
+ }
2873
+ }
2690
2874
  };
2691
2875
 
2692
2876
  // src/bridge-mode/agent-cli-side.ts