misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
package/dist/lib/index.js CHANGED
@@ -1739,44 +1739,70 @@ var TaskCache = class {
1739
1739
  this.cache = cacheContent;
1740
1740
  this.cacheOriginalLength = this.cache.caches.length;
1741
1741
  }
1742
- matchCache(prompt, type) {
1742
+ matchCache(prompt, type, contextData) {
1743
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1743
1744
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1744
1745
  const item = this.cache.caches[i];
1745
1746
  const key = `${type}:${prompt}:${i}`;
1746
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1747
- this.matchedCacheIndices.add(key);
1748
- debug3(
1749
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1750
- type,
1751
- prompt,
1752
- i
1753
- );
1754
- return {
1755
- cacheContent: item,
1756
- updateFn: (cb) => {
1757
- debug3(
1758
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1759
- type,
1760
- prompt,
1761
- i
1762
- );
1763
- cb(item);
1764
- debug3(
1765
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1766
- type,
1767
- prompt,
1768
- i
1769
- );
1770
- this.flushCacheToFile();
1747
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1748
+ continue;
1749
+ }
1750
+ if (type === "plan" && item.type === "plan") {
1751
+ const planItem = item;
1752
+ if (contextHash && planItem.contextHash) {
1753
+ if (contextHash !== planItem.contextHash) {
1754
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1755
+ continue;
1771
1756
  }
1772
- };
1757
+ } else if (contextHash || planItem.contextHash) {
1758
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1759
+ continue;
1760
+ }
1773
1761
  }
1762
+ this.matchedCacheIndices.add(key);
1763
+ debug3(
1764
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1765
+ type,
1766
+ prompt,
1767
+ i,
1768
+ contextHash ? "yes" : "no-context"
1769
+ );
1770
+ return {
1771
+ cacheContent: item,
1772
+ updateFn: (cb) => {
1773
+ debug3(
1774
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1775
+ type,
1776
+ prompt,
1777
+ i
1778
+ );
1779
+ cb(item);
1780
+ debug3(
1781
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1782
+ type,
1783
+ prompt,
1784
+ i
1785
+ );
1786
+ this.flushCacheToFile();
1787
+ }
1788
+ };
1774
1789
  }
1775
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1790
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1776
1791
  return void 0;
1777
1792
  }
1778
- matchPlanCache(prompt) {
1779
- return this.matchCache(prompt, "plan");
1793
+ generateContextHash(contextData) {
1794
+ const sortedKeys = Object.keys(contextData).sort();
1795
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1796
+ let hash = 0;
1797
+ for (let i = 0; i < stableString.length; i++) {
1798
+ const char = stableString.charCodeAt(i);
1799
+ hash = (hash << 5) - hash + char;
1800
+ hash = hash & hash;
1801
+ }
1802
+ return hash.toString(36);
1803
+ }
1804
+ matchPlanCache(prompt, contextData) {
1805
+ return this.matchCache(prompt, "plan", contextData);
1780
1806
  }
1781
1807
  matchLocateCache(prompt) {
1782
1808
  return this.matchCache(prompt, "locate");
@@ -1852,11 +1878,16 @@ cache file: ${cacheFile}`
1852
1878
  );
1853
1879
  }
1854
1880
  }
1855
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1881
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1856
1882
  if (cachedRecord) {
1857
1883
  if (newRecord.type === "plan") {
1858
1884
  cachedRecord.updateFn((cache) => {
1859
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1885
+ const planCache = cache;
1886
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1887
+ if (contextData) {
1888
+ planCache.contextHash = this.generateContextHash(contextData);
1889
+ planCache.contextData = { ...contextData };
1890
+ }
1860
1891
  });
1861
1892
  } else {
1862
1893
  cachedRecord.updateFn((cache) => {
@@ -1864,6 +1895,11 @@ cache file: ${cacheFile}`
1864
1895
  });
1865
1896
  }
1866
1897
  } else {
1898
+ if (newRecord.type === "plan" && contextData) {
1899
+ const planRecord = newRecord;
1900
+ planRecord.contextHash = this.generateContextHash(contextData);
1901
+ planRecord.contextData = { ...contextData };
1902
+ }
1867
1903
  this.appendCache(newRecord);
1868
1904
  }
1869
1905
  }
@@ -2224,34 +2260,35 @@ var PageAgent = class {
2224
2260
  };
2225
2261
  }
2226
2262
  async aiAction(taskPrompt, opt) {
2263
+ const originalPrompt = taskPrompt;
2264
+ let processedPrompt = taskPrompt;
2227
2265
  if (this.opts.enableCumulativeContext && this.contextStore) {
2228
2266
  try {
2229
- const originalPrompt = taskPrompt;
2230
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2231
- if (originalPrompt !== processedPrompt) {
2232
- debug4("Context replacement in aiAction:", {
2233
- original: originalPrompt,
2234
- processed: processedPrompt,
2235
- storedData: this.contextStore.getAllData()
2267
+ const storedData = this.contextStore.getAllData();
2268
+ if (Object.keys(storedData).length > 0) {
2269
+ debug4("Available data for aiAction:", {
2270
+ prompt: taskPrompt,
2271
+ availableData: storedData
2236
2272
  });
2237
2273
  }
2238
- this.contextStore.addStep({
2239
- type: "action",
2240
- summary: `Action: ${processedPrompt}`,
2241
- prompt: processedPrompt
2242
- });
2243
- debug4("Added action step to context store:", {
2244
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2245
- totalSteps: this.contextStore.getRecentSteps(100).length
2246
- });
2247
- taskPrompt = processedPrompt;
2248
2274
  } catch (error) {
2249
2275
  debug4("Context store operation failed:", error);
2250
2276
  }
2251
2277
  }
2252
2278
  const cacheable = opt?.cacheable;
2253
2279
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2254
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2280
+ let contextData;
2281
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2282
+ try {
2283
+ contextData = this.contextStore.getAllData();
2284
+ if (contextData && Object.keys(contextData).length === 0) {
2285
+ contextData = void 0;
2286
+ }
2287
+ } catch (error) {
2288
+ debug4("Failed to get context data for cache:", error);
2289
+ }
2290
+ }
2291
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2255
2292
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2256
2293
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2257
2294
  taskPrompt,
@@ -2261,6 +2298,28 @@ var PageAgent = class {
2261
2298
  debug4("matched cache, will call .runYaml to run the action");
2262
2299
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2263
2300
  const result = await this.runYaml(yaml5);
2301
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2302
+ try {
2303
+ const executionResult = {
2304
+ success: true,
2305
+ actionType: "cached",
2306
+ description: `Executed cached action: ${processedPrompt}`,
2307
+ timing: result.metadata?.totalTime
2308
+ };
2309
+ this.contextStore.addStep({
2310
+ type: "action",
2311
+ summary: `Action: ${processedPrompt} (cached)`,
2312
+ prompt: processedPrompt,
2313
+ executionResult
2314
+ });
2315
+ debug4("Added cached action step to context store:", {
2316
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2317
+ totalSteps: this.contextStore.getRecentSteps(100).length
2318
+ });
2319
+ } catch (error) {
2320
+ debug4("Failed to add cached action step:", error);
2321
+ }
2322
+ }
2264
2323
  return {
2265
2324
  result: result.result,
2266
2325
  metadata: metadata2
@@ -2285,10 +2344,39 @@ var PageAgent = class {
2285
2344
  prompt: taskPrompt,
2286
2345
  yamlWorkflow: yamlFlowStr
2287
2346
  },
2288
- matchedCache
2347
+ matchedCache,
2348
+ contextData
2349
+ // Pass context data for cache creation
2289
2350
  );
2290
2351
  }
2291
2352
  const metadata = this.afterTaskRunning(executor);
2353
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2354
+ try {
2355
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2356
+ this.contextStore.addStep({
2357
+ type: "action",
2358
+ summary: `Action: ${processedPrompt}`,
2359
+ prompt: processedPrompt,
2360
+ executionResult
2361
+ });
2362
+ debug4("Added action step with execution result to context store:", {
2363
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2364
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2365
+ executionResult
2366
+ });
2367
+ } catch (error) {
2368
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2369
+ try {
2370
+ this.contextStore.addStep({
2371
+ type: "action",
2372
+ summary: `Action: ${processedPrompt}`,
2373
+ prompt: processedPrompt
2374
+ });
2375
+ } catch (stepError) {
2376
+ debug4("Failed to add action step:", stepError);
2377
+ }
2378
+ }
2379
+ }
2292
2380
  return {
2293
2381
  result: output,
2294
2382
  metadata
@@ -2473,23 +2561,40 @@ var PageAgent = class {
2473
2561
  };
2474
2562
  }
2475
2563
  async aiAssert(assertion, msg, opt) {
2476
- let processedAssertion = assertion;
2564
+ let executionContext = "";
2477
2565
  if (this.opts.enableCumulativeContext && this.contextStore) {
2478
2566
  try {
2479
- const originalAssertion = assertion;
2480
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2481
- if (originalAssertion !== processedAssertion) {
2482
- debug4("Context replacement in aiAssert:", {
2483
- original: originalAssertion,
2484
- processed: processedAssertion,
2485
- context: "assertion",
2486
- storedData: this.contextStore.getAllData()
2567
+ const recentSteps = this.contextStore.getRecentSteps(3);
2568
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2569
+ const storedData = this.contextStore.getAllData();
2570
+ if (stepsWithExecutionResults.length > 0) {
2571
+ const recentActions = stepsWithExecutionResults.map((step) => {
2572
+ const result = step.executionResult;
2573
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2574
+ }).join("\n");
2575
+ executionContext = `
2576
+
2577
+ Recent actions performed:
2578
+ ${recentActions}
2579
+
2580
+ This context may help verify the assertion.`;
2581
+ }
2582
+ if (storedData && Object.keys(storedData).length > 0) {
2583
+ executionContext += `
2584
+
2585
+ Available data for reference:
2586
+ ${JSON.stringify(storedData, null, 2)}
2587
+
2588
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2589
+ debug4("Available data for aiAssert:", {
2590
+ assertion,
2591
+ availableData: storedData
2487
2592
  });
2488
2593
  }
2489
2594
  this.contextStore.addStep({
2490
2595
  type: "assertion",
2491
- summary: `Assertion: ${processedAssertion}`,
2492
- prompt: processedAssertion
2596
+ summary: `Assertion: ${assertion}`,
2597
+ prompt: assertion
2493
2598
  });
2494
2599
  debug4("Added assertion step to context store:", {
2495
2600
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2505,7 +2610,13 @@ var PageAgent = class {
2505
2610
  } catch (e) {
2506
2611
  }
2507
2612
  }
2508
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2613
+ let assertionWithContext = assertion;
2614
+ if (currentUrl) {
2615
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2616
+ }
2617
+ if (executionContext) {
2618
+ assertionWithContext += executionContext;
2619
+ }
2509
2620
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2510
2621
  const metadata = this.afterTaskRunning(executor, true);
2511
2622
  if (output && opt?.keepRawResponse) {
@@ -2719,6 +2830,79 @@ ${errors}`);
2719
2830
  async destroy() {
2720
2831
  await this.page.destroy();
2721
2832
  }
2833
+ /**
2834
+ * Analyze execution results from executor to generate meaningful descriptions
2835
+ */
2836
+ analyzeExecutionResults(executor, originalPrompt) {
2837
+ const tasks = executor.tasks;
2838
+ const success = !executor.isInErrorState();
2839
+ if (!success) {
2840
+ const errorTask = executor.latestErrorTask();
2841
+ return {
2842
+ success: false,
2843
+ actionType: "error",
2844
+ description: `Failed to execute: ${originalPrompt}`,
2845
+ error: errorTask?.error
2846
+ };
2847
+ }
2848
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2849
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2850
+ const lastAction = actionTasks[actionTasks.length - 1];
2851
+ const lastLocate = locateTasks[locateTasks.length - 1];
2852
+ if (!lastAction) {
2853
+ return {
2854
+ success: true,
2855
+ actionType: "unknown",
2856
+ description: `Completed: ${originalPrompt}`
2857
+ };
2858
+ }
2859
+ const actionType = lastAction.subType || "unknown";
2860
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2861
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2862
+ return {
2863
+ success: true,
2864
+ actionType,
2865
+ description,
2866
+ elementInfo,
2867
+ timing: lastAction.timing?.cost
2868
+ };
2869
+ }
2870
+ /**
2871
+ * Extract element information from locate task
2872
+ */
2873
+ extractElementInfo(locateTask, _actionTask) {
2874
+ if (!locateTask?.output?.element)
2875
+ return void 0;
2876
+ const element = locateTask.output.element;
2877
+ return {
2878
+ type: element.attributes?.nodeType || "unknown",
2879
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2880
+ location: `(${element.center[0]}, ${element.center[1]})`
2881
+ };
2882
+ }
2883
+ /**
2884
+ * Generate natural language description for actions
2885
+ */
2886
+ generateActionDescription(actionType, param, elementInfo) {
2887
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2888
+ switch (actionType) {
2889
+ case "Tap":
2890
+ return `Clicked on ${elementDesc}`;
2891
+ case "Input":
2892
+ const inputValue = param?.value || "";
2893
+ return `Entered "${inputValue}" into ${elementDesc}`;
2894
+ case "KeyboardPress":
2895
+ return `Pressed ${param?.value || "key"}`;
2896
+ case "Scroll":
2897
+ return `Scrolled ${param?.direction || "on page"}`;
2898
+ case "Hover":
2899
+ return `Hovered over ${elementDesc}`;
2900
+ case "Drag":
2901
+ return `Dragged ${elementDesc}`;
2902
+ default:
2903
+ return `Performed ${actionType} action on ${elementDesc}`;
2904
+ }
2905
+ }
2722
2906
  };
2723
2907
 
2724
2908
  // src/puppeteer/base-page.ts