misoai-web 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66):
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
@@ -1741,44 +1741,70 @@ var TaskCache = class {
1741
1741
  this.cache = cacheContent;
1742
1742
  this.cacheOriginalLength = this.cache.caches.length;
1743
1743
  }
1744
- matchCache(prompt, type) {
1744
+ matchCache(prompt, type, contextData) {
1745
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1745
1746
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1746
1747
  const item = this.cache.caches[i];
1747
1748
  const key = `${type}:${prompt}:${i}`;
1748
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1749
- this.matchedCacheIndices.add(key);
1750
- debug3(
1751
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1752
- type,
1753
- prompt,
1754
- i
1755
- );
1756
- return {
1757
- cacheContent: item,
1758
- updateFn: (cb) => {
1759
- debug3(
1760
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1761
- type,
1762
- prompt,
1763
- i
1764
- );
1765
- cb(item);
1766
- debug3(
1767
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1768
- type,
1769
- prompt,
1770
- i
1771
- );
1772
- this.flushCacheToFile();
1749
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1750
+ continue;
1751
+ }
1752
+ if (type === "plan" && item.type === "plan") {
1753
+ const planItem = item;
1754
+ if (contextHash && planItem.contextHash) {
1755
+ if (contextHash !== planItem.contextHash) {
1756
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1757
+ continue;
1773
1758
  }
1774
- };
1759
+ } else if (contextHash || planItem.contextHash) {
1760
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1761
+ continue;
1762
+ }
1775
1763
  }
1764
+ this.matchedCacheIndices.add(key);
1765
+ debug3(
1766
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1767
+ type,
1768
+ prompt,
1769
+ i,
1770
+ contextHash ? "yes" : "no-context"
1771
+ );
1772
+ return {
1773
+ cacheContent: item,
1774
+ updateFn: (cb) => {
1775
+ debug3(
1776
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1777
+ type,
1778
+ prompt,
1779
+ i
1780
+ );
1781
+ cb(item);
1782
+ debug3(
1783
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1784
+ type,
1785
+ prompt,
1786
+ i
1787
+ );
1788
+ this.flushCacheToFile();
1789
+ }
1790
+ };
1776
1791
  }
1777
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1792
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1778
1793
  return void 0;
1779
1794
  }
1780
- matchPlanCache(prompt) {
1781
- return this.matchCache(prompt, "plan");
1795
+ generateContextHash(contextData) {
1796
+ const sortedKeys = Object.keys(contextData).sort();
1797
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1798
+ let hash = 0;
1799
+ for (let i = 0; i < stableString.length; i++) {
1800
+ const char = stableString.charCodeAt(i);
1801
+ hash = (hash << 5) - hash + char;
1802
+ hash = hash & hash;
1803
+ }
1804
+ return hash.toString(36);
1805
+ }
1806
+ matchPlanCache(prompt, contextData) {
1807
+ return this.matchCache(prompt, "plan", contextData);
1782
1808
  }
1783
1809
  matchLocateCache(prompt) {
1784
1810
  return this.matchCache(prompt, "locate");
@@ -1854,11 +1880,16 @@ cache file: ${cacheFile}`
1854
1880
  );
1855
1881
  }
1856
1882
  }
1857
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1883
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1858
1884
  if (cachedRecord) {
1859
1885
  if (newRecord.type === "plan") {
1860
1886
  cachedRecord.updateFn((cache) => {
1861
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1887
+ const planCache = cache;
1888
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1889
+ if (contextData) {
1890
+ planCache.contextHash = this.generateContextHash(contextData);
1891
+ planCache.contextData = { ...contextData };
1892
+ }
1862
1893
  });
1863
1894
  } else {
1864
1895
  cachedRecord.updateFn((cache) => {
@@ -1866,6 +1897,11 @@ cache file: ${cacheFile}`
1866
1897
  });
1867
1898
  }
1868
1899
  } else {
1900
+ if (newRecord.type === "plan" && contextData) {
1901
+ const planRecord = newRecord;
1902
+ planRecord.contextHash = this.generateContextHash(contextData);
1903
+ planRecord.contextData = { ...contextData };
1904
+ }
1869
1905
  this.appendCache(newRecord);
1870
1906
  }
1871
1907
  }
@@ -2226,34 +2262,35 @@ var PageAgent = class {
2226
2262
  };
2227
2263
  }
2228
2264
  async aiAction(taskPrompt, opt) {
2265
+ const originalPrompt = taskPrompt;
2266
+ let processedPrompt = taskPrompt;
2229
2267
  if (this.opts.enableCumulativeContext && this.contextStore) {
2230
2268
  try {
2231
- const originalPrompt = taskPrompt;
2232
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2233
- if (originalPrompt !== processedPrompt) {
2234
- debug4("Context replacement in aiAction:", {
2235
- original: originalPrompt,
2236
- processed: processedPrompt,
2237
- storedData: this.contextStore.getAllData()
2269
+ const storedData = this.contextStore.getAllData();
2270
+ if (Object.keys(storedData).length > 0) {
2271
+ debug4("Available data for aiAction:", {
2272
+ prompt: taskPrompt,
2273
+ availableData: storedData
2238
2274
  });
2239
2275
  }
2240
- this.contextStore.addStep({
2241
- type: "action",
2242
- summary: `Action: ${processedPrompt}`,
2243
- prompt: processedPrompt
2244
- });
2245
- debug4("Added action step to context store:", {
2246
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2247
- totalSteps: this.contextStore.getRecentSteps(100).length
2248
- });
2249
- taskPrompt = processedPrompt;
2250
2276
  } catch (error) {
2251
2277
  debug4("Context store operation failed:", error);
2252
2278
  }
2253
2279
  }
2254
2280
  const cacheable = opt?.cacheable;
2255
2281
  const isVlmUiTars = (0, import_env2.vlLocateMode)() === "vlm-ui-tars";
2256
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2282
+ let contextData;
2283
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2284
+ try {
2285
+ contextData = this.contextStore.getAllData();
2286
+ if (contextData && Object.keys(contextData).length === 0) {
2287
+ contextData = void 0;
2288
+ }
2289
+ } catch (error) {
2290
+ debug4("Failed to get context data for cache:", error);
2291
+ }
2292
+ }
2293
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2257
2294
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2258
2295
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2259
2296
  taskPrompt,
@@ -2263,6 +2300,28 @@ var PageAgent = class {
2263
2300
  debug4("matched cache, will call .runYaml to run the action");
2264
2301
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2265
2302
  const result = await this.runYaml(yaml5);
2303
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2304
+ try {
2305
+ const executionResult = {
2306
+ success: true,
2307
+ actionType: "cached",
2308
+ description: `Executed cached action: ${processedPrompt}`,
2309
+ timing: result.metadata?.totalTime
2310
+ };
2311
+ this.contextStore.addStep({
2312
+ type: "action",
2313
+ summary: `Action: ${processedPrompt} (cached)`,
2314
+ prompt: processedPrompt,
2315
+ executionResult
2316
+ });
2317
+ debug4("Added cached action step to context store:", {
2318
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2319
+ totalSteps: this.contextStore.getRecentSteps(100).length
2320
+ });
2321
+ } catch (error) {
2322
+ debug4("Failed to add cached action step:", error);
2323
+ }
2324
+ }
2266
2325
  return {
2267
2326
  result: result.result,
2268
2327
  metadata: metadata2
@@ -2287,10 +2346,39 @@ var PageAgent = class {
2287
2346
  prompt: taskPrompt,
2288
2347
  yamlWorkflow: yamlFlowStr
2289
2348
  },
2290
- matchedCache
2349
+ matchedCache,
2350
+ contextData
2351
+ // Pass context data for cache creation
2291
2352
  );
2292
2353
  }
2293
2354
  const metadata = this.afterTaskRunning(executor);
2355
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2356
+ try {
2357
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2358
+ this.contextStore.addStep({
2359
+ type: "action",
2360
+ summary: `Action: ${processedPrompt}`,
2361
+ prompt: processedPrompt,
2362
+ executionResult
2363
+ });
2364
+ debug4("Added action step with execution result to context store:", {
2365
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2366
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2367
+ executionResult
2368
+ });
2369
+ } catch (error) {
2370
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2371
+ try {
2372
+ this.contextStore.addStep({
2373
+ type: "action",
2374
+ summary: `Action: ${processedPrompt}`,
2375
+ prompt: processedPrompt
2376
+ });
2377
+ } catch (stepError) {
2378
+ debug4("Failed to add action step:", stepError);
2379
+ }
2380
+ }
2381
+ }
2294
2382
  return {
2295
2383
  result: output,
2296
2384
  metadata
@@ -2475,23 +2563,40 @@ var PageAgent = class {
2475
2563
  };
2476
2564
  }
2477
2565
  async aiAssert(assertion, msg, opt) {
2478
- let processedAssertion = assertion;
2566
+ let executionContext = "";
2479
2567
  if (this.opts.enableCumulativeContext && this.contextStore) {
2480
2568
  try {
2481
- const originalAssertion = assertion;
2482
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2483
- if (originalAssertion !== processedAssertion) {
2484
- debug4("Context replacement in aiAssert:", {
2485
- original: originalAssertion,
2486
- processed: processedAssertion,
2487
- context: "assertion",
2488
- storedData: this.contextStore.getAllData()
2569
+ const recentSteps = this.contextStore.getRecentSteps(3);
2570
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2571
+ const storedData = this.contextStore.getAllData();
2572
+ if (stepsWithExecutionResults.length > 0) {
2573
+ const recentActions = stepsWithExecutionResults.map((step) => {
2574
+ const result = step.executionResult;
2575
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2576
+ }).join("\n");
2577
+ executionContext = `
2578
+
2579
+ Recent actions performed:
2580
+ ${recentActions}
2581
+
2582
+ This context may help verify the assertion.`;
2583
+ }
2584
+ if (storedData && Object.keys(storedData).length > 0) {
2585
+ executionContext += `
2586
+
2587
+ Available data for reference:
2588
+ ${JSON.stringify(storedData, null, 2)}
2589
+
2590
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2591
+ debug4("Available data for aiAssert:", {
2592
+ assertion,
2593
+ availableData: storedData
2489
2594
  });
2490
2595
  }
2491
2596
  this.contextStore.addStep({
2492
2597
  type: "assertion",
2493
- summary: `Assertion: ${processedAssertion}`,
2494
- prompt: processedAssertion
2598
+ summary: `Assertion: ${assertion}`,
2599
+ prompt: assertion
2495
2600
  });
2496
2601
  debug4("Added assertion step to context store:", {
2497
2602
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2507,7 +2612,13 @@ var PageAgent = class {
2507
2612
  } catch (e) {
2508
2613
  }
2509
2614
  }
2510
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2615
+ let assertionWithContext = assertion;
2616
+ if (currentUrl) {
2617
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2618
+ }
2619
+ if (executionContext) {
2620
+ assertionWithContext += executionContext;
2621
+ }
2511
2622
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2512
2623
  const metadata = this.afterTaskRunning(executor, true);
2513
2624
  if (output && opt?.keepRawResponse) {
@@ -2721,6 +2832,79 @@ ${errors}`);
2721
2832
  async destroy() {
2722
2833
  await this.page.destroy();
2723
2834
  }
2835
+ /**
2836
+ * Analyze execution results from executor to generate meaningful descriptions
2837
+ */
2838
+ analyzeExecutionResults(executor, originalPrompt) {
2839
+ const tasks = executor.tasks;
2840
+ const success = !executor.isInErrorState();
2841
+ if (!success) {
2842
+ const errorTask = executor.latestErrorTask();
2843
+ return {
2844
+ success: false,
2845
+ actionType: "error",
2846
+ description: `Failed to execute: ${originalPrompt}`,
2847
+ error: errorTask?.error
2848
+ };
2849
+ }
2850
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2851
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2852
+ const lastAction = actionTasks[actionTasks.length - 1];
2853
+ const lastLocate = locateTasks[locateTasks.length - 1];
2854
+ if (!lastAction) {
2855
+ return {
2856
+ success: true,
2857
+ actionType: "unknown",
2858
+ description: `Completed: ${originalPrompt}`
2859
+ };
2860
+ }
2861
+ const actionType = lastAction.subType || "unknown";
2862
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2863
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2864
+ return {
2865
+ success: true,
2866
+ actionType,
2867
+ description,
2868
+ elementInfo,
2869
+ timing: lastAction.timing?.cost
2870
+ };
2871
+ }
2872
+ /**
2873
+ * Extract element information from locate task
2874
+ */
2875
+ extractElementInfo(locateTask, _actionTask) {
2876
+ if (!locateTask?.output?.element)
2877
+ return void 0;
2878
+ const element = locateTask.output.element;
2879
+ return {
2880
+ type: element.attributes?.nodeType || "unknown",
2881
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2882
+ location: `(${element.center[0]}, ${element.center[1]})`
2883
+ };
2884
+ }
2885
+ /**
2886
+ * Generate natural language description for actions
2887
+ */
2888
+ generateActionDescription(actionType, param, elementInfo) {
2889
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2890
+ switch (actionType) {
2891
+ case "Tap":
2892
+ return `Clicked on ${elementDesc}`;
2893
+ case "Input":
2894
+ const inputValue = param?.value || "";
2895
+ return `Entered "${inputValue}" into ${elementDesc}`;
2896
+ case "KeyboardPress":
2897
+ return `Pressed ${param?.value || "key"}`;
2898
+ case "Scroll":
2899
+ return `Scrolled ${param?.direction || "on page"}`;
2900
+ case "Hover":
2901
+ return `Hovered over ${elementDesc}`;
2902
+ case "Drag":
2903
+ return `Dragged ${elementDesc}`;
2904
+ default:
2905
+ return `Performed ${actionType} action on ${elementDesc}`;
2906
+ }
2907
+ }
2724
2908
  };
2725
2909
 
2726
2910
  // src/puppeteer/index.ts