misoai-web 1.0.5 → 1.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (66)
  1. package/LICENSE +21 -21
  2. package/README.md +352 -352
  3. package/bin/midscene-playground +2 -2
  4. package/dist/es/agent.js +247 -63
  5. package/dist/es/agent.js.map +1 -1
  6. package/dist/es/bridge-mode-browser.js.map +1 -1
  7. package/dist/es/bridge-mode.js +247 -63
  8. package/dist/es/bridge-mode.js.map +1 -1
  9. package/dist/es/chrome-extension.js +247 -63
  10. package/dist/es/chrome-extension.js.map +1 -1
  11. package/dist/es/index.js +247 -63
  12. package/dist/es/index.js.map +1 -1
  13. package/dist/es/midscene-playground.js +247 -63
  14. package/dist/es/midscene-playground.js.map +1 -1
  15. package/dist/es/midscene-server.js.map +1 -1
  16. package/dist/es/playground.js +247 -63
  17. package/dist/es/playground.js.map +1 -1
  18. package/dist/es/playwright-report.js.map +1 -1
  19. package/dist/es/playwright.js +247 -63
  20. package/dist/es/playwright.js.map +1 -1
  21. package/dist/es/puppeteer-agent-launcher.js +247 -63
  22. package/dist/es/puppeteer-agent-launcher.js.map +1 -1
  23. package/dist/es/puppeteer.js +247 -63
  24. package/dist/es/puppeteer.js.map +1 -1
  25. package/dist/es/ui-utils.js.map +1 -1
  26. package/dist/es/utils.js.map +1 -1
  27. package/dist/es/yaml.js.map +1 -1
  28. package/dist/lib/agent.js +247 -63
  29. package/dist/lib/agent.js.map +1 -1
  30. package/dist/lib/bridge-mode-browser.js.map +1 -1
  31. package/dist/lib/bridge-mode.js +247 -63
  32. package/dist/lib/bridge-mode.js.map +1 -1
  33. package/dist/lib/chrome-extension.js +247 -63
  34. package/dist/lib/chrome-extension.js.map +1 -1
  35. package/dist/lib/index.js +247 -63
  36. package/dist/lib/index.js.map +1 -1
  37. package/dist/lib/midscene-playground.js +247 -63
  38. package/dist/lib/midscene-playground.js.map +1 -1
  39. package/dist/lib/midscene-server.js.map +1 -1
  40. package/dist/lib/playground.js +247 -63
  41. package/dist/lib/playground.js.map +1 -1
  42. package/dist/lib/playwright-report.js.map +1 -1
  43. package/dist/lib/playwright.js +247 -63
  44. package/dist/lib/playwright.js.map +1 -1
  45. package/dist/lib/puppeteer-agent-launcher.js +247 -63
  46. package/dist/lib/puppeteer-agent-launcher.js.map +1 -1
  47. package/dist/lib/puppeteer.js +247 -63
  48. package/dist/lib/puppeteer.js.map +1 -1
  49. package/dist/lib/ui-utils.js.map +1 -1
  50. package/dist/lib/utils.js.map +1 -1
  51. package/dist/lib/yaml.js.map +1 -1
  52. package/dist/types/agent.d.ts +19 -4
  53. package/dist/types/bridge-mode-browser.d.ts +2 -2
  54. package/dist/types/bridge-mode.d.ts +2 -2
  55. package/dist/types/{browser-a1877d18.d.ts → browser-aec1055d.d.ts} +1 -1
  56. package/dist/types/chrome-extension.d.ts +2 -2
  57. package/dist/types/index.d.ts +1 -1
  58. package/dist/types/midscene-server.d.ts +1 -1
  59. package/dist/types/{page-663ece08.d.ts → page-86ab0fe1.d.ts} +34 -34
  60. package/dist/types/playground.d.ts +2 -2
  61. package/dist/types/playwright.d.ts +1 -1
  62. package/dist/types/puppeteer-agent-launcher.d.ts +1 -1
  63. package/dist/types/puppeteer.d.ts +1 -1
  64. package/dist/types/utils.d.ts +1 -1
  65. package/dist/types/yaml.d.ts +1 -1
  66. package/package.json +3 -3
package/bin/midscene-playground CHANGED
@@ -1,3 +1,3 @@
1
- #!/usr/bin/env node
2
-
1
+ #!/usr/bin/env node
2
+
3
3
  require('../dist/lib/midscene-playground.js');
package/dist/es/agent.js CHANGED
@@ -1688,44 +1688,70 @@ var TaskCache = class {
1688
1688
  this.cache = cacheContent;
1689
1689
  this.cacheOriginalLength = this.cache.caches.length;
1690
1690
  }
1691
- matchCache(prompt, type) {
1691
+ matchCache(prompt, type, contextData) {
1692
+ const contextHash = contextData ? this.generateContextHash(contextData) : void 0;
1692
1693
  for (let i = 0; i < this.cacheOriginalLength; i++) {
1693
1694
  const item = this.cache.caches[i];
1694
1695
  const key = `${type}:${prompt}:${i}`;
1695
- if (item.type === type && item.prompt === prompt && !this.matchedCacheIndices.has(key)) {
1696
- this.matchedCacheIndices.add(key);
1697
- debug3(
1698
- "cache found and marked as used, type: %s, prompt: %s, index: %d",
1699
- type,
1700
- prompt,
1701
- i
1702
- );
1703
- return {
1704
- cacheContent: item,
1705
- updateFn: (cb) => {
1706
- debug3(
1707
- "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1708
- type,
1709
- prompt,
1710
- i
1711
- );
1712
- cb(item);
1713
- debug3(
1714
- "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1715
- type,
1716
- prompt,
1717
- i
1718
- );
1719
- this.flushCacheToFile();
1696
+ if (item.type !== type || item.prompt !== prompt || this.matchedCacheIndices.has(key)) {
1697
+ continue;
1698
+ }
1699
+ if (type === "plan" && item.type === "plan") {
1700
+ const planItem = item;
1701
+ if (contextHash && planItem.contextHash) {
1702
+ if (contextHash !== planItem.contextHash) {
1703
+ debug3("cache context mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1704
+ continue;
1720
1705
  }
1721
- };
1706
+ } else if (contextHash || planItem.contextHash) {
1707
+ debug3("cache context availability mismatch, type: %s, prompt: %s, index: %d", type, prompt, i);
1708
+ continue;
1709
+ }
1722
1710
  }
1711
+ this.matchedCacheIndices.add(key);
1712
+ debug3(
1713
+ "cache found and marked as used, type: %s, prompt: %s, index: %d, contextMatch: %s",
1714
+ type,
1715
+ prompt,
1716
+ i,
1717
+ contextHash ? "yes" : "no-context"
1718
+ );
1719
+ return {
1720
+ cacheContent: item,
1721
+ updateFn: (cb) => {
1722
+ debug3(
1723
+ "will call updateFn to update cache, type: %s, prompt: %s, index: %d",
1724
+ type,
1725
+ prompt,
1726
+ i
1727
+ );
1728
+ cb(item);
1729
+ debug3(
1730
+ "cache updated, will flush to file, type: %s, prompt: %s, index: %d",
1731
+ type,
1732
+ prompt,
1733
+ i
1734
+ );
1735
+ this.flushCacheToFile();
1736
+ }
1737
+ };
1723
1738
  }
1724
- debug3("no unused cache found, type: %s, prompt: %s", type, prompt);
1739
+ debug3("no unused cache found, type: %s, prompt: %s, contextHash: %s", type, prompt, contextHash);
1725
1740
  return void 0;
1726
1741
  }
1727
- matchPlanCache(prompt) {
1728
- return this.matchCache(prompt, "plan");
1742
+ generateContextHash(contextData) {
1743
+ const sortedKeys = Object.keys(contextData).sort();
1744
+ const stableString = sortedKeys.map((key) => `${key}:${JSON.stringify(contextData[key])}`).join("|");
1745
+ let hash = 0;
1746
+ for (let i = 0; i < stableString.length; i++) {
1747
+ const char = stableString.charCodeAt(i);
1748
+ hash = (hash << 5) - hash + char;
1749
+ hash = hash & hash;
1750
+ }
1751
+ return hash.toString(36);
1752
+ }
1753
+ matchPlanCache(prompt, contextData) {
1754
+ return this.matchCache(prompt, "plan", contextData);
1729
1755
  }
1730
1756
  matchLocateCache(prompt) {
1731
1757
  return this.matchCache(prompt, "locate");
@@ -1801,11 +1827,16 @@ cache file: ${cacheFile}`
1801
1827
  );
1802
1828
  }
1803
1829
  }
1804
- updateOrAppendCacheRecord(newRecord, cachedRecord) {
1830
+ updateOrAppendCacheRecord(newRecord, cachedRecord, contextData) {
1805
1831
  if (cachedRecord) {
1806
1832
  if (newRecord.type === "plan") {
1807
1833
  cachedRecord.updateFn((cache) => {
1808
- cache.yamlWorkflow = newRecord.yamlWorkflow;
1834
+ const planCache = cache;
1835
+ planCache.yamlWorkflow = newRecord.yamlWorkflow;
1836
+ if (contextData) {
1837
+ planCache.contextHash = this.generateContextHash(contextData);
1838
+ planCache.contextData = { ...contextData };
1839
+ }
1809
1840
  });
1810
1841
  } else {
1811
1842
  cachedRecord.updateFn((cache) => {
@@ -1813,6 +1844,11 @@ cache file: ${cacheFile}`
1813
1844
  });
1814
1845
  }
1815
1846
  } else {
1847
+ if (newRecord.type === "plan" && contextData) {
1848
+ const planRecord = newRecord;
1849
+ planRecord.contextHash = this.generateContextHash(contextData);
1850
+ planRecord.contextData = { ...contextData };
1851
+ }
1816
1852
  this.appendCache(newRecord);
1817
1853
  }
1818
1854
  }
@@ -2173,34 +2209,35 @@ var PageAgent = class {
2173
2209
  };
2174
2210
  }
2175
2211
  async aiAction(taskPrompt, opt) {
2212
+ const originalPrompt = taskPrompt;
2213
+ let processedPrompt = taskPrompt;
2176
2214
  if (this.opts.enableCumulativeContext && this.contextStore) {
2177
2215
  try {
2178
- const originalPrompt = taskPrompt;
2179
- const processedPrompt = this.contextStore.replaceAllReferences(taskPrompt, "action");
2180
- if (originalPrompt !== processedPrompt) {
2181
- debug4("Context replacement in aiAction:", {
2182
- original: originalPrompt,
2183
- processed: processedPrompt,
2184
- storedData: this.contextStore.getAllData()
2216
+ const storedData = this.contextStore.getAllData();
2217
+ if (Object.keys(storedData).length > 0) {
2218
+ debug4("Available data for aiAction:", {
2219
+ prompt: taskPrompt,
2220
+ availableData: storedData
2185
2221
  });
2186
2222
  }
2187
- this.contextStore.addStep({
2188
- type: "action",
2189
- summary: `Action: ${processedPrompt}`,
2190
- prompt: processedPrompt
2191
- });
2192
- debug4("Added action step to context store:", {
2193
- stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2194
- totalSteps: this.contextStore.getRecentSteps(100).length
2195
- });
2196
- taskPrompt = processedPrompt;
2197
2223
  } catch (error) {
2198
2224
  debug4("Context store operation failed:", error);
2199
2225
  }
2200
2226
  }
2201
2227
  const cacheable = opt?.cacheable;
2202
2228
  const isVlmUiTars = vlLocateMode() === "vlm-ui-tars";
2203
- const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt);
2229
+ let contextData;
2230
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2231
+ try {
2232
+ contextData = this.contextStore.getAllData();
2233
+ if (contextData && Object.keys(contextData).length === 0) {
2234
+ contextData = void 0;
2235
+ }
2236
+ } catch (error) {
2237
+ debug4("Failed to get context data for cache:", error);
2238
+ }
2239
+ }
2240
+ const matchedCache = isVlmUiTars || cacheable === false ? void 0 : this.taskCache?.matchPlanCache(taskPrompt, contextData);
2204
2241
  if (matchedCache && this.taskCache?.isCacheResultUsed) {
2205
2242
  const { executor: executor2 } = await this.taskExecutor.loadYamlFlowAsPlanning(
2206
2243
  taskPrompt,
@@ -2210,6 +2247,28 @@ var PageAgent = class {
2210
2247
  debug4("matched cache, will call .runYaml to run the action");
2211
2248
  const yaml5 = matchedCache.cacheContent?.yamlWorkflow;
2212
2249
  const result = await this.runYaml(yaml5);
2250
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2251
+ try {
2252
+ const executionResult = {
2253
+ success: true,
2254
+ actionType: "cached",
2255
+ description: `Executed cached action: ${processedPrompt}`,
2256
+ timing: result.metadata?.totalTime
2257
+ };
2258
+ this.contextStore.addStep({
2259
+ type: "action",
2260
+ summary: `Action: ${processedPrompt} (cached)`,
2261
+ prompt: processedPrompt,
2262
+ executionResult
2263
+ });
2264
+ debug4("Added cached action step to context store:", {
2265
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2266
+ totalSteps: this.contextStore.getRecentSteps(100).length
2267
+ });
2268
+ } catch (error) {
2269
+ debug4("Failed to add cached action step:", error);
2270
+ }
2271
+ }
2213
2272
  return {
2214
2273
  result: result.result,
2215
2274
  metadata: metadata2
@@ -2234,10 +2293,39 @@ var PageAgent = class {
2234
2293
  prompt: taskPrompt,
2235
2294
  yamlWorkflow: yamlFlowStr
2236
2295
  },
2237
- matchedCache
2296
+ matchedCache,
2297
+ contextData
2298
+ // Pass context data for cache creation
2238
2299
  );
2239
2300
  }
2240
2301
  const metadata = this.afterTaskRunning(executor);
2302
+ if (this.opts.enableCumulativeContext && this.contextStore) {
2303
+ try {
2304
+ const executionResult = this.analyzeExecutionResults(executor, originalPrompt);
2305
+ this.contextStore.addStep({
2306
+ type: "action",
2307
+ summary: `Action: ${processedPrompt}`,
2308
+ prompt: processedPrompt,
2309
+ executionResult
2310
+ });
2311
+ debug4("Added action step with execution result to context store:", {
2312
+ stepNumber: this.contextStore.getRecentSteps(1)[0]?.stepNumber,
2313
+ totalSteps: this.contextStore.getRecentSteps(100).length,
2314
+ executionResult
2315
+ });
2316
+ } catch (error) {
2317
+ debug4("Failed to analyze execution results, adding step without execution result:", error);
2318
+ try {
2319
+ this.contextStore.addStep({
2320
+ type: "action",
2321
+ summary: `Action: ${processedPrompt}`,
2322
+ prompt: processedPrompt
2323
+ });
2324
+ } catch (stepError) {
2325
+ debug4("Failed to add action step:", stepError);
2326
+ }
2327
+ }
2328
+ }
2241
2329
  return {
2242
2330
  result: output,
2243
2331
  metadata
@@ -2422,23 +2510,40 @@ var PageAgent = class {
2422
2510
  };
2423
2511
  }
2424
2512
  async aiAssert(assertion, msg, opt) {
2425
- let processedAssertion = assertion;
2513
+ let executionContext = "";
2426
2514
  if (this.opts.enableCumulativeContext && this.contextStore) {
2427
2515
  try {
2428
- const originalAssertion = assertion;
2429
- processedAssertion = this.contextStore.replaceAllReferences(assertion, "assertion");
2430
- if (originalAssertion !== processedAssertion) {
2431
- debug4("Context replacement in aiAssert:", {
2432
- original: originalAssertion,
2433
- processed: processedAssertion,
2434
- context: "assertion",
2435
- storedData: this.contextStore.getAllData()
2516
+ const recentSteps = this.contextStore.getRecentSteps(3);
2517
+ const stepsWithExecutionResults = recentSteps.filter((step) => step.executionResult);
2518
+ const storedData = this.contextStore.getAllData();
2519
+ if (stepsWithExecutionResults.length > 0) {
2520
+ const recentActions = stepsWithExecutionResults.map((step) => {
2521
+ const result = step.executionResult;
2522
+ return `- ${result.description}${result.success ? "" : " (FAILED)"}`;
2523
+ }).join("\n");
2524
+ executionContext = `
2525
+
2526
+ Recent actions performed:
2527
+ ${recentActions}
2528
+
2529
+ This context may help verify the assertion.`;
2530
+ }
2531
+ if (storedData && Object.keys(storedData).length > 0) {
2532
+ executionContext += `
2533
+
2534
+ Available data for reference:
2535
+ ${JSON.stringify(storedData, null, 2)}
2536
+
2537
+ Note: If the assertion references any data keys or natural language equivalents, consider the stored values when verifying.`;
2538
+ debug4("Available data for aiAssert:", {
2539
+ assertion,
2540
+ availableData: storedData
2436
2541
  });
2437
2542
  }
2438
2543
  this.contextStore.addStep({
2439
2544
  type: "assertion",
2440
- summary: `Assertion: ${processedAssertion}`,
2441
- prompt: processedAssertion
2545
+ summary: `Assertion: ${assertion}`,
2546
+ prompt: assertion
2442
2547
  });
2443
2548
  debug4("Added assertion step to context store:", {
2444
2549
  totalSteps: this.contextStore.getRecentSteps(100).length
@@ -2454,7 +2559,13 @@ var PageAgent = class {
2454
2559
  } catch (e) {
2455
2560
  }
2456
2561
  }
2457
- const assertionWithContext = currentUrl ? `For the page at URL "${currentUrl}", ${processedAssertion}` : processedAssertion;
2562
+ let assertionWithContext = assertion;
2563
+ if (currentUrl) {
2564
+ assertionWithContext = `For the page at URL "${currentUrl}", ${assertion}`;
2565
+ }
2566
+ if (executionContext) {
2567
+ assertionWithContext += executionContext;
2568
+ }
2458
2569
  const { output, executor } = await this.taskExecutor.assert(assertionWithContext);
2459
2570
  const metadata = this.afterTaskRunning(executor, true);
2460
2571
  if (output && opt?.keepRawResponse) {
@@ -2668,6 +2779,79 @@ ${errors}`);
2668
2779
  async destroy() {
2669
2780
  await this.page.destroy();
2670
2781
  }
2782
+ /**
2783
+ * Analyze execution results from executor to generate meaningful descriptions
2784
+ */
2785
+ analyzeExecutionResults(executor, originalPrompt) {
2786
+ const tasks = executor.tasks;
2787
+ const success = !executor.isInErrorState();
2788
+ if (!success) {
2789
+ const errorTask = executor.latestErrorTask();
2790
+ return {
2791
+ success: false,
2792
+ actionType: "error",
2793
+ description: `Failed to execute: ${originalPrompt}`,
2794
+ error: errorTask?.error
2795
+ };
2796
+ }
2797
+ const actionTasks = tasks.filter((t) => t.type === "Action" && t.status === "finished");
2798
+ const locateTasks = tasks.filter((t) => t.type === "Insight" && t.subType === "Locate");
2799
+ const lastAction = actionTasks[actionTasks.length - 1];
2800
+ const lastLocate = locateTasks[locateTasks.length - 1];
2801
+ if (!lastAction) {
2802
+ return {
2803
+ success: true,
2804
+ actionType: "unknown",
2805
+ description: `Completed: ${originalPrompt}`
2806
+ };
2807
+ }
2808
+ const actionType = lastAction.subType || "unknown";
2809
+ const elementInfo = this.extractElementInfo(lastLocate, lastAction);
2810
+ const description = this.generateActionDescription(actionType, lastAction.param, elementInfo);
2811
+ return {
2812
+ success: true,
2813
+ actionType,
2814
+ description,
2815
+ elementInfo,
2816
+ timing: lastAction.timing?.cost
2817
+ };
2818
+ }
2819
+ /**
2820
+ * Extract element information from locate task
2821
+ */
2822
+ extractElementInfo(locateTask, _actionTask) {
2823
+ if (!locateTask?.output?.element)
2824
+ return void 0;
2825
+ const element = locateTask.output.element;
2826
+ return {
2827
+ type: element.attributes?.nodeType || "unknown",
2828
+ text: element.content || element.attributes?.placeholder || element.attributes?.title || "",
2829
+ location: `(${element.center[0]}, ${element.center[1]})`
2830
+ };
2831
+ }
2832
+ /**
2833
+ * Generate natural language description for actions
2834
+ */
2835
+ generateActionDescription(actionType, param, elementInfo) {
2836
+ const elementDesc = elementInfo ? `'${elementInfo.text || elementInfo.type}' element` : "element";
2837
+ switch (actionType) {
2838
+ case "Tap":
2839
+ return `Clicked on ${elementDesc}`;
2840
+ case "Input":
2841
+ const inputValue = param?.value || "";
2842
+ return `Entered "${inputValue}" into ${elementDesc}`;
2843
+ case "KeyboardPress":
2844
+ return `Pressed ${param?.value || "key"}`;
2845
+ case "Scroll":
2846
+ return `Scrolled ${param?.direction || "on page"}`;
2847
+ case "Hover":
2848
+ return `Hovered over ${elementDesc}`;
2849
+ case "Drag":
2850
+ return `Dragged ${elementDesc}`;
2851
+ default:
2852
+ return `Performed ${actionType} action on ${elementDesc}`;
2853
+ }
2854
+ }
2671
2855
  };
2672
2856
  export {
2673
2857
  PageAgent