testdriverai 7.0.0 → 7.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. package/.env.example +2 -0
  2. package/.github/workflows/linux-tests.yml +28 -0
  3. package/README.md +126 -0
  4. package/agent/index.js +7 -9
  5. package/agent/interface.js +13 -2
  6. package/agent/lib/commands.js +795 -136
  7. package/agent/lib/redraw.js +124 -39
  8. package/agent/lib/sandbox.js +40 -3
  9. package/agent/lib/sdk.js +21 -0
  10. package/agent/lib/valid-version.js +2 -2
  11. package/debugger/index.html +1 -1
  12. package/docs/docs.json +86 -71
  13. package/docs/guide/best-practices-polling.mdx +154 -0
  14. package/docs/v6/getting-started/self-hosting.mdx +3 -2
  15. package/docs/v7/_drafts/agents.mdx +852 -0
  16. package/docs/v7/_drafts/auto-cache-key.mdx +167 -0
  17. package/docs/v7/_drafts/best-practices.mdx +486 -0
  18. package/docs/v7/_drafts/caching-ai.mdx +215 -0
  19. package/docs/v7/_drafts/caching-selectors.mdx +400 -0
  20. package/docs/v7/_drafts/caching.mdx +366 -0
  21. package/docs/v7/_drafts/cli-to-sdk-migration.mdx +425 -0
  22. package/docs/v7/_drafts/core.mdx +459 -0
  23. package/docs/v7/_drafts/dashcam-title-feature.mdx +89 -0
  24. package/docs/v7/_drafts/debugging.mdx +349 -0
  25. package/docs/v7/_drafts/error-handling.mdx +501 -0
  26. package/docs/v7/_drafts/faq.mdx +393 -0
  27. package/docs/v7/_drafts/hooks.mdx +360 -0
  28. package/docs/v7/_drafts/implementation-plan.mdx +994 -0
  29. package/docs/v7/_drafts/init-command.mdx +95 -0
  30. package/docs/v7/_drafts/optimal-sdk-design.mdx +1348 -0
  31. package/docs/v7/_drafts/performance.mdx +517 -0
  32. package/docs/v7/_drafts/presets.mdx +210 -0
  33. package/docs/v7/_drafts/progressive-disclosure.mdx +230 -0
  34. package/docs/v7/_drafts/provision.mdx +266 -0
  35. package/docs/{QUICK_START_TEST_RECORDING.md → v7/_drafts/quick-start-test-recording.mdx} +3 -3
  36. package/docs/v7/_drafts/sdk-v7-complete.mdx +345 -0
  37. package/docs/v7/{guides → _drafts}/self-hosting.mdx +1 -1
  38. package/docs/v7/_drafts/troubleshooting.mdx +526 -0
  39. package/docs/v7/_drafts/vitest-plugin.mdx +477 -0
  40. package/docs/v7/_drafts/vitest.mdx +535 -0
  41. package/docs/v7/api/{ai.mdx → act.mdx} +24 -24
  42. package/docs/v7/api/client.mdx +1 -1
  43. package/docs/v7/api/dashcam.mdx +497 -0
  44. package/docs/v7/api/doubleClick.mdx +102 -0
  45. package/docs/v7/api/elements.mdx +143 -41
  46. package/docs/v7/api/find.mdx +258 -0
  47. package/docs/v7/api/mouseDown.mdx +161 -0
  48. package/docs/v7/api/mouseUp.mdx +164 -0
  49. package/docs/v7/api/rightClick.mdx +123 -0
  50. package/docs/v7/api/type.mdx +51 -7
  51. package/docs/v7/features/ai-native.mdx +427 -0
  52. package/docs/v7/features/easy-to-write.mdx +351 -0
  53. package/docs/v7/features/enterprise.mdx +540 -0
  54. package/docs/v7/features/fast.mdx +424 -0
  55. package/docs/v7/features/observable.mdx +623 -0
  56. package/docs/v7/features/powerful.mdx +531 -0
  57. package/docs/v7/features/scalable.mdx +417 -0
  58. package/docs/v7/features/stable.mdx +514 -0
  59. package/docs/v7/getting-started/configuration.mdx +380 -0
  60. package/docs/v7/getting-started/generating-tests.mdx +525 -0
  61. package/docs/v7/getting-started/installation.mdx +486 -0
  62. package/docs/v7/getting-started/quickstart.mdx +320 -141
  63. package/docs/v7/getting-started/running-and-debugging.mdx +511 -0
  64. package/docs/v7/getting-started/setting-up-in-ci.mdx +612 -0
  65. package/docs/v7/getting-started/writing-tests.mdx +535 -0
  66. package/docs/v7/overview/what-is-testdriver.mdx +398 -0
  67. package/docs/v7/platforms/linux.mdx +308 -0
  68. package/docs/v7/platforms/macos.mdx +433 -0
  69. package/docs/v7/platforms/windows.mdx +430 -0
  70. package/docs/v7/playwright.mdx +3 -3
  71. package/docs/v7/presets/chrome-extension.mdx +223 -0
  72. package/docs/v7/presets/chrome.mdx +303 -0
  73. package/docs/v7/presets/electron.mdx +453 -0
  74. package/docs/v7/presets/vscode.mdx +417 -0
  75. package/docs/v7/presets/webapp.mdx +396 -0
  76. package/examples/run-tests-with-recording.sh +2 -2
  77. package/interfaces/cli/commands/init.js +358 -0
  78. package/interfaces/vitest-plugin.mjs +393 -103
  79. package/lib/core/Dashcam.js +506 -0
  80. package/lib/core/index.d.ts +150 -0
  81. package/lib/core/index.js +12 -0
  82. package/lib/presets/index.mjs +331 -0
  83. package/lib/vitest/hooks.d.ts +119 -0
  84. package/lib/vitest/hooks.mjs +316 -0
  85. package/lib/vitest/setup.mjs +44 -0
  86. package/package.json +13 -3
  87. package/sdk.d.ts +350 -44
  88. package/sdk.js +818 -105
  89. package/{self-hosted.yml → setup/aws/self-hosted.yml} +1 -1
  90. package/test/manual/test-console-logs.test.mjs +42 -0
  91. package/test/manual/test-init.sh +54 -0
  92. package/test/manual/test-provision-auth.mjs +22 -0
  93. package/test/testdriver/assert.test.mjs +41 -0
  94. package/test/testdriver/auto-cache-key-demo.test.mjs +56 -0
  95. package/test/testdriver/chrome-extension.test.mjs +89 -0
  96. package/{testdriver/acceptance-sdk → test/testdriver}/drag-and-drop.test.mjs +7 -19
  97. package/{testdriver/acceptance-sdk → test/testdriver}/element-not-found.test.mjs +6 -19
  98. package/{testdriver/acceptance-sdk → test/testdriver}/exec-js.test.mjs +6 -18
  99. package/{testdriver/acceptance-sdk → test/testdriver}/exec-output.test.mjs +9 -21
  100. package/{testdriver/acceptance-sdk → test/testdriver}/exec-pwsh.test.mjs +14 -26
  101. package/{testdriver/acceptance-sdk → test/testdriver}/focus-window.test.mjs +8 -20
  102. package/{testdriver/acceptance-sdk → test/testdriver}/formatted-logging.test.mjs +5 -20
  103. package/{testdriver/acceptance-sdk → test/testdriver}/hover-image.test.mjs +10 -19
  104. package/{testdriver/acceptance-sdk → test/testdriver}/hover-text-with-description.test.mjs +7 -19
  105. package/{testdriver/acceptance-sdk → test/testdriver}/hover-text.test.mjs +5 -19
  106. package/{testdriver/acceptance-sdk → test/testdriver}/match-image.test.mjs +7 -19
  107. package/{testdriver/acceptance-sdk → test/testdriver}/press-keys.test.mjs +5 -19
  108. package/{testdriver/acceptance-sdk → test/testdriver}/prompt.test.mjs +7 -19
  109. package/{testdriver/acceptance-sdk → test/testdriver}/scroll-keyboard.test.mjs +6 -20
  110. package/{testdriver/acceptance-sdk → test/testdriver}/scroll-until-image.test.mjs +6 -18
  111. package/test/testdriver/scroll-until-text.test.mjs +28 -0
  112. package/{testdriver/acceptance-sdk → test/testdriver}/scroll.test.mjs +12 -21
  113. package/test/testdriver/setup/lifecycleHelpers.mjs +262 -0
  114. package/{testdriver/acceptance-sdk → test/testdriver}/setup/testHelpers.mjs +25 -20
  115. package/test/testdriver/type.test.mjs +45 -0
  116. package/vitest.config.mjs +11 -56
  117. package/.github/dependabot.yml +0 -11
  118. package/.github/workflows/acceptance-linux.yml +0 -75
  119. package/.github/workflows/acceptance-sdk-tests.yml +0 -133
  120. package/.github/workflows/acceptance-tests.yml +0 -130
  121. package/.github/workflows/lint.yml +0 -27
  122. package/.github/workflows/publish-canary.yml +0 -40
  123. package/.github/workflows/publish-latest.yml +0 -61
  124. package/.github/workflows/test-install.yml +0 -29
  125. package/.vscode/extensions.json +0 -3
  126. package/.vscode/launch.json +0 -22
  127. package/.vscode/mcp.json +0 -9
  128. package/.vscode/settings.json +0 -14
  129. package/CODEOWNERS +0 -3
  130. package/MIGRATION.md +0 -389
  131. package/SDK_README.md +0 -1122
  132. package/_testdriver/acceptance/assert.yaml +0 -7
  133. package/_testdriver/acceptance/dashcam.yaml +0 -9
  134. package/_testdriver/acceptance/drag-and-drop.yaml +0 -49
  135. package/_testdriver/acceptance/embed.yaml +0 -9
  136. package/_testdriver/acceptance/exec-js.yaml +0 -29
  137. package/_testdriver/acceptance/exec-output.yaml +0 -43
  138. package/_testdriver/acceptance/exec-shell.yaml +0 -40
  139. package/_testdriver/acceptance/focus-window.yaml +0 -16
  140. package/_testdriver/acceptance/hover-image.yaml +0 -18
  141. package/_testdriver/acceptance/hover-text-with-description.yaml +0 -29
  142. package/_testdriver/acceptance/hover-text.yaml +0 -14
  143. package/_testdriver/acceptance/if-else.yaml +0 -31
  144. package/_testdriver/acceptance/match-image.yaml +0 -15
  145. package/_testdriver/acceptance/press-keys.yaml +0 -35
  146. package/_testdriver/acceptance/prompt.yaml +0 -11
  147. package/_testdriver/acceptance/remember.yaml +0 -27
  148. package/_testdriver/acceptance/screenshots/cart.png +0 -0
  149. package/_testdriver/acceptance/scroll-keyboard.yaml +0 -34
  150. package/_testdriver/acceptance/scroll-until-image.yaml +0 -26
  151. package/_testdriver/acceptance/scroll-until-text.yaml +0 -20
  152. package/_testdriver/acceptance/scroll.yaml +0 -33
  153. package/_testdriver/acceptance/snippets/login.yaml +0 -29
  154. package/_testdriver/acceptance/snippets/match-cart.yaml +0 -8
  155. package/_testdriver/acceptance/type.yaml +0 -29
  156. package/_testdriver/behavior/failure.yaml +0 -7
  157. package/_testdriver/behavior/hover-text.yaml +0 -13
  158. package/_testdriver/behavior/lifecycle/postrun.yaml +0 -10
  159. package/_testdriver/behavior/lifecycle/prerun.yaml +0 -8
  160. package/_testdriver/behavior/lifecycle/provision.yaml +0 -8
  161. package/_testdriver/behavior/secrets.yaml +0 -7
  162. package/_testdriver/edge-cases/dashcam-chrome.yaml +0 -8
  163. package/_testdriver/edge-cases/exec-pwsh-multiline.yaml +0 -10
  164. package/_testdriver/edge-cases/js-exception.yaml +0 -8
  165. package/_testdriver/edge-cases/js-promise.yaml +0 -19
  166. package/_testdriver/edge-cases/lifecycle/postrun.yaml +0 -10
  167. package/_testdriver/edge-cases/prompt-in-middle.yaml +0 -23
  168. package/_testdriver/edge-cases/prompt-nested.yaml +0 -7
  169. package/_testdriver/edge-cases/success-test.yaml +0 -9
  170. package/_testdriver/examples/android/example.yaml +0 -12
  171. package/_testdriver/examples/android/lifecycle/postrun.yaml +0 -11
  172. package/_testdriver/examples/android/lifecycle/provision.yaml +0 -47
  173. package/_testdriver/examples/android/readme.md +0 -7
  174. package/_testdriver/examples/chrome-extension/lifecycle/provision.yaml +0 -74
  175. package/_testdriver/examples/desktop/lifecycle/prerun.yaml +0 -0
  176. package/_testdriver/examples/desktop/lifecycle/provision.yaml +0 -64
  177. package/_testdriver/examples/vscode-extension/lifecycle/provision.yaml +0 -73
  178. package/_testdriver/examples/web/lifecycle/postrun.yaml +0 -7
  179. package/_testdriver/examples/web/lifecycle/prerun.yaml +0 -22
  180. package/_testdriver/lifecycle/postrun.yaml +0 -8
  181. package/_testdriver/lifecycle/prerun.yaml +0 -15
  182. package/_testdriver/lifecycle/provision.yaml +0 -25
  183. package/debug-screenshot-1763401388589.png +0 -0
  184. package/mcp-server/AI_GUIDELINES.md +0 -57
  185. package/scripts/view-test-results.mjs +0 -96
  186. package/styles/.vale-config/2-MDX.ini +0 -5
  187. package/styles/Microsoft/AMPM.yml +0 -9
  188. package/styles/Microsoft/Accessibility.yml +0 -30
  189. package/styles/Microsoft/Acronyms.yml +0 -64
  190. package/styles/Microsoft/Adverbs.yml +0 -272
  191. package/styles/Microsoft/Auto.yml +0 -11
  192. package/styles/Microsoft/Avoid.yml +0 -14
  193. package/styles/Microsoft/Contractions.yml +0 -50
  194. package/styles/Microsoft/Dashes.yml +0 -13
  195. package/styles/Microsoft/DateFormat.yml +0 -8
  196. package/styles/Microsoft/DateNumbers.yml +0 -40
  197. package/styles/Microsoft/DateOrder.yml +0 -8
  198. package/styles/Microsoft/Ellipses.yml +0 -9
  199. package/styles/Microsoft/FirstPerson.yml +0 -16
  200. package/styles/Microsoft/Foreign.yml +0 -13
  201. package/styles/Microsoft/Gender.yml +0 -8
  202. package/styles/Microsoft/GenderBias.yml +0 -42
  203. package/styles/Microsoft/GeneralURL.yml +0 -11
  204. package/styles/Microsoft/HeadingAcronyms.yml +0 -7
  205. package/styles/Microsoft/HeadingColons.yml +0 -8
  206. package/styles/Microsoft/HeadingPunctuation.yml +0 -13
  207. package/styles/Microsoft/Headings.yml +0 -28
  208. package/styles/Microsoft/Hyphens.yml +0 -14
  209. package/styles/Microsoft/Negative.yml +0 -13
  210. package/styles/Microsoft/Ordinal.yml +0 -13
  211. package/styles/Microsoft/OxfordComma.yml +0 -8
  212. package/styles/Microsoft/Passive.yml +0 -183
  213. package/styles/Microsoft/Percentages.yml +0 -7
  214. package/styles/Microsoft/Plurals.yml +0 -7
  215. package/styles/Microsoft/Quotes.yml +0 -7
  216. package/styles/Microsoft/RangeTime.yml +0 -13
  217. package/styles/Microsoft/Semicolon.yml +0 -8
  218. package/styles/Microsoft/SentenceLength.yml +0 -6
  219. package/styles/Microsoft/Spacing.yml +0 -8
  220. package/styles/Microsoft/Suspended.yml +0 -7
  221. package/styles/Microsoft/Terms.yml +0 -42
  222. package/styles/Microsoft/URLFormat.yml +0 -9
  223. package/styles/Microsoft/Units.yml +0 -16
  224. package/styles/Microsoft/Vocab.yml +0 -25
  225. package/styles/Microsoft/We.yml +0 -11
  226. package/styles/Microsoft/Wordiness.yml +0 -127
  227. package/styles/Microsoft/meta.json +0 -4
  228. package/styles/alex/Ablist.yml +0 -274
  229. package/styles/alex/Condescending.yml +0 -16
  230. package/styles/alex/Gendered.yml +0 -110
  231. package/styles/alex/LGBTQ.yml +0 -55
  232. package/styles/alex/OCD.yml +0 -10
  233. package/styles/alex/Press.yml +0 -12
  234. package/styles/alex/ProfanityLikely.yml +0 -1289
  235. package/styles/alex/ProfanityMaybe.yml +0 -282
  236. package/styles/alex/ProfanityUnlikely.yml +0 -251
  237. package/styles/alex/README.md +0 -27
  238. package/styles/alex/Race.yml +0 -85
  239. package/styles/alex/Suicide.yml +0 -26
  240. package/styles/alex/meta.json +0 -4
  241. package/styles/config/vocabularies/Docs/accept.txt +0 -47
  242. package/styles/config/vocabularies/Docs/reject.txt +0 -4
  243. package/styles/proselint/Airlinese.yml +0 -8
  244. package/styles/proselint/AnimalLabels.yml +0 -48
  245. package/styles/proselint/Annotations.yml +0 -9
  246. package/styles/proselint/Apologizing.yml +0 -8
  247. package/styles/proselint/Archaisms.yml +0 -52
  248. package/styles/proselint/But.yml +0 -8
  249. package/styles/proselint/Cliches.yml +0 -782
  250. package/styles/proselint/CorporateSpeak.yml +0 -30
  251. package/styles/proselint/Currency.yml +0 -5
  252. package/styles/proselint/Cursing.yml +0 -15
  253. package/styles/proselint/DateCase.yml +0 -7
  254. package/styles/proselint/DateMidnight.yml +0 -7
  255. package/styles/proselint/DateRedundancy.yml +0 -10
  256. package/styles/proselint/DateSpacing.yml +0 -7
  257. package/styles/proselint/DenizenLabels.yml +0 -52
  258. package/styles/proselint/Diacritical.yml +0 -95
  259. package/styles/proselint/GenderBias.yml +0 -45
  260. package/styles/proselint/GroupTerms.yml +0 -39
  261. package/styles/proselint/Hedging.yml +0 -8
  262. package/styles/proselint/Hyperbole.yml +0 -6
  263. package/styles/proselint/Jargon.yml +0 -11
  264. package/styles/proselint/LGBTOffensive.yml +0 -13
  265. package/styles/proselint/LGBTTerms.yml +0 -15
  266. package/styles/proselint/Malapropisms.yml +0 -8
  267. package/styles/proselint/Needless.yml +0 -358
  268. package/styles/proselint/Nonwords.yml +0 -38
  269. package/styles/proselint/Oxymorons.yml +0 -22
  270. package/styles/proselint/P-Value.yml +0 -6
  271. package/styles/proselint/RASSyndrome.yml +0 -30
  272. package/styles/proselint/README.md +0 -12
  273. package/styles/proselint/Skunked.yml +0 -13
  274. package/styles/proselint/Spelling.yml +0 -17
  275. package/styles/proselint/Typography.yml +0 -11
  276. package/styles/proselint/Uncomparables.yml +0 -50
  277. package/styles/proselint/Very.yml +0 -6
  278. package/styles/proselint/meta.json +0 -15
  279. package/styles/write-good/Cliches.yml +0 -702
  280. package/styles/write-good/E-Prime.yml +0 -32
  281. package/styles/write-good/Illusions.yml +0 -11
  282. package/styles/write-good/Passive.yml +0 -183
  283. package/styles/write-good/README.md +0 -27
  284. package/styles/write-good/So.yml +0 -5
  285. package/styles/write-good/ThereIs.yml +0 -6
  286. package/styles/write-good/TooWordy.yml +0 -221
  287. package/styles/write-good/Weasel.yml +0 -29
  288. package/styles/write-good/meta.json +0 -4
  289. package/test/mcp-example-test.yaml +0 -27
  290. package/test/test_parser.js +0 -47
  291. package/testdriver/acceptance-sdk/QUICK_REFERENCE.md +0 -61
  292. package/testdriver/acceptance-sdk/README.md +0 -128
  293. package/testdriver/acceptance-sdk/TEST_REPORTING.md +0 -245
  294. package/testdriver/acceptance-sdk/assert.test.mjs +0 -44
  295. package/testdriver/acceptance-sdk/scroll-until-text.test.mjs +0 -42
  296. package/testdriver/acceptance-sdk/setup/lifecycleHelpers.mjs +0 -239
  297. package/testdriver/acceptance-sdk/type-checking-demo.js +0 -49
  298. package/testdriver/acceptance-sdk/type.test.mjs +0 -84
  299. package/vale.ini +0 -18
  300. package/vitest.config.example.js +0 -19
  301. package/vitest.config.mjs.bak +0 -44
  302. /package/docs/{ARCHITECTURE.md → v7/_drafts/architecture.mdx} +0 -0
  303. /package/docs/{AWESOME_LOGS_QUICK_REF.md → v7/_drafts/awesome-logs-quick-ref.mdx} +0 -0
  304. /package/{CONTRIBUTING.md → docs/v7/_drafts/contributing.mdx} +0 -0
  305. /package/docs/v7/{guides → _drafts}/migration.mdx +0 -0
  306. /package/{PLUGIN_MIGRATION.md → docs/v7/_drafts/plugin-migration.mdx} +0 -0
  307. /package/{PROMPT_CACHE.md → docs/v7/_drafts/prompt-cache.mdx} +0 -0
  308. /package/docs/{SDK_AWESOME_LOGS.md → v7/_drafts/sdk-awesome-logs.mdx} +0 -0
  309. /package/docs/{sdk-browser-rendering.md → v7/_drafts/sdk-browser-rendering.mdx} +0 -0
  310. /package/{SDK_LOGGING.md → docs/v7/_drafts/sdk-logging.mdx} +0 -0
  311. /package/{SDK_MIGRATION.md → docs/v7/_drafts/sdk-migration.mdx} +0 -0
  312. /package/docs/{TEST_RECORDING.md → v7/_drafts/test-recording.mdx} +0 -0
  313. /package/docs/v7/{README.md → overview/readme.mdx} +0 -0
  314. /package/{debug-locate-response.js → test/manual/debug-locate-response.js} +0 -0
  315. /package/{test-find-api.js → test/manual/test-find-api.js} +0 -0
  316. /package/{test-prompt-cache.js → test/manual/test-prompt-cache.js} +0 -0
  317. /package/{test-sandbox-render.js → test/manual/test-sandbox-render.js} +0 -0
  318. /package/{test-sdk-methods.js → test/manual/test-sdk-methods.js} +0 -0
  319. /package/{test-sdk-refactor.js → test/manual/test-sdk-refactor.js} +0 -0
  320. /package/{test-stack-trace.mjs → test/manual/test-stack-trace.mjs} +0 -0
  321. /package/{verify-element-api.js → test/manual/verify-element-api.js} +0 -0
  322. /package/{verify-types.js → test/manual/verify-types.js} +0 -0
  323. /package/{testdriver/acceptance-sdk → test/testdriver}/setup/globalTeardown.mjs +0 -0
  324. /package/{testdriver/acceptance-sdk → test/testdriver}/setup/vitestSetup.mjs +0 -0
@@ -13,6 +13,21 @@ const { createRedraw } = require("./redraw.js");
13
13
 
14
14
  const { events } = require("../events.js");
15
15
 
16
+ /**
17
+ * Helper to detect if arguments are using object-based API or positional API
18
+ * @param {Array} args - The arguments passed to a command
19
+ * @param {Array<string>} knownKeys - Keys that would be present in object-based call
20
+ * @returns {boolean} True if using object-based API
21
+ */
22
+ const isObjectArgs = (args, knownKeys) => {
23
+ if (args.length === 0) return false;
24
+ if (args.length === 1 && typeof args[0] === 'object' && args[0] !== null && !Array.isArray(args[0])) {
25
+ // Check if it has at least one known key
26
+ return knownKeys.some(key => key in args[0]);
27
+ }
28
+ return false;
29
+ };
30
+
16
31
  /**
17
32
  * Error When a match is not found
18
33
  * these should be recoverable by --heal
@@ -36,6 +51,28 @@ class CommandError extends Error {
36
51
  }
37
52
  }
38
53
 
54
+ /**
55
+ * Extract redraw options from command options
56
+ * @param {Object} options - Command options that may contain redraw settings
57
+ * @returns {Object} Redraw options object
58
+ */
59
+ const extractRedrawOptions = (options = {}) => {
60
+ const redrawOpts = {};
61
+
62
+ // Support nested redraw object: { redraw: { enabled: false, diffThreshold: 0.5 } }
63
+ if (options.redraw && typeof options.redraw === 'object') {
64
+ return options.redraw;
65
+ }
66
+
67
+ // Support flat options for convenience
68
+ if ('redrawEnabled' in options) redrawOpts.enabled = options.redrawEnabled;
69
+ if ('redrawScreenRedraw' in options) redrawOpts.screenRedraw = options.redrawScreenRedraw;
70
+ if ('redrawNetworkMonitor' in options) redrawOpts.networkMonitor = options.redrawNetworkMonitor;
71
+ if ('redrawDiffThreshold' in options) redrawOpts.diffThreshold = options.redrawDiffThreshold;
72
+
73
+ return redrawOpts;
74
+ };
75
+
39
76
  // Factory function that creates commands with the provided emitter
40
77
  const createCommands = (
41
78
  emitter,
@@ -44,12 +81,16 @@ const createCommands = (
44
81
  config,
45
82
  sessionInstance,
46
83
  getCurrentFilePath,
47
- redrawThreshold = 0.1,
84
+ redrawThreshold = 0.01,
85
+ getDashcamElapsedTime = null,
48
86
  ) => {
49
87
  // Create SDK instance with emitter, config, and session
50
88
  const sdk = createSDK(emitter, config, sessionInstance);
51
- // Create redraw instance with the system
52
- const redraw = createRedraw(emitter, system, sandbox, redrawThreshold);
89
+ // Create redraw instance with the system - support both number and object for backward compatibility
90
+ const defaultRedrawOptions = typeof redrawThreshold === 'number'
91
+ ? { diffThreshold: redrawThreshold }
92
+ : redrawThreshold;
93
+ const redraw = createRedraw(emitter, system, sandbox, defaultRedrawOptions);
53
94
 
54
95
  // Helper method to resolve file paths relative to the current file
55
96
  const resolveRelativePath = (relativePath) => {
@@ -198,19 +239,59 @@ const createCommands = (
198
239
 
199
240
  emitter.emit(events.log.narration, `thinking...`);
200
241
 
242
+ const assertStartTime = Date.now();
201
243
  let response = await sdk.req("assert", {
202
244
  expect: assertion,
203
245
  image: await system.captureScreenBase64(),
204
246
  });
247
+ const assertDuration = Date.now() - assertStartTime;
248
+
249
+ // Determine if assertion passed or failed
250
+ const assertionPassed = response.data.indexOf("The task passed") > -1;
251
+
252
+ // Track interaction with success/failure
253
+ const sessionId = sessionInstance?.get();
254
+ if (sessionId) {
255
+ try {
256
+ await sandbox.send({
257
+ type: "trackInteraction",
258
+ interactionType: "assert",
259
+ session: sessionId,
260
+ prompt: assertion,
261
+ timestamp: assertStartTime,
262
+ duration: assertDuration,
263
+ success: assertionPassed,
264
+ error: assertionPassed ? undefined : response.data,
265
+ });
266
+ } catch (err) {
267
+ console.warn("Failed to track assert interaction:", err.message);
268
+ }
269
+ }
270
+
205
271
  return handleAssertResponse(response.data);
206
272
  };
207
- const scroll = async (direction = "down", amount = 300) => {
273
+
274
+ /**
275
+ * Scroll the screen in a direction
276
+ * @param {string} [direction='down'] - Direction to scroll ('up', 'down', 'left', 'right')
277
+ * @param {Object} [options] - Additional options
278
+ * @param {number} [options.amount=300] - Amount to scroll in pixels
279
+ * @param {Object} [options.redraw] - Redraw detection options
280
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
281
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
282
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
283
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
284
+ */
285
+ const scroll = async (direction = 'down', options = {}) => {
286
+ let { amount = 300 } = options;
287
+ const redrawOptions = extractRedrawOptions(options);
288
+
208
289
  emitter.emit(
209
290
  events.log.narration,
210
291
  theme.dim(`scrolling ${direction} ${amount}px...`),
211
292
  );
212
293
 
213
- await redraw.start();
294
+ await redraw.start(redrawOptions);
214
295
 
215
296
  amount = parseInt(amount, 10);
216
297
 
@@ -222,7 +303,7 @@ const createCommands = (
222
303
  amount,
223
304
  direction,
224
305
  });
225
- await redraw.wait(2500);
306
+ await redraw.wait(2500, redrawOptions);
226
307
  break;
227
308
  case "down":
228
309
  await sandbox.send({
@@ -230,7 +311,7 @@ const createCommands = (
230
311
  amount,
231
312
  direction,
232
313
  });
233
- await redraw.wait(2500);
314
+ await redraw.wait(2500, redrawOptions);
234
315
  break;
235
316
  case "left":
236
317
  console.error("Not Supported");
@@ -251,91 +332,284 @@ const createCommands = (
251
332
  }
252
333
  };
253
334
 
254
- // perform a mouse click
255
- // click, right-click, double-click, hover
256
- const click = async (x, y, action = "click") => {
257
- await redraw.start();
335
+ /**
336
+ * Perform a mouse click action
337
+ * @param {Object|number} options - Options object or x coordinate (for backward compatibility)
338
+ * @param {number} options.x - X coordinate
339
+ * @param {number} options.y - Y coordinate
340
+ * @param {string} [options.action='click'] - Click action ('click', 'right-click', 'double-click', 'hover', 'mouseDown', 'mouseUp')
341
+ * @param {string} [options.prompt] - Prompt for tracking
342
+ * @param {boolean} [options.cacheHit] - Whether cache was hit
343
+ * @param {string} [options.selector] - Selector used
344
+ * @param {boolean} [options.selectorUsed] - Whether selector was used
345
+ * @param {Object} [options.redraw] - Redraw detection options
346
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
347
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
348
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
349
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
350
+ */
351
+ const click = async (...args) => {
352
+ const clickStartTime = Date.now();
353
+ let x, y, action, elementData, redrawOptions;
354
+
355
+ // Handle both object and positional argument styles
356
+ if (isObjectArgs(args, ['x', 'y', 'action', 'prompt', 'cacheHit', 'selector'])) {
357
+ const { x: xPos, y: yPos, action: actionArg = 'click', redraw: redrawOpts, ...rest } = args[0];
358
+ x = xPos;
359
+ y = yPos;
360
+ action = actionArg;
361
+ elementData = rest;
362
+ redrawOptions = extractRedrawOptions({ redraw: redrawOpts, ...rest });
363
+ } else {
364
+ // Legacy positional: click(x, y, action, elementData)
365
+ [x, y, action = 'click', elementData = {}] = args;
366
+ redrawOptions = extractRedrawOptions(elementData);
367
+ }
368
+
369
+ try {
370
+ await redraw.start(redrawOptions);
258
371
 
259
- let button = "left";
260
- let double = false;
372
+ let button = "left";
373
+ let double = false;
261
374
 
262
- if (action === "right-click") {
263
- button = "right";
264
- }
265
- if (action === "double-click") {
266
- double = true;
267
- }
375
+ if (action === "right-click") {
376
+ button = "right";
377
+ }
378
+ if (action === "double-click") {
379
+ double = true;
380
+ }
268
381
 
269
- emitter.emit(
270
- events.log.narration,
271
- theme.dim(`${action} ${button} clicking at ${x}, ${y}...`),
272
- true,
273
- );
382
+ emitter.emit(
383
+ events.log.narration,
384
+ theme.dim(`${action} ${button} clicking at ${x}, ${y}...`),
385
+ true,
386
+ );
274
387
 
275
- x = parseInt(x);
276
- y = parseInt(y);
388
+ x = parseInt(x);
389
+ y = parseInt(y);
277
390
 
278
- await sandbox.send({ type: "moveMouse", x, y });
391
+ // Add dashcam timestamp if available
392
+ if (getDashcamElapsedTime) {
393
+ const elapsed = getDashcamElapsedTime();
394
+ if (elapsed !== null) {
395
+ elementData.timestamp = elapsed;
396
+ }
397
+ }
279
398
 
280
- emitter.emit(events.mouseMove, { x, y });
399
+ await sandbox.send({ type: "moveMouse", x, y, ...elementData });
281
400
 
282
- await delay(2500); // wait for the mouse to move
401
+ emitter.emit(events.mouseMove, { x, y });
283
402
 
284
- if (action !== "hover") {
285
- if (action === "click" || action === "left-click") {
286
- await sandbox.send({ type: "leftClick" });
287
- } else if (action === "right-click") {
288
- await sandbox.send({ type: "rightClick" });
289
- } else if (action === "middle-click") {
290
- await sandbox.send({ type: "middleClick" });
291
- } else if (action === "double-click") {
292
- await sandbox.send({ type: "doubleClick" });
293
- } else if (action === "mouseDown") {
294
- await sandbox.send({ type: "mousePress", button: "left" });
295
- } else if (action === "mouseUp") {
296
- await sandbox.send({
297
- type: "mouseRelease",
298
- button: "left",
299
- });
403
+ await delay(2500); // wait for the mouse to move
404
+
405
+ if (action !== "hover") {
406
+ // Update timestamp for the actual click action
407
+ if (getDashcamElapsedTime) {
408
+ const elapsed = getDashcamElapsedTime();
409
+ if (elapsed !== null) {
410
+ elementData.timestamp = elapsed;
411
+ }
412
+ }
413
+
414
+ if (action === "click" || action === "left-click") {
415
+ await sandbox.send({ type: "leftClick", x, y, ...elementData });
416
+ } else if (action === "right-click") {
417
+ await sandbox.send({ type: "rightClick", x, y, ...elementData });
418
+ } else if (action === "middle-click") {
419
+ await sandbox.send({ type: "middleClick", x, y, ...elementData });
420
+ } else if (action === "double-click") {
421
+ await sandbox.send({ type: "doubleClick", x, y, ...elementData });
422
+ } else if (action === "mouseDown") {
423
+ await sandbox.send({ type: "mousePress", button: "left", x, y, ...elementData });
424
+ } else if (action === "mouseUp") {
425
+ await sandbox.send({
426
+ type: "mouseRelease",
427
+ button: "left",
428
+ x,
429
+ y,
430
+ ...elementData
431
+ });
432
+ }
433
+
434
+ emitter.emit(events.mouseClick, { x, y, button, click, double });
435
+
436
+ // Track interaction
437
+ const sessionId = sessionInstance?.get();
438
+ if (sessionId && elementData.prompt) {
439
+ try {
440
+ const clickDuration = Date.now() - clickStartTime;
441
+ await sandbox.send({
442
+ type: "trackInteraction",
443
+ interactionType: "click",
444
+ session: sessionId,
445
+ prompt: elementData.prompt,
446
+ input: { x, y, action },
447
+ timestamp: clickStartTime,
448
+ duration: clickDuration,
449
+ success: true,
450
+ cacheHit: elementData.cacheHit,
451
+ selector: elementData.selector,
452
+ selectorUsed: elementData.selectorUsed,
453
+ });
454
+ } catch (err) {
455
+ console.warn("Failed to track click interaction:", err.message);
456
+ }
457
+ }
300
458
  }
301
459
 
302
- emitter.emit(events.mouseClick, { x, y, button, click, double });
460
+ await redraw.wait(5000, redrawOptions);
461
+
462
+ return;
463
+ } catch (error) {
464
+ // Track interaction failure
465
+ const sessionId = sessionInstance?.get();
466
+ if (sessionId && elementData.prompt) {
467
+ try {
468
+ const clickDuration = Date.now() - clickStartTime;
469
+ await sandbox.send({
470
+ type: "trackInteraction",
471
+ interactionType: "click",
472
+ session: sessionId,
473
+ prompt: elementData.prompt,
474
+ input: { x, y, action },
475
+ timestamp: clickStartTime,
476
+ duration: clickDuration,
477
+ success: false,
478
+ error: error.message,
479
+ cacheHit: elementData.cacheHit,
480
+ selector: elementData.selector,
481
+ selectorUsed: elementData.selectorUsed,
482
+ });
483
+ } catch (err) {
484
+ console.warn("Failed to track click interaction:", err.message);
485
+ }
486
+ }
487
+ throw error;
303
488
  }
489
+ };
304
490
 
305
- await redraw.wait(5000);
491
/**
 * Hover at coordinates.
 *
 * Accepts either a single options object or the legacy positional form
 * `hover(x, y, elementData)`.
 *
 * @param {Object|number} options - Options object or x coordinate (for backward compatibility)
 * @param {number} options.x - X coordinate
 * @param {number} options.y - Y coordinate
 * @param {string} [options.prompt] - Prompt for tracking; the interaction is only tracked when this is set
 * @param {boolean} [options.cacheHit] - Whether cache was hit
 * @param {string} [options.selector] - Selector used
 * @param {boolean} [options.selectorUsed] - Whether selector was used
 * @param {Object} [options.redraw] - Redraw options, resolved through extractRedrawOptions
 * @returns {Promise<void>}
 * @throws Rethrows any error raised while moving the mouse or waiting for the redraw
 */
const hover = async (...args) => {
  const hoverStartTime = Date.now();
  let x;
  let y;
  let elementData;
  let redrawOptions;

  // Handle both object and positional argument styles
  if (isObjectArgs(args, ['x', 'y', 'prompt', 'cacheHit', 'selector'])) {
    const { x: xPos, y: yPos, redraw: redrawOpts, ...rest } = args[0];
    x = xPos;
    y = yPos;
    elementData = rest;
    redrawOptions = extractRedrawOptions({ redraw: redrawOpts, ...rest });
  } else {
    // Legacy positional: hover(x, y, elementData)
    const [xPos, yPos, legacyData] = args;
    x = xPos;
    y = yPos;
    // Shallow copy: a timestamp is attached below and must not leak into the
    // caller's object. Spreading null/undefined yields an empty object.
    elementData = { ...legacyData };
    redrawOptions = extractRedrawOptions(elementData);
  }

  // Sends one trackInteraction message describing the outcome. Skipped when
  // there is no session id or no prompt; a tracking failure is only logged so
  // it can never mask the result of the hover itself.
  const trackHover = async (outcome) => {
    const sessionId = sessionInstance?.get();
    if (!sessionId || !elementData.prompt) {
      return;
    }
    try {
      await sandbox.send({
        type: "trackInteraction",
        interactionType: "hover",
        session: sessionId,
        prompt: elementData.prompt,
        input: { x, y },
        timestamp: hoverStartTime,
        duration: Date.now() - hoverStartTime,
        ...outcome,
        cacheHit: elementData.cacheHit,
        selector: elementData.selector,
        selectorUsed: elementData.selectorUsed,
      });
    } catch (err) {
      console.warn("Failed to track hover interaction:", err.message);
    }
  };

  try {
    emitter.emit(events.log.narration, theme.dim(`hovering at ${x}, ${y}...`));

    await redraw.start(redrawOptions);

    // Explicit radix so strings such as "0x10" are never read as hexadecimal.
    x = Number.parseInt(x, 10);
    y = Number.parseInt(y, 10);

    // Add dashcam timestamp if available
    if (getDashcamElapsedTime) {
      const elapsed = getDashcamElapsedTime();
      if (elapsed !== null) {
        elementData.timestamp = elapsed;
      }
    }

    await sandbox.send({ type: "moveMouse", x, y, ...elementData });

    // Reported before the redraw wait so the duration covers the mouse move only.
    await trackHover({ success: true });

    await redraw.wait(2500, redrawOptions);

    return;
  } catch (error) {
    await trackHover({ success: false, error: error.message });
    throw error;
  }
};
324
589
 
325
590
  let commands = {
326
591
  scroll: scroll,
327
592
  click: click,
328
593
  hover: hover,
329
- // method, levenshein, dice, or combined
330
- // leven = this is turbo, all around good for text similarity
331
- // dice = this is good for short strings, but not as good for long strings
332
- // turbo (default) = turbo of both, with a 2x preference for levenshtein
333
- "hover-text": async (
334
- text,
335
- description = null,
336
- action = "click",
337
- timeout = 5000, // we pass this to the subsequent wait-for-text block
338
- ) => {
594
+ /**
595
+ * Hover over text on screen
596
+ * @param {Object|string} options - Options object or text (for backward compatibility)
597
+ * @param {string} options.text - Text to find and hover over
598
+ * @param {string|null} [options.description] - Optional description of the element
599
+ * @param {string} [options.action='click'] - Action to perform
600
+ * @param {number} [options.timeout=5000] - Timeout in milliseconds
601
+ */
602
+ "hover-text": async (...args) => {
603
+ let text, description, action, timeout;
604
+
605
+ // Handle both object and positional argument styles
606
+ if (isObjectArgs(args, ['text', 'description', 'action', 'timeout'])) {
607
+ ({ text, description = null, action = 'click', timeout = 5000 } = args[0]);
608
+ } else {
609
+ // Legacy positional: hoverText(text, description, action, timeout)
610
+ [text, description = null, action = 'click', timeout = 5000] = args;
611
+ }
612
+
339
613
  emitter.emit(
340
614
  events.log.narration,
341
615
  theme.dim(
@@ -346,7 +620,7 @@ const createCommands = (
346
620
  text = text ? text.toString() : null;
347
621
 
348
622
  // wait for the text to appear on screen
349
- await commands["wait-for-text"](text, timeout);
623
+ await commands["wait-for-text"]({ text, timeout });
350
624
 
351
625
  description = description ? description.toString() : null;
352
626
 
@@ -369,15 +643,30 @@ const createCommands = (
369
643
 
370
644
  // Perform the action using the located coordinates
371
645
  if (action === "hover") {
372
- await commands.hover(response.coordinates.x, response.coordinates.y);
646
+ await commands.hover({ x: response.coordinates.x, y: response.coordinates.y });
373
647
  } else {
374
- await click(response.coordinates.x, response.coordinates.y, action);
648
+ await click({ x: response.coordinates.x, y: response.coordinates.y, action });
375
649
  }
376
650
 
377
651
  return response;
378
652
  },
379
- // uses our api to find all images on screen
380
- "hover-image": async (description, action = "click") => {
653
+ /**
654
+ * Hover over an image on screen
655
+ * @param {Object|string} options - Options object or description (for backward compatibility)
656
+ * @param {string} options.description - Description of the image to find
657
+ * @param {string} [options.action='click'] - Action to perform
658
+ */
659
+ "hover-image": async (...args) => {
660
+ let description, action;
661
+
662
+ // Handle both object and positional argument styles
663
+ if (isObjectArgs(args, ['description', 'action'])) {
664
+ ({ description, action = 'click' } = args[0]);
665
+ } else {
666
+ // Legacy positional: hoverImage(description, action)
667
+ [description, action = 'click'] = args;
668
+ }
669
+
381
670
  emitter.emit(
382
671
  events.log.narration,
383
672
  theme.dim(`searching for image: "${description}"...`),
@@ -394,14 +683,31 @@ const createCommands = (
394
683
 
395
684
  // Perform the action using the located coordinates
396
685
  if (action === "hover") {
397
- await commands.hover(response.coordinates.x, response.coordinates.y);
686
+ await commands.hover({ x: response.coordinates.x, y: response.coordinates.y });
398
687
  } else {
399
- await click(response.coordinates.x, response.coordinates.y, action);
688
+ await click({ x: response.coordinates.x, y: response.coordinates.y, action });
400
689
  }
401
690
 
402
691
  return response;
403
692
  },
404
- "match-image": async (relativePath, action = "click", invert = false) => {
693
+ /**
694
+ * Match and interact with an image template
695
+ * @param {Object|string} options - Options object or path (for backward compatibility)
696
+ * @param {string} options.path - Path to the image template
697
+ * @param {string} [options.action='click'] - Action to perform
698
+ * @param {boolean} [options.invert=false] - Invert the match
699
+ */
700
+ "match-image": async (...args) => {
701
+ let relativePath, action, invert;
702
+
703
+ // Handle both object and positional argument styles
704
+ if (isObjectArgs(args, ['path', 'action', 'invert'])) {
705
+ ({ path: relativePath, action = 'click', invert = false } = args[0]);
706
+ } else {
707
+ // Legacy positional: matchImage(relativePath, action, invert)
708
+ [relativePath, action = 'click', invert = false] = args;
709
+ }
710
+
405
711
  emitter.emit(
406
712
  events.log.narration,
407
713
  theme.dim(`${action} on image template "${relativePath}"...`),
@@ -422,52 +728,173 @@ const createCommands = (
422
728
  throw new CommandError(`Image not found: ${resolvedPath}`);
423
729
  } else {
424
730
  if (action === "click") {
425
- await click(result.centerX, result.centerY, action);
731
+ await click({ x: result.centerX, y: result.centerY, action });
426
732
  } else if (action === "hover") {
427
- await hover(result.centerX, result.centerY);
733
+ await hover({ x: result.centerX, y: result.centerY });
428
734
  }
429
735
  }
430
736
 
431
737
  return true;
432
738
  },
433
- // type a string
739
+ /**
740
+ * Type text
741
+ * @param {string|number} text - Text to type
742
+ * @param {Object} [options] - Additional options
743
+ * @param {number} [options.delay=250] - Delay between keystrokes in milliseconds
744
+ * @param {boolean} [options.secret=false] - If true, text is treated as sensitive (not logged or stored)
745
+ * @param {Object} [options.redraw] - Redraw detection options
746
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
747
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
748
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
749
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
750
+ */
751
+ "type": async (text, options = {}) => {
752
+ const typeStartTime = Date.now();
753
+ const { delay = 250, secret = false, redraw: redrawOpts, ...elementData } = options;
754
+ const redrawOptions = extractRedrawOptions({ redraw: redrawOpts, ...options });
755
+
756
+ // Log masked version if secret, otherwise show actual text
757
+ if (secret) {
758
+ emitter.emit(events.log.narration, theme.dim(`typing secret "****"...`));
759
+ } else {
760
+ emitter.emit(events.log.narration, theme.dim(`typing "${text}"...`));
761
+ }
434
762
 
435
- type: async (string, delay = 250) => {
436
- emitter.emit(events.log.narration, theme.dim(`typing "${string}"...`));
763
+ await redraw.start(redrawOptions);
437
764
 
438
- await redraw.start();
765
+ text = text.toString();
439
766
 
440
- string = string.toString();
767
+ // Add dashcam timestamp if available
768
+ if (getDashcamElapsedTime) {
769
+ const elapsed = getDashcamElapsedTime();
770
+ if (elapsed !== null) {
771
+ elementData.timestamp = elapsed;
772
+ }
773
+ }
441
774
 
442
- await sandbox.send({ type: "write", text: string, delay });
443
- await redraw.wait(5000);
775
+ // Actually type the text in the sandbox
776
+ await sandbox.send({ type: "write", text, delay, ...elementData });
777
+
778
+ // Track interaction
779
+ const sessionId = sessionInstance?.get();
780
+ if (sessionId) {
781
+ try {
782
+ const typeDuration = Date.now() - typeStartTime;
783
+ await sandbox.send({
784
+ type: "trackInteraction",
785
+ interactionType: "type",
786
+ session: sessionId,
787
+ // Store masked text if secret, otherwise store actual text
788
+ input: { text: secret ? "****" : text, delay },
789
+ timestamp: typeStartTime,
790
+ duration: typeDuration,
791
+ success: true,
792
+ isSecret: secret, // Flag this interaction if it contains a secret
793
+ });
794
+ } catch (err) {
795
+ console.warn("Failed to track type interaction:", err.message);
796
+ }
797
+ }
798
+
799
+ await redraw.wait(5000, redrawOptions);
444
800
  return;
445
801
  },
446
- // press keys
447
- // different than `type`, becasue it can press multiple keys at once
448
- "press-keys": async (inputKeys) => {
802
+ /**
803
+ * Press keyboard keys
804
+ * @param {Array} keys - Array of keys to press
805
+ * @param {Object} [options] - Additional options
806
+ * @param {Object} [options.redraw] - Redraw detection options
807
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
808
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
809
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
810
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
811
+ */
812
+ "press-keys": async (keys, options = {}) => {
813
+ const pressKeysStartTime = Date.now();
814
+ const redrawOptions = extractRedrawOptions(options);
449
815
  emitter.emit(
450
816
  events.log.narration,
451
817
  theme.dim(
452
- `pressing keys: ${Array.isArray(inputKeys) ? inputKeys.join(", ") : inputKeys}...`,
818
+ `pressing keys: ${Array.isArray(keys) ? keys.join(", ") : keys}...`,
453
819
  ),
454
820
  );
455
821
 
456
- await redraw.start();
822
+ await redraw.start(redrawOptions);
457
823
 
458
824
  // finally, press the keys
459
- await sandbox.send({ type: "press", keys: inputKeys });
825
+ await sandbox.send({ type: "press", keys });
826
+
827
+ // Track interaction
828
+ const sessionId = sessionInstance?.get();
829
+ if (sessionId) {
830
+ try {
831
+ const pressKeysDuration = Date.now() - pressKeysStartTime;
832
+ await sandbox.send({
833
+ type: "trackInteraction",
834
+ interactionType: "pressKeys",
835
+ session: sessionId,
836
+ input: { keys },
837
+ timestamp: pressKeysStartTime,
838
+ duration: pressKeysDuration,
839
+ success: true,
840
+ });
841
+ } catch (err) {
842
+ console.warn("Failed to track pressKeys interaction:", err.message);
843
+ }
844
+ }
460
845
 
461
- await redraw.wait(5000);
846
+ await redraw.wait(5000, redrawOptions);
462
847
 
463
848
  return;
464
849
  },
465
- // simple delay, usually to let ui render or webpage to load
466
- wait: async (timeout = 3000) => {
850
+ /**
851
+ * Wait for specified time
852
+ * @param {number} [timeout=3000] - Time to wait in milliseconds
853
+ * @param {Object} [options] - Additional options (reserved for future use)
854
+ */
855
+ "wait": async (timeout = 3000, options = {}) => {
856
+ const waitStartTime = Date.now();
467
857
  emitter.emit(events.log.narration, theme.dim(`waiting ${timeout}ms...`));
468
- return await delay(timeout);
858
+ const result = await delay(timeout);
859
+
860
+ // Track interaction
861
+ const sessionId = sessionInstance?.get();
862
+ if (sessionId) {
863
+ try {
864
+ const waitDuration = Date.now() - waitStartTime;
865
+ await sandbox.send({
866
+ type: "trackInteraction",
867
+ interactionType: "wait",
868
+ session: sessionId,
869
+ input: { timeout },
870
+ timestamp: waitStartTime,
871
+ duration: waitDuration,
872
+ success: true,
873
+ });
874
+ } catch (err) {
875
+ console.warn("Failed to track wait interaction:", err.message);
876
+ }
877
+ }
878
+
879
+ return result;
469
880
  },
470
- "wait-for-image": async (description, timeout = 10000) => {
881
+ /**
882
+ * Wait for image to appear on screen
883
+ * @param {Object|string} options - Options object or description (for backward compatibility)
884
+ * @param {string} options.description - Description of the image
885
+ * @param {number} [options.timeout=10000] - Timeout in milliseconds
886
+ */
887
+ "wait-for-image": async (...args) => {
888
+ let description, timeout;
889
+
890
+ // Handle both object and positional argument styles
891
+ if (isObjectArgs(args, ['description', 'timeout'])) {
892
+ ({ description, timeout = 10000 } = args[0]);
893
+ } else {
894
+ // Legacy positional: waitForImage(description, timeout)
895
+ [description, timeout = 10000] = args;
896
+ }
897
+
471
898
  emitter.emit(
472
899
  events.log.narration,
473
900
  theme.dim(
@@ -507,15 +934,80 @@ const createCommands = (
507
934
  ),
508
935
  true,
509
936
  );
937
+
938
+ // Track interaction success
939
+ const sessionId = sessionInstance?.get();
940
+ if (sessionId) {
941
+ try {
942
+ const waitForImageDuration = Date.now() - startTime;
943
+ await sandbox.send({
944
+ type: "trackInteraction",
945
+ interactionType: "waitForImage",
946
+ session: sessionId,
947
+ prompt: description,
948
+ input: { timeout },
949
+ timestamp: startTime,
950
+ duration: waitForImageDuration,
951
+ success: true,
952
+ });
953
+ } catch (err) {
954
+ console.warn("Failed to track waitForImage interaction:", err.message);
955
+ }
956
+ }
957
+
510
958
  return;
511
959
  } else {
512
- throw new MatchError(
513
- `Timed out (${niceSeconds(timeout)} seconds) while searching for an image matching the description "${description}"`,
514
- );
960
+ // Track interaction failure
961
+ const sessionId = sessionInstance?.get();
962
+ const errorMsg = `Timed out (${niceSeconds(timeout)} seconds) while searching for an image matching the description "${description}"`;
963
+ if (sessionId) {
964
+ try {
965
+ const waitForImageDuration = Date.now() - startTime;
966
+ await sandbox.send({
967
+ type: "trackInteraction",
968
+ interactionType: "waitForImage",
969
+ session: sessionId,
970
+ prompt: description,
971
+ input: { timeout },
972
+ timestamp: startTime,
973
+ duration: waitForImageDuration,
974
+ success: false,
975
+ error: errorMsg,
976
+ });
977
+ } catch (err) {
978
+ console.warn("Failed to track waitForImage interaction:", err.message);
979
+ }
980
+ }
981
+
982
+ throw new MatchError(errorMsg);
515
983
  }
516
984
  },
517
- "wait-for-text": async (text, timeout = 5000) => {
518
- await redraw.start();
985
+ /**
986
+ * Wait for text to appear on screen
987
+ * @param {Object|string} options - Options object or text (for backward compatibility)
988
+ * @param {string} options.text - Text to wait for
989
+ * @param {number} [options.timeout=5000] - Timeout in milliseconds
990
+ * @param {Object} [options.redraw] - Redraw detection options
991
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
992
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
993
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
994
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
995
+ */
996
+ "wait-for-text": async (...args) => {
997
+ let text, timeout, redrawOptions;
998
+
999
+ // Handle both object and positional argument styles
1000
+ if (isObjectArgs(args, ['text', 'timeout'])) {
1001
+ const { redraw: redrawOpts, ...rest } = args[0];
1002
+ ({ text, timeout = 5000 } = rest);
1003
+ redrawOptions = extractRedrawOptions({ redraw: redrawOpts, ...rest });
1004
+ } else {
1005
+ // Legacy positional: waitForText(text, timeout)
1006
+ [text, timeout = 5000] = args;
1007
+ redrawOptions = {};
1008
+ }
1009
+
1010
+ await redraw.start(redrawOptions);
519
1011
 
520
1012
  emitter.emit(
521
1013
  events.log.narration,
@@ -552,20 +1044,82 @@ const createCommands = (
552
1044
 
553
1045
  if (passed) {
554
1046
  emitter.emit(events.log.narration, theme.dim(`"${text}" found!`), true);
1047
+
1048
+ // Track interaction success
1049
+ const sessionId = sessionInstance?.get();
1050
+ if (sessionId) {
1051
+ try {
1052
+ const waitForTextDuration = Date.now() - startTime;
1053
+ await sandbox.send({
1054
+ type: "trackInteraction",
1055
+ interactionType: "waitForText",
1056
+ session: sessionId,
1057
+ prompt: text,
1058
+ input: { timeout },
1059
+ timestamp: startTime,
1060
+ duration: waitForTextDuration,
1061
+ success: true,
1062
+ });
1063
+ } catch (err) {
1064
+ console.warn("Failed to track waitForText interaction:", err.message);
1065
+ }
1066
+ }
1067
+
555
1068
  return;
556
1069
  } else {
557
- throw new MatchError(
558
- `Timed out (${niceSeconds(timeout)} seconds) while searching for "${text}"`,
559
- );
1070
+ // Track interaction failure
1071
+ const sessionId = sessionInstance?.get();
1072
+ const errorMsg = `Timed out (${niceSeconds(timeout)} seconds) while searching for "${text}"`;
1073
+ if (sessionId) {
1074
+ try {
1075
+ const waitForTextDuration = Date.now() - startTime;
1076
+ await sandbox.send({
1077
+ type: "trackInteraction",
1078
+ interactionType: "waitForText",
1079
+ session: sessionId,
1080
+ prompt: text,
1081
+ input: { timeout },
1082
+ timestamp: startTime,
1083
+ duration: waitForTextDuration,
1084
+ success: false,
1085
+ error: errorMsg,
1086
+ });
1087
+ } catch (err) {
1088
+ console.warn("Failed to track waitForText interaction:", err.message);
1089
+ }
1090
+ }
1091
+
1092
+ throw new MatchError(errorMsg);
560
1093
  }
561
1094
  },
562
- "scroll-until-text": async (
563
- text,
564
- direction = "down",
565
- maxDistance = 10000,
566
- invert = false,
567
- ) => {
568
- await redraw.start();
1095
+ /**
1096
+ * Scroll until text is found
1097
+ * @param {Object|string} options - Options object or text (for backward compatibility)
1098
+ * @param {string} options.text - Text to find
1099
+ * @param {string} [options.direction='down'] - Scroll direction
1100
+ * @param {number} [options.maxDistance=10000] - Maximum distance to scroll in pixels
1101
+ * @param {boolean} [options.invert=false] - Invert the match
1102
+ * @param {Object} [options.redraw] - Redraw detection options
1103
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
1104
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
1105
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
1106
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
1107
+ */
1108
+ "scroll-until-text": async (...args) => {
1109
+ let text, direction, maxDistance, invert, redrawOptions;
1110
+
1111
+ // Handle both object and positional argument styles
1112
+ if (isObjectArgs(args, ['text', 'direction', 'maxDistance', 'invert'])) {
1113
+ const { redraw: redrawOpts, ...rest } = args[0];
1114
+ ({ text, direction = 'down', maxDistance = 10000, invert = false } = rest);
1115
+ redrawOptions = extractRedrawOptions({ redraw: redrawOpts, ...rest });
1116
+ } else {
1117
+ // Legacy positional: scrollUntilText(text, direction, maxDistance, invert)
1118
+ [text, direction = 'down', maxDistance = 10000, invert = false] = args;
1119
+ redrawOptions = {};
1120
+ }
1121
+
1122
+ await redraw.start(redrawOptions);
569
1123
 
570
1124
  emitter.emit(
571
1125
  events.log.narration,
@@ -597,7 +1151,7 @@ const createCommands = (
597
1151
  ),
598
1152
  true,
599
1153
  );
600
- await scroll(direction, incrementDistance);
1154
+ await scroll({ direction, amount: incrementDistance });
601
1155
  scrollDistance = scrollDistance + incrementDistance;
602
1156
  }
603
1157
  }
@@ -611,21 +1165,34 @@ const createCommands = (
611
1165
  );
612
1166
  }
613
1167
  },
614
- "scroll-until-image": async (
615
- description,
616
- direction = "down",
617
- maxDistance = 10000,
618
- method = "mouse",
619
- path,
620
- invert = false,
621
- ) => {
622
- const needle = description || path;
1168
+ /**
1169
+ * Scroll until image is found
1170
+ * @param {Object|string} options - Options object or description (for backward compatibility)
1171
+ * @param {string} [options.description] - Description of the image
1172
+ * @param {string} [options.direction='down'] - Scroll direction
1173
+ * @param {number} [options.maxDistance=10000] - Maximum distance to scroll in pixels
1174
+ * @param {string} [options.method='mouse'] - Scroll method
1175
+ * @param {string} [options.path] - Path to image template
1176
+ * @param {boolean} [options.invert=false] - Invert the match
1177
+ */
1178
+ "scroll-until-image": async (...args) => {
1179
+ let description, direction, maxDistance, method, imagePath, invert;
1180
+
1181
+ // Handle both object and positional argument styles
1182
+ if (isObjectArgs(args, ['description', 'direction', 'maxDistance', 'method', 'path', 'invert'])) {
1183
+ ({ description, direction = 'down', maxDistance = 10000, method = 'mouse', path: imagePath, invert = false } = args[0]);
1184
+ } else {
1185
+ // Legacy positional: scrollUntilImage(description, direction, maxDistance, method, path, invert)
1186
+ [description, direction = 'down', maxDistance = 10000, method = 'mouse', imagePath, invert = false] = args;
1187
+ }
1188
+
1189
+ const needle = description || imagePath;
623
1190
 
624
1191
  if (!needle) {
625
1192
  throw new CommandError("No description or path provided");
626
1193
  }
627
1194
 
628
- if (description && path) {
1195
+ if (description && imagePath) {
629
1196
  throw new CommandError(
630
1197
  "Only one of description or path can be provided",
631
1198
  );
@@ -651,9 +1218,9 @@ const createCommands = (
651
1218
  );
652
1219
  }
653
1220
 
654
- if (path) {
1221
+ if (imagePath) {
655
1222
  // Don't throw if not found. We only want to know if it's found or not.
656
- passed = await commands["match-image"](path, null).catch(
1223
+ passed = await commands["match-image"]({ path: imagePath }).catch(
657
1224
  console.warn,
658
1225
  );
659
1226
  }
@@ -664,7 +1231,7 @@ const createCommands = (
664
1231
  theme.dim(`scrolling ${direction} ${incrementDistance} pixels...`),
665
1232
  true,
666
1233
  );
667
- await scroll(direction, incrementDistance, method);
1234
+ await scroll({ direction, amount: incrementDistance });
668
1235
  scrollDistance = scrollDistance + incrementDistance;
669
1236
  }
670
1237
  }
@@ -682,30 +1249,122 @@ const createCommands = (
682
1249
  );
683
1250
  }
684
1251
  },
685
- // run applescript to focus an application by name
686
- "focus-application": async (name) => {
687
- await redraw.start();
1252
+ /**
1253
+ * Focus an application by name
1254
+ * @param {string} name - Application name
1255
+ * @param {Object} [options] - Additional options
1256
+ * @param {Object} [options.redraw] - Redraw detection options
1257
+ * @param {boolean} [options.redraw.enabled=true] - Enable/disable redraw detection
1258
+ * @param {boolean} [options.redraw.screenRedraw=true] - Enable/disable screen redraw detection
1259
+ * @param {boolean} [options.redraw.networkMonitor=true] - Enable/disable network monitoring
1260
+ * @param {number} [options.redraw.diffThreshold=0.1] - Screen diff threshold percentage
1261
+ */
1262
+ "focus-application": async (name, options = {}) => {
1263
+ const redrawOptions = extractRedrawOptions(options);
1264
+ await redraw.start(redrawOptions);
688
1265
 
689
1266
  await sandbox.send({
690
1267
  type: "commands.focus-application",
691
1268
  name,
692
1269
  });
693
- await redraw.wait(1000);
1270
+ await redraw.wait(1000, redrawOptions);
694
1271
  return "The application was focused.";
695
1272
  },
696
- remember: async (description) => {
697
- let result = await sdk.req("remember", {
698
- image: await system.captureScreenBase64(),
699
- description,
700
- });
701
- return result.data;
1273
+ /**
1274
+ * Extract and remember information from the screen using AI
1275
+ * @param {Object|string} options - Options object or description (for backward compatibility)
1276
+ * @param {string} options.description - What to remember
1277
+ */
1278
+ "remember": async (...args) => {
1279
+ const rememberStartTime = Date.now();
1280
+ let description;
1281
+
1282
+ // Handle both object and positional argument styles
1283
+ if (isObjectArgs(args, ['description'])) {
1284
+ ({ description } = args[0]);
1285
+ } else {
1286
+ // Legacy positional: remember(description)
1287
+ [description] = args;
1288
+ }
1289
+
1290
+ try {
1291
+ let result = await sdk.req("remember", {
1292
+ image: await system.captureScreenBase64(),
1293
+ description,
1294
+ });
1295
+
1296
+ // Track interaction success
1297
+ const sessionId = sessionInstance?.get();
1298
+ if (sessionId) {
1299
+ try {
1300
+ const rememberDuration = Date.now() - rememberStartTime;
1301
+ await sandbox.send({
1302
+ type: "trackInteraction",
1303
+ interactionType: "remember",
1304
+ session: sessionId,
1305
+ prompt: description,
1306
+ timestamp: rememberStartTime,
1307
+ duration: rememberDuration,
1308
+ success: true,
1309
+ });
1310
+ } catch (err) {
1311
+ console.warn("Failed to track remember interaction:", err.message);
1312
+ }
1313
+ }
1314
+
1315
+ return result.data;
1316
+ } catch (error) {
1317
+ // Track interaction failure
1318
+ const sessionId = sessionInstance?.get();
1319
+ if (sessionId) {
1320
+ try {
1321
+ const rememberDuration = Date.now() - rememberStartTime;
1322
+ await sandbox.send({
1323
+ type: "trackInteraction",
1324
+ interactionType: "remember",
1325
+ session: sessionId,
1326
+ prompt: description,
1327
+ timestamp: rememberStartTime,
1328
+ duration: rememberDuration,
1329
+ success: false,
1330
+ error: error.message,
1331
+ });
1332
+ } catch (err) {
1333
+ console.warn("Failed to track remember interaction:", err.message);
1334
+ }
1335
+ }
1336
+ throw error;
1337
+ }
702
1338
  },
703
- assert: async (assertion) => {
1339
+ /**
1340
+ * Make an AI-powered assertion
1341
+ * @param {string} assertion - Assertion to check
1342
+ * @param {Object} [options] - Additional options (reserved for future use)
1343
+ */
1344
+ "assert": async (assertion, options = {}) => {
704
1345
  let response = await assert(assertion, true);
705
1346
 
706
1347
  return response;
707
1348
  },
708
- exec: async (language = "pwsh", code, timeout, silent = false) => {
1349
+ /**
1350
+ * Execute code in the sandbox
1351
+ * @param {Object|string} options - Options object or language (for backward compatibility)
1352
+ * @param {string} [options.language='pwsh'] - Language ('js', 'pwsh', or 'sh')
1353
+ * @param {string} options.code - Code to execute
1354
+ * @param {number} [options.timeout] - Timeout in milliseconds
1355
+ * @param {boolean} [options.silent=false] - Suppress output
1356
+ */
1357
+ "exec": async (...args) => {
1358
+ let language, code, timeout, silent;
1359
+
1360
+ // Handle both object and positional argument styles
1361
+ if (isObjectArgs(args, ['language', 'code', 'timeout', 'silent'])) {
1362
+ ({ language = 'pwsh', code, timeout, silent = false } = args[0]);
1363
+ } else {
1364
+ // Legacy positional: exec(language, code, timeout, silent)
1365
+ [language = 'pwsh', code, timeout, silent = false] = args;
1366
+ }
1367
+
709
1368
  emitter.emit(events.log.narration, theme.dim(`calling exec...`), true);
710
1369
 
711
1370
  emitter.emit(events.log.log, code);