aihand 0.0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/README.md +152 -2
  2. package/dist/chunk-2NTK7H4W.js +10 -0
  3. package/dist/chunk-3X4FTHLC.cjs +369 -0
  4. package/dist/chunk-BXVNR4E2.js +399 -0
  5. package/dist/chunk-C7DGE6MY.cjs +1456 -0
  6. package/dist/chunk-DUUCVLC3.cjs +254 -0
  7. package/dist/chunk-FAHI53KO.cjs +125 -0
  8. package/dist/chunk-G7KVJ7NF.js +369 -0
  9. package/dist/chunk-GNEUSRGP.js +52 -0
  10. package/dist/chunk-IGNEAOLT.cjs +130 -0
  11. package/dist/chunk-IS5XFUDB.js +125 -0
  12. package/dist/chunk-JLYC76XL.js +2448 -0
  13. package/dist/chunk-KQOABC2O.cjs +52 -0
  14. package/dist/chunk-OVMK33AC.cjs +104 -0
  15. package/dist/chunk-OWYK2IGV.js +250 -0
  16. package/dist/chunk-PQSQN4CN.js +126 -0
  17. package/dist/chunk-QF6AG3M5.cjs +410 -0
  18. package/dist/chunk-QSAMLXML.js +1456 -0
  19. package/dist/chunk-VEKYRKPF.cjs +399 -0
  20. package/dist/chunk-Y6H7W7PI.cjs +2451 -0
  21. package/dist/chunk-YKSYW77R.js +410 -0
  22. package/dist/chunk-Z2Y65YOY.cjs +7 -0
  23. package/dist/chunk-ZJQRNIK7.js +104 -0
  24. package/dist/cli-3J7EYI6G.cjs +651 -0
  25. package/dist/cli-FIJLKAGI.js +649 -0
  26. package/dist/cli-JQEIE7RQ.js +120 -0
  27. package/dist/cli-K3OS2QQH.cjs +122 -0
  28. package/dist/cli-OSYG6LJD.cjs +89 -0
  29. package/dist/cli-TXRW5PG6.js +89 -0
  30. package/dist/cli.cjs +81 -0
  31. package/dist/cli.js +81 -0
  32. package/dist/config-5KEQLN6L.cjs +13 -0
  33. package/dist/config-PJPYKDLQ.js +13 -0
  34. package/dist/graph-IH56SCPK.js +8 -0
  35. package/dist/graph-ZUXXCJ5A.cjs +8 -0
  36. package/dist/index.cjs +481 -0
  37. package/dist/index.d.cts +461 -0
  38. package/dist/index.d.ts +461 -0
  39. package/dist/index.js +479 -0
  40. package/dist/locate-5XFSXJ5J.cjs +15 -0
  41. package/dist/locate-NKSUGL3A.js +15 -0
  42. package/dist/refactor-5FWSZIBN.cjs +19 -0
  43. package/dist/refactor-BOB3SZSA.js +19 -0
  44. package/dist/scan-4R7GQG2W.cjs +9 -0
  45. package/dist/scan-VF54GAAX.js +9 -0
  46. package/dist/ui/probe/server.cjs +505 -0
  47. package/dist/ui/probe/server.js +507 -0
  48. package/dist/vite.cjs +12 -0
  49. package/dist/vite.d.cts +12 -0
  50. package/dist/vite.d.ts +12 -0
  51. package/dist/vite.js +12 -0
  52. package/package.json +82 -9
  53. package/src/cli.ts +107 -0
  54. package/src/index.ts +54 -0
  55. package/src/read/cli.ts +650 -0
  56. package/src/read/compact.ts +286 -0
  57. package/src/read/config.ts +62 -0
  58. package/src/read/graph.ts +182 -0
  59. package/src/read/index.ts +12 -0
  60. package/src/read/inject.ts +121 -0
  61. package/src/read/locate.ts +104 -0
  62. package/src/read/panel.ts +335 -0
  63. package/src/read/pipeline.ts +78 -0
  64. package/src/read/refactor.ts +576 -0
  65. package/src/read/render.ts +1118 -0
  66. package/src/read/scan.ts +61 -0
  67. package/src/read/seam.ts +0 -0
  68. package/src/read/security.ts +171 -0
  69. package/src/read/signals.ts +333 -0
  70. package/src/read/state.ts +71 -0
  71. package/src/read/stategraph.ts +205 -0
  72. package/src/read/types.ts +162 -0
  73. package/src/read/vite.ts +77 -0
  74. package/src/ui/babel/line-profiler.ts +197 -0
  75. package/src/ui/babel/source-loc.ts +68 -0
  76. package/src/ui/bridge/cdp-bridge.ts +138 -0
  77. package/src/ui/bridge/compile-probe.ts +80 -0
  78. package/src/ui/bridge/transport.ts +26 -0
  79. package/src/ui/bridge/vite-bridge.ts +116 -0
  80. package/src/ui/client/client-patch.ts +899 -0
  81. package/src/ui/client/client.ts +2562 -0
  82. package/src/ui/core/action.ts +747 -0
  83. package/src/ui/core/candidates.ts +348 -0
  84. package/src/ui/core/canvas.ts +305 -0
  85. package/src/ui/core/check.ts +34 -0
  86. package/src/ui/core/compact.ts +314 -0
  87. package/src/ui/core/detail.ts +244 -0
  88. package/src/ui/core/diff.ts +253 -0
  89. package/src/ui/core/emit.ts +198 -0
  90. package/src/ui/core/knob-exec.ts +137 -0
  91. package/src/ui/core/perf.ts +254 -0
  92. package/src/ui/core/types.ts +164 -0
  93. package/src/ui/core/util.ts +221 -0
  94. package/src/ui/index.ts +5 -0
  95. package/src/ui/probe/cli.ts +139 -0
  96. package/src/ui/probe/server.ts +468 -0
  97. package/src/ui/self/act.ts +47 -0
  98. package/src/ui/self/discover.ts +101 -0
  99. package/src/ui/self/grow.ts +121 -0
  100. package/src/ui/self/install.ts +100 -0
  101. package/src/ui/self/probe.ts +105 -0
  102. package/src/ui/self/screen-hook.ts +44 -0
  103. package/src/ui/self/self.ts +48 -0
  104. package/src/ui/self/store-refs.ts +123 -0
  105. package/src/ui/self/store-schema.ts +65 -0
  106. package/src/ui/self/synth.ts +37 -0
  107. package/src/ui/server/cli.ts +102 -0
  108. package/src/ui/server/dispatch.ts +276 -0
  109. package/src/ui/server/help-text.ts +237 -0
  110. package/src/ui/server/knob-schema.ts +87 -0
  111. package/src/ui/server/plugin.ts +1151 -0
  112. package/src/vite.ts +39 -0
  113. package/index.js +0 -2
@@ -0,0 +1,747 @@
1
+ /// <reference lib="dom" />
2
+ // aihand actions — write capability over the existing HMR channel.
3
+ //
4
+ // Two layers, intentionally split:
5
+ // - resolveAction(): pure param validation. Runs plugin-side, unit-tested.
6
+ // - performAction(): real DOM mutation. Runs browser-side (client.ts imports it).
7
+ // References window/document — never imported plugin-side.
8
+
9
+ declare global {
10
+ interface Window {
11
+ // The app's own domain projection — co-located with its consumer (doAssert) so the
12
+ // augmentation is present wherever action.ts is imported, not only via client.ts.
13
+ __AIPEEK_SCREEN__?: () => Record<string, unknown>
14
+ }
15
+ }
16
+
17
+ export type ActionType = 'click' | 'dblclick' | 'fill' | 'press' | 'wait' | 'screenshot' | 'realclick' | 'query' | 'assert'
18
+ | 'drag' | 'scrollIntoView' | 'drop' | 'clipboard' | 'hover'
19
+
20
/**
 * Parameters accepted by the action endpoints. Every field is optional at the type
 * level; `resolveAction` decides which ones a given action type actually requires.
 */
export interface ActionArgs {
  /** CSS selector of the target element (handed to `document.querySelector`). */
  sel?: string
  /** Text used to locate the target among interactive elements when `sel` is absent. */
  text?: string
  /** fill: the value to write. clipboard in write mode: the text to write. */
  value?: string
  /** press: the key to send; modifiers are prefixed and joined with `+`. */
  key?: string
  /** NOTE(review): presumably the wait budget — the consumer is outside this chunk; confirm. */
  timeout?: number
  /** NOTE(review): presumably "wait until the target disappears" — consumer not visible here; confirm. */
  gone?: boolean
  /** wait: poll until the matched element loses [disabled] (a button that flips enabled). */
  enabled?: boolean
  /** NOTE(review): which mouse button to use — the consumer is outside this chunk; confirm. */
  button?: 'left' | 'right'
  /** realclick: explicit point, used together with `y` instead of a sel/text target. */
  x?: number
  /** realclick: explicit point, used together with `x` instead of a sel/text target. */
  y?: number
  /** assert: a domain key (from __AIPEEK_SCREEN__) checked against `equals`; a CSS `sel` works too. */
  screen?: string
  /** assert: the expected value. */
  equals?: string
  /** drag: destination selector (`sel`/`text` name the source). */
  to?: string
  /** drop: file names to drop onto the target named by `sel`. */
  files?: string[]
  /** clipboard: read or write; `value` is the text to write (write only). */
  mode?: 'read' | 'write'
}
41
+
42
/** Outcome of an action, as reported back to the caller. */
export interface ActionResult {
  /** Whether the action succeeded. */
  ok: boolean
  /** Human-readable note: what was done, or a list of candidates to retarget with. */
  detail?: string
  /** Failure reason; set when `ok` is false. */
  error?: string
  /** NOTE(review): presumably the screenshot payload — its producer is outside this chunk; confirm. */
  dataUrl?: string
  /** The net before/after diff of the screen state (see `flow` for the trajectory). */
  screen?: string
  /**
   * For an action that triggers an async flow (send a message → stream → done), the
   * time-stamped state trajectory: S₀ →(Δt) S₁ →(Δt) … →(Δt) Sₙ. `screen` is the net
   * before/after diff; `flow` is *how* it got there. Absent for synchronous actions.
   */
  flow?: string
  /** NOTE(review): meaning not established by the code visible in this chunk; confirm. */
  actions?: string
  /** realclick: resolved x coordinate of the point to click. */
  x?: number
  /** realclick: resolved y coordinate of the point to click. */
  y?: number
  /**
   * realclick only: true if the page already fired the trusted click in-process (Electron).
   * false/undefined means a plain Chrome tab couldn't, so the server drives its CDP queue.
   */
  fired?: boolean
}
59
+
60
+ export const TYPES: ActionType[] = ['click', 'dblclick', 'fill', 'press', 'wait', 'screenshot', 'realclick', 'query', 'assert', 'drag', 'scrollIntoView', 'drop', 'clipboard', 'hover']
61
+
62
+ // --- Pure validation (plugin-side) ---
63
+
64
/**
 * Pure parameter validation for an action request (no DOM access).
 *
 * @param type - Requested action name; anything not listed in `TYPES` is rejected.
 * @param args - Parameters supplied with the request.
 * @returns `{ valid: true }` when the required parameters are present, otherwise
 *   `{ valid: false, error }` naming the first missing parameter.
 */
export function resolveAction(type: string, args: ActionArgs): { valid: boolean, error?: string } {
  const unknown = `unknown action: ${type}`
  if (!TYPES.includes(type as ActionType))
    return { valid: false, error: unknown }

  // "Has a target" means a selector or a text locator was supplied.
  const targeted = Boolean(args.sel || args.text)
  let problem: string | undefined

  switch (type) {
    // These five only need a target and share one message shape.
    case 'click':
    case 'dblclick':
    case 'hover':
    case 'wait':
    case 'scrollIntoView':
      if (!targeted)
        problem = `${type} needs sel= or text=`
      break
    case 'realclick':
      // Either a target or an explicit point is acceptable.
      if (!targeted && (args.x === undefined || args.y === undefined))
        problem = 'realclick needs sel=, text=, or x= & y='
      break
    case 'fill':
      if (!targeted)
        problem = 'fill needs sel= or text='
      else if (args.value === undefined)
        problem = 'fill needs value='
      break
    case 'press':
      if (!args.key)
        problem = 'press needs key='
      break
    case 'screenshot':
      break
    case 'query':
      if (!args.sel)
        problem = 'query needs sel='
      break
    case 'assert':
      if (!args.screen && !args.sel)
        problem = 'assert needs screen= (a __AIPEEK_SCREEN__ key) or sel='
      else if (args.equals === undefined)
        problem = 'assert needs equals='
      break
    case 'drag':
      if (!targeted)
        problem = 'drag needs sel= or text= (the source)'
      else if (!args.to)
        problem = 'drag needs to= (destination selector)'
      break
    case 'drop':
      if (!targeted)
        problem = 'drop needs sel= or text= (the drop target)'
      else if (!args.files || !args.files.length)
        problem = 'drop needs files= (array of file names)'
      break
    case 'clipboard':
      // Only write mode has a required parameter.
      if (args.mode === 'write' && args.value === undefined)
        problem = 'clipboard write needs value='
      break
    default:
      problem = unknown
  }

  return problem === undefined ? { valid: true } : { valid: false, error: problem }
}
120
+
121
+ // --- Browser-side execution (client.ts only) ---
122
+
123
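// Local copies of the DOM hit-testing primitives (INTERACTIVE/getDirectText/elementLabel/
// isVisible/reachable). They used to be imported cross-package from teact/dom — but that is
// a runtime dependency, and once published it would pull a `teact` that collides with an
// unrelated npm package name onto the user's machine, which is guaranteed to break. These
// 64 lines are zero-dependency pure DOM functions, so we inline our own copy; the teact test
// side keeps a logically equivalent copy, and the two copies across repos have no runtime
// coupling (see the header of candidates.ts). The re-export lets client.ts keep its
// `from '../core/action'` import path unchanged.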
129
+ // isSensitive/safeValue moved down to candidates.ts (next to isSecretKey/redactSecretValue)
130
+ // because elementLabel there now reads input value too — label + state suffix must share the
131
+ // one redaction chokepoint. Re-exported so `from '../core/action'` consumers need no change.
132
+ export { elementLabel, getDirectText, INTERACTIVE, isSecretKey, isSensitive, isVisible, reachable, safeValue } from './candidates'
133
+ import { elementLabel, INTERACTIVE, isBusyKey, isSensitive, isVisible, pickByText, reachable, safeValue } from './candidates'
134
+
135
/**
 * Locate the target element for an action.
 *
 * A selector, when given, always wins: the result of `document.querySelector(sel)` is
 * returned as-is (including `null`), and `text` is not consulted. Otherwise `text` is
 * resolved by `pickByText` over the interactive elements — an argmax over
 * (exactness, reachability, tightness), see candidates.matchScore. A covered or
 * off-viewport target still resolves (reach just scores lower), so the caller can
 * scrollIntoView/realclick.
 *
 * @throws Error with a URL-encoding hint when `sel` is not a valid CSS selector.
 */
export function findElement(sel?: string, text?: string): Element | null {
  if (!sel) {
    if (!text)
      return null
    return pickByText(document.querySelectorAll(INTERACTIVE), text)
  }

  try {
    return document.querySelector(sel)
  }
  catch {
    throw new Error(`invalid selector: ${sel} — URL-encode it (curl -G --data-urlencode 'sel=...')`)
  }
}
152
+
153
/**
 * Summarise what can currently be clicked, for "no element" error details.
 *
 * Clipped to the open modal's subtree when one is present — the layer beneath is
 * unreachable, so listing it (e.g. 90 chat items) is pure noise.
 *
 * @returns Up to 30 distinct labels (each cut to 40 chars) joined with ` | `.
 */
function clickableList(): string {
  const scope = document.querySelector('[role="dialog"][data-state="open"]') ?? document
  const candidates = Array.from(scope.querySelectorAll(INTERACTIVE)).filter(reachable)

  // A Set keeps first-seen order, so de-duplication does not reshuffle the list.
  const labels = new Set<string>()
  for (const node of candidates) {
    const label = elementLabel(node).slice(0, 40)
    if (label)
      labels.add(label)
  }
  return Array.from(labels).slice(0, 30).join(' | ')
}
163
+
164
/**
 * The fillable counterpart of clickableList — the real input/textarea/contenteditable
 * fields, each with a copy-paste selector so a doFill that hit an overlay can be
 * retargeted with sel=. Prefers #id, then [name=…] (stable on forms), else the bare tag
 * (still narrows the search). Clipped to the open modal's subtree when one is present.
 *
 * The id and name are escaped so the suggested selector is always one
 * `document.querySelector` accepts: generated ids such as `:r1:` or names containing
 * quotes would otherwise yield a selector that throws when pasted back into sel=.
 *
 * @returns Up to 20 distinct `sel=… (label)` entries joined with ` | `, or a
 *   placeholder message when nothing fillable is reachable.
 */
function fillableList(): string {
  // CSS.escape is feature-detected so non-browser DOM shims without it fall back to the raw value.
  const escapeIdent = (raw: string): string =>
    typeof CSS !== 'undefined' && typeof CSS.escape === 'function' ? CSS.escape(raw) : raw
  // Inside a double-quoted attribute value only the quote and the backslash need escaping.
  const escapeQuoted = (raw: string): string => raw.replace(/["\\]/g, '\\$&')

  const root = document.querySelector('[role="dialog"][data-state="open"]') ?? document
  const els = Array.from(root.querySelectorAll('input:not([type=hidden]), textarea, [contenteditable]'))
    .filter(reachable)
    .map((el) => {
      const tag = el.tagName.toLowerCase()
      const name = el.getAttribute('name')
      const sel = el.id
        ? `#${escapeIdent(el.id)}`
        : name
          ? `${tag}[name="${escapeQuoted(name)}"]`
          : tag
      const label = elementLabel(el).slice(0, 30)
      return `sel=${sel}${label ? ` (${label})` : ''}`
    })
  return [...new Set(els)].slice(0, 20).join(' | ') || '(no fillable fields found)'
}
182
+
183
/**
 * Execute an action in the page and report the outcome.
 *
 * Never rejects: anything thrown by a handler (synchronously, or as a rejection of an
 * awaited handler) is converted into `{ ok: false, error }`.
 *
 * @param type - Action to run. The static type is exhaustive, but the value arrives over
 *   the wire, so an unrecognised name is answered with an explicit failure result rather
 *   than falling out of the switch and resolving to `undefined`.
 * @param args - Parameters for the action.
 */
export async function performAction(type: ActionType, args: ActionArgs): Promise<ActionResult> {
  try {
    switch (type) {
      // Async handlers are awaited here so their rejections land in the catch below.
      case 'click': return await doClick(args)
      case 'dblclick': return doDblclick(args)
      case 'hover': return doHover(args)
      case 'realclick': return doResolveRealClick(args)
      case 'fill': return doFill(args)
      case 'press': return doPress(args)
      case 'wait': return await doWait(args)
      case 'screenshot': return await doScreenshot(args)
      case 'query': return doQuery(args)
      case 'assert': return doAssert(args)
      case 'drag': return doDrag(args)
      case 'scrollIntoView': return doScrollIntoView(args)
      case 'drop': return doDrop(args)
      case 'clipboard': return await doClipboard(args)
      default:
        // Same wording resolveAction uses for an unknown type.
        return { ok: false, error: `unknown action: ${String(type)}` }
    }
  }
  catch (e) {
    return { ok: false, error: e instanceof Error ? e.message : String(e) }
  }
}
206
+
207
+ export interface EvalResult { ok: boolean, value?: string, error?: string, hint?: string }
208
+
209
/**
 * Refine an eval outcome into an actionable hint, so that /eval's generality does not
 * swallow a better-suited endpoint. Only two traps with a CLEAN signal (the hint is never
 * wrong) are reported:
 *   1. illegal selector → the raw `SyntaxError: ... is not a valid selector` DOM error gives
 *      no cue that the *selector argument* is the problem (CJK/special chars unescaped).
 *   2. /eval used as a hand-rolled /state — reading the store via indexedDB/localStorage,
 *      when /state returns every snapshot in one read.
 * A `querySelectorAll` rule was tried and REMOVED: live QA fired it on legitimate
 * cross-element aggregation (`.map(e=>({tag,label,value}))`, `.filter`) that /query can't
 * reproduce. Guiding principle: a hint that misfires on valid use is worse than no hint.
 * Pure, unit-tested.
 *
 * @param code - The evaluated source text.
 * @param ok - Whether the evaluation succeeded.
 * @param error - The error text when it failed.
 * @returns A hint string, or `undefined` when neither trap applies.
 */
export function diagnoseEval(code: string, ok: boolean, error?: string): string | undefined {
  const mentionsAny = (text: string | undefined, markers: readonly string[]): boolean =>
    !!text && markers.some(marker => text.includes(marker))

  if (!ok) {
    // On failure only the selector trap is diagnosed; the code itself is not inspected.
    const selectorMarkers = ['is not a valid selector', "Failed to execute 'querySelector"]
    return mentionsAny(error, selectorMarkers)
      ? 'illegal CSS selector — selectors match on class/role/attr, not text. For non-ASCII or text matching use /query?sel= or /click?text=, not a raw querySelector.'
      : undefined
  }

  // Store read hand-rolled over indexedDB/localStorage or the raw store registry → the
  // /state axis does it typed: overview / per-store / nested-path.
  const storeMarkers = ['indexedDB.open', 'localStorage.getItem', 'localforage', '__AIPEEK_STORES__']
  return mentionsAny(code, storeMarkers)
    ? 'reading store state by hand — the /state axis is typed: /state lists every store, /state/<name> drills into one, /state?path=store.field.0 expands a nested value (e.g. /state?path=imStore.conversations.0).'
    : undefined
}
233
+
234
// /action is the green channel (knob morphism replay / named semantic fn). It is NOT the
// simulate-a-human endpoint — that's /click /fill /press /hover. But `action` is the most
// generic verb, so an agent that wants "do something" reaches for /action and brings /click's
// params (text=, sel=, value=, key=). When knob= AND name= are both absent, those keys can
// only be a misroute, so the diagnosis points at the right endpoint instead of the dead-end
// "needs knob or name".
const MISROUTE: Record<string, string> = {
  text: '/click?text= (or /fill /press)',
  sel: '/click?sel= (or /fill /hover)',
  value: '/fill?sel=…&value=',
  key: '/press?key=',
}

/**
 * Build the "wrong endpoint" hint for a /action request. Pure, unit-tested.
 *
 * The caller is expected to invoke this only when both knob= and name= are absent, so a
 * legitimate /action?knob=X&value=8 call is never nagged.
 *
 * @param keys - The parameter names present on the request.
 * @returns A hint for the first key that belongs to /click·/fill·/press, or `undefined`
 *   when none does.
 */
export function misroutedAction(keys: string[]): string | undefined {
  for (const k of keys) {
    // Own-property check: request keys are arbitrary strings, and a plain-object lookup
    // would otherwise resolve inherited names such as `constructor` or `toString`.
    if (!Object.prototype.hasOwnProperty.call(MISROUTE, k))
      continue
    const dest = MISROUTE[k]
    if (dest)
      return `\`${k}=\` is a /click·/fill·/press param, not /action. /action is the green channel (?knob= replays a panel morphism, ?name= calls a mounted fn). To simulate a human click/type, use ${dest}.`
  }
  return undefined
}
256
+
257
/**
 * Run server-supplied JS in the page with auto-return (Chrome-console / Node-REPL
 * ergonomics): the whole code is first tried as a single expression so `qsa(...).length`
 * or `1+1` yield a value without an explicit `return`. If that fails to *compile*
 * (multi-statement, contains `return`, etc.) it falls back to a plain statement block.
 * A compile error only swaps the wrapper; a runtime throw surfaces as the error.
 *
 * The code is followed by a newline inside both wrappers, so a trailing `// comment` in
 * the input ends at that newline instead of commenting out the wrapper's closing tokens
 * (which made otherwise valid input fail to compile in both forms).
 *
 * SECURITY: this executes arbitrary code by design (it is the /eval escape hatch). It must
 * only ever be reachable from the trusted dev-tooling channel.
 *
 * @param code - JavaScript source; may use `await`.
 * @returns `ok: true` with `value` (strings verbatim, `undefined` dropped, everything else
 *   JSON-stringified with 2-space indent), or `ok: false` with the error message and
 *   stack. `hint` carries whatever `diagnoseEval` has to say about the outcome.
 */
export async function runEval(code: string): Promise<EvalResult> {
  try {
    let fn: () => Promise<unknown>
    try {
      // eslint-disable-next-line no-new-func
      fn = new Function(`return (async () => (${code}\n))()`) as () => Promise<unknown>
    }
    catch {
      // Not a single expression — compile it as a statement block instead. A compile error
      // here is a genuine syntax error and is reported by the outer catch.
      // eslint-disable-next-line no-new-func
      fn = new Function(`return (async () => { ${code}\n })()`) as () => Promise<unknown>
    }
    const result = await fn()
    const value = result === undefined ? undefined : typeof result === 'string' ? result : JSON.stringify(result, null, 2)
    return { ok: true, value, hint: diagnoseEval(code, true) }
  }
  catch (e) {
    const error = e instanceof Error ? `${e.message}\n${e.stack ?? ''}` : String(e)
    return { ok: false, error, hint: diagnoseEval(code, false, error) }
  }
}
283
+
284
/**
 * Run `fn` with the native dialogs stubbed out.
 *
 * Native alert/confirm/prompt are *synchronous* and freeze the whole JS thread until a
 * human dismisses them — which deadlocks the probe (it runs on that same thread, so the
 * HMR channel can never answer and every curl times out). For the duration of `fn` they
 * are auto-answered (confirm→true, prompt→its default or '', alert→noop) and each
 * suppressed dialog is recorded. The page's own functions are put back in `finally`,
 * whether `fn` resolves or rejects.
 *
 * @param fn - The work to run under the guard.
 * @returns The result of `fn` plus one entry (cut to 80 chars) per suppressed dialog.
 */
export async function withDialogGuard<T>(fn: () => Promise<T>): Promise<{ result: T, dialogs: string[] }> {
  const saved = { alert: window.alert, confirm: window.confirm, prompt: window.prompt }
  const dialogs: string[] = []
  const record = (prefix: string, message: unknown): void => {
    dialogs.push(`${prefix}: ${String(message ?? '')}`.slice(0, 80))
  }

  window.alert = (m?: unknown) => {
    record('alert', m)
  }
  window.confirm = (m?: unknown) => {
    record('confirm→true', m)
    return true
  }
  window.prompt = (m?: unknown, d?: string) => {
    record('prompt→default', m)
    return d ?? ''
  }

  try {
    const result = await fn()
    return { result, dialogs }
  }
  finally {
    window.alert = saved.alert
    window.confirm = saved.confirm
    window.prompt = saved.prompt
  }
}
308
+
309
+ // Click like a human, not like el.click(). A real click is a *position* the browser
310
+ // hit-tests, then a full event sequence at that point — hover, pointerdown/mousedown,
311
+ // browser-decided focus, pointerup/mouseup, click. Two things matter that el.click()
312
+ // skips entirely:
313
+ // 1. The target is whatever sits at (x,y) per elementFromPoint — so overlays,
314
+ // portals, and pointer-events:none are honored exactly as a mouse would.
315
+ // 2. mousedown is cancelable and moves focus; if a handler preventDefault()s it,
316
+ // focus does NOT move (radix triggers rely on this). We replicate that contract.
317
+ // This is what makes aihand catch focus/dismiss bugs that synthetic clicks hide.
318
+ const MOUSE_SEQUENCE = ['pointerover', 'pointerenter', 'mouseover', 'pointermove'] as const
319
+
320
/**
 * Nearest element (the node itself or an ancestor) that can take focus, as matched by a
 * fixed selector list via `closest`; `null` when there is none or no node was given.
 */
function focusableAncestor(el: Element | null): HTMLElement | null {
  if (!el)
    return null
  const focusableSelector = [
    'button',
    'a[href]',
    'input',
    'textarea',
    'select',
    '[tabindex]',
    '[contenteditable=""]',
    '[contenteditable="true"]',
  ].join(', ')
  return el.closest<HTMLElement>(focusableSelector) ?? null
}
323
+
324
/**
 * Dispatch a full synthetic click sequence at a viewport point.
 *
 * The receiver is whatever `document.elementFromPoint` reports there (falling back to
 * `document.body`) — not necessarily the element the caller searched for; it may be
 * covered, or be a portal target. Order: the MOUSE_SEQUENCE hover events, pointerdown,
 * mousedown, (focus), pointerup, mouseup, click.
 *
 * @returns The element that received the events.
 */
function realClickAt(x: number, y: number): Element | null {
  const receiver = (document.elementFromPoint(x, y) as HTMLElement | null) ?? document.body
  const mouseInit = { bubbles: true, cancelable: true, composed: true, clientX: x, clientY: y, view: window, button: 0, detail: 1 }
  const pointerInit = { ...mouseInit, pointerId: 1, pointerType: 'mouse', isPrimary: true, width: 1, height: 1 }

  // pointer* names become PointerEvents, everything else MouseEvents. The return value is
  // dispatchEvent's: false when a handler called preventDefault().
  const fire = (kind: string, extra?: { buttons: number }): boolean =>
    receiver.dispatchEvent(kind.startsWith('pointer')
      ? new PointerEvent(kind, { ...pointerInit, ...extra })
      : new MouseEvent(kind, { ...mouseInit, ...extra }))

  MOUSE_SEQUENCE.forEach((kind) => {
    fire(kind)
  })

  fire('pointerdown', { buttons: 1 })
  const mousedownUncancelled = fire('mousedown', { buttons: 1 })

  // Browser focus rule: mousedown moves focus to the nearest focusable ancestor, UNLESS a
  // handler called preventDefault() on mousedown. Match that exactly — this is the
  // contract radix's PopoverTrigger depends on.
  if (mousedownUncancelled) {
    const focusTarget = focusableAncestor(receiver)
    if (focusTarget && document.activeElement !== focusTarget)
      focusTarget.focus()
  }

  fire('pointerup', { buttons: 0 })
  fire('mouseup', { buttons: 0 })
  fire('click')
  return receiver
}
351
+
352
/** Click at the centre of the element's bounding box; returns what realClickAt actually hit. */
function realClick(el: HTMLElement): Element | null {
  const { left, top, width, height } = el.getBoundingClientRect()
  return realClickAt(left + width / 2, top + height / 2)
}
356
+
357
/**
 * What sits at the element's centre, if it is an unrelated thing covering it.
 * "Unrelated" = not the element, not its descendant, not its ancestor — i.e. a portal/
 * overlay on top. This is the same predicate the click helpers use for their "(hit … on
 * top)" note, computed *before* the click.
 *
 * @returns The covering element, or `null` when the centre belongs to the element's own
 *   ancestor/descendant chain (or nothing is there).
 */
function coveredBy(el: Element): Element | null {
  const box = el.getBoundingClientRect()
  const occupant = document.elementFromPoint(box.left + box.width / 2, box.top + box.height / 2)
  if (!occupant || occupant === el)
    return null
  const related = el.contains(occupant) || occupant.contains(el)
  return related ? null : occupant
}
365
+
366
/**
 * click: resolve the target, wait briefly for any overlay to clear, then dispatch a
 * realistic click at its centre.
 *
 * `ok: true` means the click was dispatched with nothing unrelated covering the target;
 * a target that stays covered is reported as a failure naming what blocks it.
 */
async function doClick(args: ActionArgs): Promise<ActionResult> {
  const el = findElement(args.sel, args.text)
  if (!el)
    return { ok: false, error: `no element for ${args.sel || args.text}`, detail: clickableList() }

  const describeTarget = (): string => elementLabel(el).slice(0, 40) || el.tagName.toLowerCase()

  // A close→open sequence leaves the previous Radix dialog's overlay in the DOM during
  // its exit animation (~150-300ms, variable). It covers the target's center point, so a
  // real hit-test lands on the overlay, not the button. Poll in 16ms steps, up to a 400ms
  // budget, for it to clear rather than clicking blind.
  let blocker = coveredBy(el)
  let waited = 0
  while (blocker && waited < 400) {
    await new Promise(resolve => setTimeout(resolve, 16))
    blocker = coveredBy(el)
    waited += 16
  }

  // Still covered after the budget = a STABLE overlay (an open modal sitting on top of the
  // target), not an exit animation. A mouse would hit the overlay, so report failure and
  // name the blocker so the caller can dismiss it first.
  if (blocker) {
    const blockerLabel = elementLabel(blocker)
    const quoted = blockerLabel ? ` "${blockerLabel.slice(0, 30)}"` : ''
    return { ok: false, error: `target "${describeTarget()}" is covered by <${blocker.tagName.toLowerCase()}>${quoted} — an overlay/modal is on top; dismiss it first` }
  }

  const hit = realClick(el as HTMLElement)
  const landedElsewhere = !!hit && hit !== el && !el.contains(hit) && !hit.contains(el)
  const note = landedElsewhere ? ` (hit <${hit.tagName.toLowerCase()}> on top)` : ''
  return { ok: true, detail: `clicked ${describeTarget()}${note}` }
}
390
+
391
/**
 * hover: the move-only prefix of a real click. Dispatches the MOUSE_SEQUENCE pointer/mouse
 * "over" events at the element's centre so hover-only UI (dropdown menus, tooltips, :hover
 * reveals) appears. No down/up/click, so nothing is activated. The events go to whatever
 * the mouse would actually hit at that point (overlays/portals honoured), falling back to
 * the element itself when nothing is reported there.
 */
function doHover(args: ActionArgs): ActionResult {
  const el = findElement(args.sel, args.text)
  if (!el)
    return { ok: false, error: `no element for ${args.sel || args.text}`, detail: clickableList() }

  const box = el.getBoundingClientRect()
  const x = box.left + box.width / 2
  const y = box.top + box.height / 2
  const receiver = (document.elementFromPoint(x, y) as HTMLElement | null) ?? el as HTMLElement

  const mouseInit = { bubbles: true, cancelable: true, composed: true, clientX: x, clientY: y, view: window }
  const pointerInit = { ...mouseInit, pointerId: 1, pointerType: 'mouse', isPrimary: true, width: 1, height: 1 }
  MOUSE_SEQUENCE.forEach((kind) => {
    const event = kind.startsWith('pointer') ? new PointerEvent(kind, pointerInit) : new MouseEvent(kind, mouseInit)
    receiver.dispatchEvent(event)
  })

  const label = elementLabel(el).slice(0, 40) || el.tagName.toLowerCase()
  return { ok: true, detail: `hovered ${label}` }
}
409
+
410
/**
 * dblclick: inline-edit (TodoMVC label→input, file rename, spreadsheet cell), text select,
 * map zoom. A real double-click is two full click sequences followed by one `dblclick`
 * event (detail: 2) at the same point; el.click() twice never fires `dblclick`, so
 * handlers listening for it never run. Reuses realClick for the two clicks, then
 * dispatches `dblclick` at the element's centre to whatever is hit there.
 */
function doDblclick(args: ActionArgs): ActionResult {
  const el = findElement(args.sel, args.text)
  if (!el)
    return { ok: false, error: `no element for ${args.sel || args.text}`, detail: clickableList() }

  const target = el as HTMLElement
  realClick(target)
  const hit = realClick(target)

  // The centre is re-measured after the clicks, before the dblclick event is sent.
  const box = el.getBoundingClientRect()
  const x = box.left + box.width / 2
  const y = box.top + box.height / 2
  const receiver = (document.elementFromPoint(x, y) as HTMLElement | null) ?? target
  const dblclickInit = { bubbles: true, cancelable: true, composed: true, clientX: x, clientY: y, view: window, button: 0, detail: 2 }
  receiver.dispatchEvent(new MouseEvent('dblclick', dblclickInit))

  const landedElsewhere = !!hit && hit !== el && !el.contains(hit) && !hit.contains(el)
  const note = landedElsewhere ? ` (hit <${hit.tagName.toLowerCase()}> on top)` : ''
  const label = elementLabel(el).slice(0, 40) || el.tagName.toLowerCase()
  return { ok: true, detail: `double-clicked ${label}${note}` }
}
429
+
430
+
431
/**
 * realclick, resolution half: work out *where* to click; nothing is dispatched here.
 *
 * The trusted click itself is fired by whichever channel can produce isTrusted=true
 * input: Electron's webContents.sendInputEvent (via electronAPI.invoke, in client.ts) or a
 * Chrome extension's chrome.debugger (via the server's CDP queue). Synthetic events can't
 * open a Radix ContextMenu — that's the whole reason this path exists.
 *
 * @returns The point to click: x= & y= passed through unchanged when both are given,
 *   otherwise the rounded centre of the resolved element.
 */
function doResolveRealClick(args: ActionArgs): ActionResult {
  const { x: givenX, y: givenY } = args
  if (givenX !== undefined && givenY !== undefined)
    return { ok: true, x: givenX, y: givenY, detail: `resolved (${givenX}, ${givenY})` }

  const el = findElement(args.sel, args.text)
  if (!el)
    return { ok: false, error: `no element for ${args.sel || args.text}`, detail: clickableList() }

  const box = el.getBoundingClientRect()
  const x = Math.round(box.left + box.width / 2)
  const y = Math.round(box.top + box.height / 2)
  const label = elementLabel(el).slice(0, 40) || el.tagName.toLowerCase()
  return { ok: true, x, y, detail: `resolved ${label} at (${x}, ${y})` }
}
447
+
448
/**
 * fill: write `args.value` (default '') into the resolved target and fire the events
 * frameworks listen for. Three kinds of target are handled:
 *   - contenteditable: select-all + `insertText`, with a direct-write fallback;
 *   - <select>: choose an option by visible text, then by value attribute, then by
 *     partial text;
 *   - <input>/<textarea>: set the value through the prototype setter.
 * Anything else is rejected with a list of the real fillable fields.
 */
function doFill(args: ActionArgs): ActionResult {
  const el = findElement(args.sel, args.text)
  if (!el)
    return { ok: false, error: `no element for ${args.sel || args.text}`, detail: clickableList() }
  const value = args.value ?? ''
  const html = el as HTMLElement
  html.focus()

  if (html.isContentEditable) {
    // ProseMirror/tiptap listen on beforeinput — select-all then insertText
    const sel = window.getSelection()
    const range = document.createRange()
    range.selectNodeContents(html)
    sel?.removeAllRanges()
    sel?.addRange(range)
    document.execCommand('insertText', false, value)

    // The insert counts as applied when the content now equals the value, or is a
    // NON-EMPTY fragment of it (presumably editor normalisation — TODO confirm). An empty
    // field must not pass the fragment test: every string "includes" '', so a no-op
    // execCommand on an empty editor would otherwise be reported as a successful fill.
    const current = html.textContent
    const applied = current === value || (!!current && value.includes(current))
    if (!applied) {
      // execCommand may be a no-op under some setups — fall back to direct text + input event
      html.textContent = value
      html.dispatchEvent(new InputEvent('input', { bubbles: true, inputType: 'insertText', data: value }))
    }
    return { ok: true, detail: `filled contenteditable, ${value.length} chars` }
  }

  // <select>: value= matches an option by its visible text first, then by its value attr
  // (text is what a human reads in the dropdown). Set selectedIndex + dispatch change so
  // React/Vue bindings update.
  if (el.tagName === 'SELECT') {
    const select = el as HTMLSelectElement
    const opts = Array.from(select.options)
    const lower = value.toLowerCase()
    const match = opts.find(o => o.text.trim().toLowerCase() === lower)
      ?? opts.find(o => o.value === value)
      ?? opts.find(o => o.text.toLowerCase().includes(lower))
    if (!match)
      return { ok: false, error: `no <option> matching "${value}" — options: ${opts.map(o => o.text.trim()).join(' | ')}` }
    select.selectedIndex = match.index
    select.dispatchEvent(new Event('input', { bubbles: true }))
    select.dispatchEvent(new Event('change', { bubbles: true }))
    return { ok: true, detail: `selected "${match.text.trim()}"` }
  }

  // Only INPUT/TEXTAREA value setters are legal to .call() here. text= often matches a
  // styled overlay (GitHub's query-builder paints typed tokens into a <span> sitting over a
  // transparent input) or a <label> — calling the input setter on those throws the opaque
  // native `Illegal invocation`, which tells the agent nothing. So the diagnosis is made
  // actionable: name the matched tag and point at the real fillable inputs nearby.
  if (el.tagName !== 'INPUT' && el.tagName !== 'TEXTAREA')
    return { ok: false, error: `matched <${el.tagName.toLowerCase()}>, not a fillable input — text= likely hit a label/overlay painted over the real field. Target it with sel=`, detail: fillableList() }

  // React overrides the value setter on the element *instance* to track changes; a plain
  // `input.value = x` writes through it so React's tracker never sees a diff and onChange
  // never fires (controlled inputs stay empty). Call the *prototype* setter instead — the
  // tracker observes the change and the synthetic onChange fires.
  const input = el as HTMLInputElement
  const proto = el.tagName === 'TEXTAREA' ? HTMLTextAreaElement.prototype : HTMLInputElement.prototype
  const setter = Object.getOwnPropertyDescriptor(proto, 'value')?.set
  if (setter)
    setter.call(input, value)
  else input.value = value
  input.dispatchEvent(new Event('input', { bubbles: true }))
  input.dispatchEvent(new Event('change', { bubbles: true }))
  return { ok: true, detail: `filled ${value.length} chars` }
}
512
+
513
/**
 * Split a key combo such as `Control+Shift+K` into its final key and lower-cased modifiers.
 *
 * A trailing `+` means the key itself is the plus sign (`"+"`, `"Ctrl++"`); a naive
 * `split('+')` would yield an empty key for those.
 */
function parseKeyCombo(combo: string): { key: string, mods: string[] } {
  let key: string
  let modPart: string
  if (combo.endsWith('+')) {
    key = '+'
    // Drop the key, then the separator in front of it (if any).
    modPart = combo.slice(0, -1).replace(/\+$/, '')
  }
  else {
    const cut = combo.lastIndexOf('+')
    key = combo.slice(cut + 1)
    modPart = cut === -1 ? '' : combo.slice(0, cut)
  }
  const mods = modPart.split('+').filter(Boolean).map(m => m.toLowerCase())
  return { key, mods }
}

/**
 * Dispatch a synthetic keydown + keyup for `args.key` on the focused element
 * (falling back to `document.body`).
 *
 * `args.key` is either a bare key (`Enter`) or a `+`-joined combo whose last part is the
 * key and whose leading parts are modifiers (control/ctrl, shift, alt/option,
 * meta/cmd/command; case-insensitive).
 *
 * @returns `{ ok: false, error }` when no key was supplied, otherwise `{ ok: true, detail }`.
 */
function doPress(args: ActionArgs): ActionResult {
  const { key, mods } = parseKeyCombo(args.key ?? '')
  // Without a key there is nothing meaningful to dispatch — say so instead of
  // reporting "pressed undefined".
  if (!key)
    return { ok: false, error: 'no key given — pass key=Enter or a combo like key=Control+Shift+K' }
  const init: KeyboardEventInit = {
    key,
    bubbles: true,
    cancelable: true,
    ctrlKey: mods.includes('control') || mods.includes('ctrl'),
    shiftKey: mods.includes('shift'),
    altKey: mods.includes('alt') || mods.includes('option'),
    metaKey: mods.includes('meta') || mods.includes('cmd') || mods.includes('command'),
  }
  const target = (document.activeElement as HTMLElement) || document.body
  target.dispatchEvent(new KeyboardEvent('keydown', init))
  target.dispatchEvent(new KeyboardEvent('keyup', init))
  return { ok: true, detail: `pressed ${args.key} on ${target.tagName.toLowerCase()}` }
}
531
+
532
/**
 * Poll (every 100ms) for the element matched by `args.sel` / `args.text` until the
 * requested condition holds or `args.timeout` (default 5000ms) has elapsed.
 *
 * Three modes: `enabled` (element present AND neither `disabled` nor
 * `aria-disabled="true"`), `gone` (element absent), or appeared (the default).
 *
 * @returns a promise that always resolves: `{ ok: true, detail }` once the condition
 *          is met, `{ ok: false, error }` on timeout.
 */
function doWait(args: ActionArgs): Promise<ActionResult> {
  const limitMs = args.timeout ?? 5000
  const startedAt = performance.now()
  const elapsed = () => performance.now() - startedAt

  const blocked = (node: Element) =>
    (node as HTMLInputElement).disabled || node.getAttribute('aria-disabled') === 'true'

  let verb = 'appeared'
  if (args.enabled)
    verb = 'enabled'
  else if (args.gone)
    verb = 'disappeared'

  // Looks the element up afresh on every poll and checks it against the selected mode.
  const satisfied = (): boolean => {
    const found = findElement(args.sel, args.text)
    if (args.enabled)
      return !!found && !blocked(found)
    return !!found !== !!args.gone
  }

  return new Promise((resolve) => {
    const poll = () => {
      if (satisfied()) {
        resolve({ ok: true, detail: `${verb} after ${Math.round(elapsed())}ms` })
        return
      }
      if (elapsed() > limitMs) {
        resolve({ ok: false, error: `timeout ${limitMs}ms waiting for ${args.sel || args.text} to be ${verb}` })
        return
      }
      setTimeout(poll, 100)
    }
    poll()
  })
}
555
+
556
/**
 * Render `args.sel` (or `document.body` when no selector is given) to a PNG data URL
 * using html-to-image.
 *
 * Every failure is reported as `{ ok: false, error }` rather than a rejected promise:
 * an invalid selector, a missing element, a failed dynamic import of html-to-image,
 * or a render error.
 *
 * @returns `{ ok: true, dataUrl, detail }` on success.
 */
async function doScreenshot(args: ActionArgs): Promise<ActionResult> {
  let target: Element | null
  try {
    target = args.sel ? document.querySelector(args.sel) : document.body
  }
  catch {
    // querySelector throws on a malformed selector — report it like any other failure.
    return { ok: false, error: `invalid selector: ${args.sel}` }
  }
  if (!target)
    return { ok: false, error: `no element for ${args.sel}` }
  // html-to-image re-fetches every <img> to inline it; one cross-origin or
  // rate-limited (429) image rejects the whole render. Drop external/broken
  // images from the capture — a screenshot minus a few avatars beats no screenshot.
  const sameOrigin = (src: string) => {
    try {
      return new URL(src, location.href).origin === location.origin
    }
    catch {
      return false
    }
  }
  const filter = (n: HTMLElement) => !(n instanceof HTMLImageElement && (!n.complete || n.naturalWidth === 0 || !sameOrigin(n.src)))
  try {
    // Imported inside the try so a failed chunk load is reported, not thrown.
    const { toPng } = await import('html-to-image')
    // imagePlaceholder is a 1x1 GIF data URL.
    const dataUrl = await toPng(target as HTMLElement, { filter, imagePlaceholder: 'data:image/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==' })
    return { ok: true, dataUrl, detail: `${target.tagName.toLowerCase()} captured` }
  }
  catch (e) {
    // Handles the case where the rejection value is a DOM Event (e.g. an image error event) rather than an Error.
    const msg = e instanceof Event ? `image load failed (${(e.target as HTMLImageElement)?.src?.slice(0, 80) || 'unknown'})` : e instanceof Error ? e.message : String(e)
    return { ok: false, error: `screenshot failed: ${msg}` }
  }
}
582
+
583
// Query is the read-side counterpart of click/fill's `sel=`: rather than acting on the
// matched element it reports the facts you would otherwise fetch via /eval — how many
// elements match, plus each one's text, visibility and assertion-relevant attributes.
// /wait answers "does it appear over time"; query answers "what is it right now".
// Only whitelisted attributes are reported (the exact names below plus anything starting
// with data- or aria-); a node's full class/style set is noise. `disabled` and `checked`
// are read from the live element in elAttrs, not from this list.
const QUERY_ATTRS = ['role', 'data-state', 'value', 'href', 'title']
const QUERY_PREFIXES = ['data-', 'aria-']

/**
 * Collect the whitelisted attributes of `el` as a plain name → value record.
 *
 * - A `value` attribute on an element that `isSensitive` flags is replaced by a
 *   redaction marker carrying only its length.
 * - Form controls (INPUT / TEXTAREA / SELECT) with no `value` attribute get their live
 *   value through `safeValue` (defined elsewhere in this file).
 * - `disabled` / `checked` are added as the string `'true'` when the live property is set.
 */
function elAttrs(el: Element): Record<string, string> {
  const redact = isSensitive(el)
  const picked: Record<string, string> = {}
  const wanted = (name: string) =>
    QUERY_ATTRS.includes(name) || QUERY_PREFIXES.some(prefix => name.startsWith(prefix))

  for (const { name, value } of Array.from(el.attributes)) {
    if (!wanted(name))
      continue
    picked[name] = name === 'value' && redact ? `‹redacted ${value.length} chars›` : value
  }

  const control = el as HTMLInputElement
  const isFormControl = el.tagName === 'INPUT' || el.tagName === 'TEXTAREA' || el.tagName === 'SELECT'
  if (typeof control.value === 'string' && picked.value === undefined && isFormControl)
    picked.value = safeValue(el)
  if (control.disabled)
    picked.disabled = 'true'
  if (control.checked)
    picked.checked = 'true'
  return picked
}
608
+
609
/**
 * Report every element matching `args.sel`: the total count plus, for the first 20
 * matches, a label (cut to 80 chars), a visibility flag and the whitelisted attributes.
 *
 * @returns `{ ok: true, detail }` where `detail` is pretty-printed JSON (prefixed with a
 *          "showing 20 of N" line when truncated), or `{ ok: false, error }` when the
 *          selector cannot be parsed.
 */
function doQuery(args: ActionArgs): ActionResult {
  const cap = 20
  let found: Element[]
  try {
    found = Array.from(document.querySelectorAll(args.sel!))
  }
  catch {
    return { ok: false, error: `invalid selector: ${args.sel} — URL-encode it (curl -G --data-urlencode 'sel=...')` }
  }

  const count = found.length
  const matches = found.slice(0, cap).map((node) => {
    const text = elementLabel(node).slice(0, 80)
    const visible = isVisible(node)
    const attrs = elAttrs(node)
    return { text, visible, attrs }
  })
  const report = JSON.stringify({ count, matches }, null, 2)
  if (count > cap)
    return { ok: true, detail: `(showing ${cap} of ${count})\n${report}` }
  return { ok: true, detail: report }
}
626
+
627
/**
 * Render a domain value as the string an assertion compares against.
 *
 * Primitives, and objects with a meaningful string form (e.g. an array of primitives →
 * `a,b`), keep their `String()` rendering. Objects that would only print as
 * `[object …]` are rendered as JSON so they can be asserted on and show up usefully in
 * the failure message.
 */
function domainValueToString(value: unknown): string {
  if (typeof value !== 'object' || value === null)
    return String(value)
  let plain: string
  try {
    plain = String(value)
  }
  catch {
    // e.g. a null-prototype object has no toString to call
    plain = '[object Object]'
  }
  if (!plain.includes('[object '))
    return plain
  try {
    return JSON.stringify(value) ?? plain
  }
  catch {
    // circular structure / BigInt member — keep the plain rendering
    return plain
  }
}

// Assert a domain variable (from __AIPEEK_SCREEN__) or a DOM element's text equals the
// expected value. Pass/fail with the actual value on failure — the chain stops and says
// "asserted X==Y, actual Z", not "the next step happened to miss".
function doAssert(args: ActionArgs): ActionResult {
  let actual: string
  if (args.screen) {
    let domain: Record<string, unknown> = {}
    try {
      domain = window.__AIPEEK_SCREEN__?.() ?? {}
    }
    catch (e) {
      return { ok: false, error: `__AIPEEK_SCREEN__ threw: ${e instanceof Error ? e.message : String(e)}` }
    }
    if (!(args.screen in domain))
      return { ok: false, error: `no domain key "${args.screen}" — available: ${Object.keys(domain).join(', ') || '(none — app injected no __AIPEEK_SCREEN__)'}` }
    actual = domainValueToString(domain[args.screen])
  }
  else {
    const el = findElement(args.sel, args.text)
    if (!el)
      return { ok: false, error: `no element for ${args.sel || args.text}` }
    actual = elementLabel(el)
  }
  const target = args.screen ? `screen.${args.screen}` : (args.sel || args.text)
  // `actual` is always a string, so compare against the string form of the expectation:
  // an `equals` that arrived as a non-string (e.g. a JSON number) would otherwise never match.
  const expected = args.equals === undefined ? undefined : String(args.equals)
  if (actual === expected)
    return { ok: true, detail: `${target} == ${args.equals}` }
  // Time ordering: a mismatch on a busy-named key (streaming/loading/…) most likely means
  // the assert ran before the async flow settled, not that the value is permanently wrong.
  // Distinguish "still mid-flight, wait for it" from "settled to the wrong value" so the agent
  // inserts a `wait` instead of concluding the action failed. isBusyKey is the same classifier
  // settle uses — generic, bilingual, no per-app field hard-coding.
  const hint = args.screen && isBusyKey(args.screen)
    ? ` — "${args.screen}" is an in-flight field; if it's still settling, insert a {"type":"wait",...} before this assert`
    : ''
  return { ok: false, error: `asserted ${target} == "${args.equals}", actual "${actual}"${hint}` }
}
663
+
664
// Bring an element into the viewport so a following click can reach it. The common case
// is a row that is rendered but scrolled out of view. In a virtual list (tanstack-virtual,
// dnd-kit) an off-screen row may not be in the DOM at all; when nothing matches, the
// error says so — the list container may need scrolling first (which /eval can do).
function doScrollIntoView(args: ActionArgs): ActionResult {
  const node = findElement(args.sel, args.text)
  if (!node) {
    return {
      ok: false,
      error: `no element for ${args.sel || args.text} — if it's in a virtual list it may not be rendered yet`,
      detail: clickableList(),
    }
  }

  node.scrollIntoView({ block: 'center', inline: 'center' })

  // Re-measure after scrolling: the element may still not fit entirely inside the viewport.
  const { top, left, bottom, right } = node.getBoundingClientRect()
  const fullyVisible = top >= 0 && left >= 0 && bottom <= innerHeight && right <= innerWidth
  const label = elementLabel(node).slice(0, 40) || node.tagName.toLowerCase()
  const suffix = fullyVisible ? '' : ' (still partly off-screen)'
  return { ok: true, detail: `scrolled ${label} into view${suffix}` }
}
678
+
679
// Drag via a synthetic pointer sequence: pointerdown on the source → eight pointermove
// steps along the straight line between the two centres (dnd-kit's PointerSensor has an
// activation distance constraint, so we move in increments past it) → pointerup on the
// destination. Only Pointer Events are dispatched — no HTML5 dragstart/dragover/drop — so
// a library built on the native DnD API is not driven by this. Synthetic events are
// untrusted; when they don't trigger the reorder, the detail string tells the caller to
// retry through realclick. Best-effort, honestly reported.
//
// Returns { ok: false, error } when the source is not found, or when `to` is missing,
// matches nothing, or is not a valid selector.
function doDrag(args: ActionArgs): ActionResult {
  const src = findElement(args.sel, args.text)
  if (!src)
    return { ok: false, error: `no source element for ${args.sel || args.text}`, detail: clickableList() }
  let dst: Element | null
  try {
    dst = args.to ? document.querySelector(args.to) : null
  }
  catch {
    // querySelector throws on a malformed selector — report it instead of letting it escape.
    return { ok: false, error: `invalid selector for to=${args.to}` }
  }
  if (!dst)
    return { ok: false, error: `no destination element for to=${args.to}` }
  const sr = src.getBoundingClientRect()
  const dr = dst.getBoundingClientRect()
  // Centre points of source and destination, in viewport coordinates.
  const sx = sr.left + sr.width / 2
  const sy = sr.top + sr.height / 2
  const dx = dr.left + dr.width / 2
  const dy = dr.top + dr.height / 2
  const ptr = (x: number, y: number, extra: PointerEventInit = {}) =>
    ({ bubbles: true, cancelable: true, composed: true, clientX: x, clientY: y, view: window, pointerId: 1, pointerType: 'mouse', isPrimary: true, button: 0, ...extra })

  src.dispatchEvent(new PointerEvent('pointerdown', ptr(sx, sy, { buttons: 1 })))
  // Step through the path so dnd-kit clears its activation-distance constraint.
  const steps = 8
  for (let i = 1; i <= steps; i++) {
    const x = sx + (dx - sx) * (i / steps)
    const y = sy + (dy - sy) * (i / steps)
    // Dispatch on whatever sits under the pointer; fall back to the destination when the
    // point hits nothing (e.g. it lies outside the viewport).
    const over = (document.elementFromPoint(x, y) as HTMLElement | null) ?? dst as HTMLElement
    over.dispatchEvent(new PointerEvent('pointermove', ptr(x, y, { buttons: 1 })))
  }
  ;(dst as HTMLElement).dispatchEvent(new PointerEvent('pointerup', ptr(dx, dy, { buttons: 0 })))
  return { ok: true, detail: `dragged ${elementLabel(src).slice(0, 30) || src.tagName.toLowerCase()} → ${elementLabel(dst).slice(0, 30) || dst.tagName.toLowerCase()} (synthetic; if no reorder, retry with realclick — dnd-kit may need trusted pointer events)` }
}
713
+
714
// Simulate dropping files on a target — the upload path that needs no native file picker
// (which synthetic clicks can't drive). A DataTransfer is filled with one placeholder File
// per name in `args.files` (content is a single empty string, type
// application/octet-stream), then dragenter → dragover → drop are dispatched at the
// target's centre, in that order.
function doDrop(args: ActionArgs): ActionResult {
  const zone = findElement(args.sel, args.text)
  if (!zone) {
    return { ok: false, error: `no drop target for ${args.sel || args.text}`, detail: clickableList() }
  }

  const names = args.files ?? []
  const transfer = new DataTransfer()
  names.forEach((name) => {
    transfer.items.add(new File([''], name, { type: 'application/octet-stream' }))
  })

  const box = zone.getBoundingClientRect()
  const eventInit = {
    bubbles: true,
    cancelable: true,
    composed: true,
    clientX: box.left + box.width / 2,
    clientY: box.top + box.height / 2,
    dataTransfer: transfer,
  }
  const phases = ['dragenter', 'dragover', 'drop'] as const
  for (const phase of phases) {
    zone.dispatchEvent(new DragEvent(phase, eventInit))
  }

  const label = elementLabel(zone).slice(0, 30) || zone.tagName.toLowerCase()
  return { ok: true, detail: `dropped ${transfer.items.length} file(s) [${names.join(', ')}] on ${label}` }
}
730
+
731
// Clipboard access in both directions. mode=write seeds the clipboard with `args.value`
// (so a later paste can be exercised); any other mode reads it back (so a "copy" button
// can be verified). navigator.clipboard needs a focused document and, for reads,
// permission — any failure is turned into a clear error result rather than a hang.
async function doClipboard(args: ActionArgs): Promise<ActionResult> {
  const writing = args.mode === 'write'
  try {
    if (!writing) {
      const text = await navigator.clipboard.readText()
      return { ok: true, detail: `clipboard: "${text.slice(0, 200)}"` }
    }
    const payload = args.value ?? ''
    await navigator.clipboard.writeText(payload)
    return { ok: true, detail: `clipboard ← "${payload.slice(0, 60)}"` }
  }
  catch (err) {
    const reason = err instanceof Error ? err.message : String(err)
    return { ok: false, error: `clipboard ${args.mode ?? 'read'} failed: ${reason} (needs document focus / permission)` }
  }
}