@pa1nd/horse-browser 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +31 -0
- package/agent-helpers.py +15 -0
- package/agent-input.py +275 -0
- package/install.sh +2 -0
- package/package.json +2 -1
package/SKILL.md
CHANGED
|
@@ -42,6 +42,37 @@ If `horse-browser` isn't on your PATH, the one-time setup hasn't been run — te
|
|
|
42
42
|
to run the repo's `./install.sh` (fetches the browser, registers the launcher). Don't
|
|
43
43
|
attempt setup yourself.
|
|
44
44
|
|
|
45
|
+
## Input — use trusted, real events
|
|
46
|
+
|
|
47
|
+
Drive clicks and typing with **`click(css)`** and **`type_into(css, text)`** (or
|
|
48
|
+
`type_text(text)` into the already-focused field). They fire the same key/mouse events a
|
|
49
|
+
real browser generates, so the page's `keyup`/`input`/`mousedown` listeners actually run —
|
|
50
|
+
submit buttons enable, autocompletes fire, React/Vue state updates, menus open. **Never
|
|
51
|
+
drive a form with `el.value = …` or `el.click()` in `js(...)`**: those fire *no* events, so
|
|
52
|
+
the value or click *looks* applied while the page's logic never ran (disabled submit, dead
|
|
53
|
+
dropdown, stale state). This is **correctness, not just bot-evasion** — plain sites break
|
|
54
|
+
too; the anti-detection win rides along free.
|
|
55
|
+
|
|
56
|
+
- `click(css)` — trusted mousedown→mouseup→click (+pointer). `click_xy(x, y)` for coords /
|
|
57
|
+
shadow DOM / cross-iframe (CDP input passes through iframes).
|
|
58
|
+
- `type_into(css, text, clear=?, enter=?)` — real per-char keys, fast. `type_text(text)`
|
|
59
|
+
types the focused element. `press("Enter"|"Tab"|"Escape"|"Arrow…")` for a named key.
|
|
60
|
+
- Escape hatch: `insert_text_fast(text)` dumps via insertText (no key events) — only for a
|
|
61
|
+
plain `<textarea>` with no listeners where speed matters.
|
|
62
|
+
- Fast untrusted (`js("el.click()")`) is fine on trivial internal/dev pages, but **always**
|
|
63
|
+
use trusted input on any **login / signup / checkout**, anything behind a **bot vendor**
|
|
64
|
+
(Akamai / PerimeterX / DataDome / Cloudflare / hCaptcha / reCAPTCHA), or **after any
|
|
65
|
+
challenge appeared**.
|
|
66
|
+
|
|
67
|
+
### Easy challenges: solve them, don't halt
|
|
68
|
+
|
|
69
|
+
Many "captchas" are just a gesture — **click a checkbox, press-&-hold, slide-to-verify**.
|
|
70
|
+
Do them; don't escalate. With a real fingerprint (always-on) plus a trusted click, the easy
|
|
71
|
+
ones usually clear. Call **`solve_challenge()`** — it classifies the challenge and solves the
|
|
72
|
+
easy kind, or returns `escalate:<why>` for the **perception** kind (identify images, read
|
|
73
|
+
distorted text, rotate, audio) — and *only those* go to the operator. The gesture verbs:
|
|
74
|
+
`press_hold(css, seconds)` and `drag(css, dx=… / to=(x,y))`.
|
|
75
|
+
|
|
45
76
|
## Extension
|
|
46
77
|
|
|
47
78
|
Gives each Claude session its own coloured tab group (label = last 4 chars of `CLAUDE_CODE_SESSION_ID`; subagents inherit it and share the group). Keeps RAM and tab-strip clutter from bleeding across parallel sessions, and lets you reason about "my tabs" as a real set. `chrome.tabGroups` is extension-only — no CDP equivalent — which is why an extension exists at all.
|
package/agent-helpers.py
CHANGED
|
@@ -260,3 +260,18 @@ def capture_screenshot(path=None, full=False, max_dim=None):
|
|
|
260
260
|
suffix=".png", dir=str(_bh_ipc._TMP))
|
|
261
261
|
os.close(fd)
|
|
262
262
|
return _real_capture_screenshot(path, full=full, max_dim=max_dim)
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
# ── Tier 2 trusted-input layer — shipped as the sibling horse_input.py ───────────
|
|
266
|
+
# horse-browser splits its managed helpers by concern: THIS file drives tabs (focus-safe
|
|
267
|
+
# open/switch/list, per-call screenshots); horse_input.py does trusted, correct INPUT
|
|
268
|
+
# (real click/key events, easy-challenge gestures). We exec the sibling here so the single
|
|
269
|
+
# "do not edit" loader stub in agent_helpers.py bootstraps both. `_hb_path` is the path to
|
|
270
|
+
# THIS file, set by that stub; horse_input.py sits next to it. Missing/failed → skipped, so
|
|
271
|
+
# tab-driving still works even if the input file didn't ship.
|
|
272
|
+
# Chain-load the sibling trusted-input helpers (horse_input.py) into this namespace.
# Best-effort: any failure is reported on stderr and skipped, so the tab-driving
# helpers defined above keep working even if the input file is missing or broken.
try:
    _hb_input = os.path.join(os.path.dirname(_hb_path), "horse_input.py")
    # Read explicitly as UTF-8 (the sibling contains non-ASCII characters, which the
    # platform default encoding may fail to decode) and close the handle promptly.
    with open(_hb_input, encoding="utf-8") as _hb_input_fh:
        _hb_input_src = _hb_input_fh.read()
    # Compile with the real path so tracebacks point at horse_input.py.
    exec(compile(_hb_input_src, _hb_input, "exec"))
except Exception as _hb_input_err:
    import sys as _hb_isys
    print("horse-browser: couldn't load horse_input.py (%r) — re-run horse-browser's install.sh" % (_hb_input_err,), file=_hb_isys.stderr)
|
package/agent-input.py
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# horse-browser Tier 2 — trusted, correct input (installed as horse_input.py).
|
|
2
|
+
#
|
|
3
|
+
# Loaded by horse_helpers.py, which execs this sibling — so every agent that drives
|
|
4
|
+
# the browser gets these by default. This is the AGENT LAYER of realness: input sent
|
|
5
|
+
# over CDP that fires the SAME events a real browser would, applied on every site.
|
|
6
|
+
#
|
|
7
|
+
# WHY IT'S NOT JUST STEALTH — it's correctness. Sites bind real logic to real events:
|
|
8
|
+
# • keyup / keydown / input → enable the submit button, fire autocomplete, validate,
|
|
9
|
+
# update React/Vue/Svelte controlled state.
|
|
10
|
+
# • mousedown / pointerdown → open menus, custom widgets, "close on outside mousedown".
|
|
11
|
+
# insertText sets the value but fires NO key events; `el.value=` fires nothing at all;
|
|
12
|
+
# `el.click()` fires only `click`, not the down/up/pointer chain. In every case the text
|
|
13
|
+
# or click *appears* to work while the page's logic never ran — a silent break that hits
|
|
14
|
+
# plain forms, not just defended ones. These verbs fire the real events, so pages behave.
|
|
15
|
+
# (Bot-detector realness rides along for free.) Human-like MOTION — bezier paths, warm-up,
|
|
16
|
+
# gaussian cadence — is the separate, gated Tier 3 layer; this file stays cheap and fast.
|
|
17
|
+
#
|
|
18
|
+
# Reach for these (they shadow the untrusted shortcuts):
|
|
19
|
+
# click(css) trusted mousedown->mouseup->click at the element's center
|
|
20
|
+
# type_into(css, text) focus + real per-char keyDown/keyUp (fires keyup/input/change)
|
|
21
|
+
# type_text(text) OVERRIDE of the stock insertText typer → real key events
|
|
22
|
+
# press(name, times=1) a trusted named key (Enter, Tab, Escape, Arrow*, Backspace)
|
|
23
|
+
# press_hold(css, s) trusted press-and-hold (Press & Hold challenges)
|
|
24
|
+
# drag(css, to=/dx=/dy=) trusted drag (slide-to-verify)
|
|
25
|
+
# solve_challenge(act=1) classify a challenge → solve the EASY ones (click/hold/drag),
|
|
26
|
+
# or return "escalate:<why>" for image/text/audio ones.
|
|
27
|
+
# Deliberate escape hatch:
|
|
28
|
+
# insert_text_fast(text) raw Input.insertText — fast, but fires NO key events; only for
|
|
29
|
+
# dumping into a plain <textarea> with no listeners.
|
|
30
|
+
#
|
|
31
|
+
# `cdp` is provided by horse_helpers.py (loaded first) — we drive everything through it.
|
|
32
|
+
|
|
33
|
+
import math as _im
|
|
34
|
+
import random as _ir
|
|
35
|
+
import time as _it
|
|
36
|
+
import json as _ij
|
|
37
|
+
import sys as _isys
|
|
38
|
+
|
|
39
|
+
_mouse = {"x": 240.0, "y": 240.0}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _eval(expr):
    """Evaluate the JavaScript `expr` in the page through CDP `Runtime.evaluate`.

    Returns the by-value result, or None when the reply has no result/value."""
    reply = cdp("Runtime.evaluate", expression=expr, returnByValue=True)
    result = reply.get("result") or {}
    return result.get("value")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _center(css):
    """Return [x, y] (viewport CSS px) of the center of the first `css` match.

    The element is scrolled into view first. Returns None when nothing matches or
    the element's bounding box is zero-sized."""
    script = "".join((
        "(function(){var e=document.querySelector(",
        _ij.dumps(css),  # JSON-encode so the selector is a safely quoted JS string
        ");if(!e)return null;",
        "try{e.scrollIntoView({block:'center',inline:'center'});}catch(_){}",
        "var b=e.getBoundingClientRect();if(b.width===0&&b.height===0)return null;",
        "return [b.x+b.width/2,b.y+b.height/2];})()",
    ))
    return _eval(script)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _focus(css):
    """Call `.focus()` on the first `css` match; True if an element was found, else False."""
    selector = _ij.dumps(css)  # JSON-encode so the selector is a safely quoted JS string
    script = ("(function(){var e=document.querySelector(" + selector
              + ");if(!e)return false;e.focus();return true;})()")
    found = _eval(script)
    return bool(found)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
# ── mouse ────────────────────────────────────────────────────────────────────────
|
|
60
|
+
def _move(x, y):
    """Dispatch a CDP `mouseMoved` to viewport coords (x, y) and record them in `_mouse`."""
    cdp("Input.dispatchMouseEvent", type="mouseMoved", x=x, y=y)
    # Remember the pointer position only after the event was dispatched.
    _mouse.update(x=x, y=y)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def click_xy(x, y):
    """Left-click at viewport coords (x, y) using CDP mouse events.

    Sequence: move to the point, short random pause, `mousePressed`, short random
    pause, `mouseReleased` — dispatched as input rather than a scripted el.click()."""
    left_button = dict(x=x, y=y, button="left", clickCount=1)
    _move(x, y)
    _it.sleep(_ir.uniform(0.02, 0.06))
    cdp("Input.dispatchMouseEvent", type="mousePressed", **left_button)
    _it.sleep(_ir.uniform(0.03, 0.08))
    cdp("Input.dispatchMouseEvent", type="mouseReleased", **left_button)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def click(css):
    """Click the center of the first element matching `css` via `click_xy`.

    Raises RuntimeError when `_center` finds no usable element for the selector."""
    center = _center(css)
    if center:
        click_xy(center[0], center[1])
        return
    raise RuntimeError("click: no visible element " + css)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def press_hold(css, seconds=6.0):
    """Press and hold the left button at the center of `css` for `seconds`.

    Intended for Press & Hold challenges. While held, small jittered `mouseMoved`
    events are sent around the center. The button is always released, even if a
    step during the hold raises, so the page is never left with a stuck button.

    Raises RuntimeError when `_center` finds no usable element for the selector."""
    c = _center(css)
    if not c:
        raise RuntimeError("press_hold: no visible element " + css)
    x, y = c
    _move(x, y)
    cdp("Input.dispatchMouseEvent", type="mousePressed", x=x, y=y, button="left", clickCount=1)
    try:
        # Monotonic clock: the hold length must not change if the wall clock is adjusted.
        deadline = _it.monotonic() + seconds
        while _it.monotonic() < deadline:
            cdp("Input.dispatchMouseEvent", type="mouseMoved", x=x + _ir.uniform(-1.4, 1.4), y=y + _ir.uniform(-1.4, 1.4), button="left")
            _it.sleep(_ir.uniform(0.08, 0.2))
    finally:
        # Release on every path so a failed hold does not leave the button down.
        cdp("Input.dispatchMouseEvent", type="mouseReleased", x=x, y=y, button="left", clickCount=1)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def drag(css, to=None, dx=None, dy=0):
    """Drag with the left button held, starting at the center of `css`.

    Give an absolute `to`=(x, y) target, or a relative `dx`/`dy` offset from the
    element's center. The pointer moves in small steps along a smoothstep curve with
    up to 1px of jitter (none on the final step). The button is always released at the
    target, even if a step raises mid-drag, so the page is never left with a stuck
    button.

    Raises RuntimeError when `_center` finds no usable element for the selector."""
    c = _center(css)
    if not c:
        raise RuntimeError("drag: no visible element " + css)
    x0, y0 = c
    x1, y1 = (to if to else (x0 + (dx or 0), y0 + dy))
    _move(x0, y0)
    cdp("Input.dispatchMouseEvent", type="mousePressed", x=x0, y=y0, button="left", clickCount=1)
    try:
        _it.sleep(_ir.uniform(0.05, 0.12))
        # At least 14 steps, otherwise roughly one step per 10px of travel.
        steps = max(14, int(_im.hypot(x1 - x0, y1 - y0) / 10))
        for i in range(1, steps + 1):
            t = i / steps
            e = t * t * (3 - 2 * t)  # smoothstep ease
            last = i == steps  # the final step lands exactly on the target
            px = x0 + (x1 - x0) * e + (0 if last else _ir.uniform(-1.0, 1.0))
            py = y0 + (y1 - y0) * e + (0 if last else _ir.uniform(-1.0, 1.0))
            cdp("Input.dispatchMouseEvent", type="mouseMoved", x=px, y=py, button="left")
            _it.sleep(_ir.uniform(0.008, 0.022))
    finally:
        # Release on every path so a failed drag does not leave the button down.
        cdp("Input.dispatchMouseEvent", type="mouseReleased", x=x1, y=y1, button="left", clickCount=1)
    _mouse["x"], _mouse["y"] = x1, y1
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# ── keyboard ─────────────────────────────────────────────────────────────────────
|
|
125
|
+
# US-layout punctuation -> (KeyboardEvent.code, Windows virtual-key code). Shifted
# symbols map to the physical key that produces them.
_PUNCT = {
    '/': ('Slash', 191), '.': ('Period', 190), ',': ('Comma', 188), '-': ('Minus', 189),
    ' ': ('Space', 32), ';': ('Semicolon', 186), ':': ('Semicolon', 186), "'": ('Quote', 222),
    '"': ('Quote', 222), '@': ('Digit2', 50), '_': ('Minus', 189), '=': ('Equal', 187),
    '+': ('Equal', 187), '(': ('Digit9', 57), ')': ('Digit0', 48), '!': ('Digit1', 49),
    '?': ('Slash', 191), '#': ('Digit3', 51),
    '$': ('Digit4', 52), '%': ('Digit5', 53), '^': ('Digit6', 54), '&': ('Digit7', 55),
    '*': ('Digit8', 56), '[': ('BracketLeft', 219), '{': ('BracketLeft', 219),
    ']': ('BracketRight', 221), '}': ('BracketRight', 221), '\\': ('Backslash', 220),
    '|': ('Backslash', 220), '<': ('Comma', 188), '>': ('Period', 190),
    '`': ('Backquote', 192), '~': ('Backquote', 192),
}
# Named keys -> (key/code name, Windows virtual-key code, text sent with keyDown).
_SPECIAL = {'Enter': ('Enter', 13, '\r'), 'Tab': ('Tab', 9, '\t'), 'Backspace': ('Backspace', 8, ''),
            'Escape': ('Escape', 27, ''), 'Delete': ('Delete', 46, ''), 'Space': ('Space', 32, ' '),
            'ArrowDown': ('ArrowDown', 40, ''), 'ArrowUp': ('ArrowUp', 38, ''),
            'ArrowLeft': ('ArrowLeft', 37, ''), 'ArrowRight': ('ArrowRight', 39, '')}


def _keyinfo(ch):
    """Return (key, code, virtual_key_code) for the single character `ch`.

    ASCII letters map to `Key<X>`, ASCII digits to `Digit<n>`, and known punctuation
    to its `_PUNCT` entry. Anything else (non-ASCII letters and digits, unmapped
    symbols) returns an empty code and virtual-key 0 rather than a made-up code."""
    # Explicit ASCII ranges: str.isalpha()/isdigit() also accept characters such as
    # 'é', '²' or 'ß' ('ß'.upper() is 'SS', which makes ord() raise).
    if "a" <= ch <= "z" or "A" <= ch <= "Z":
        u = ch.upper()
        return (ch, 'Key' + u, ord(u))
    if "0" <= ch <= "9":
        return (ch, 'Digit' + ch, ord(ch))
    if ch in _PUNCT:
        code, vk = _PUNCT[ch]
        return (ch, code, vk)
    return (ch, '', 0)
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _key(ch):
    """Send one character as a CDP keyDown/keyUp pair, with key data from `_keyinfo`."""
    key, code, vk = _keyinfo(ch)
    fields = {
        "key": key,
        "code": code,
        "windowsVirtualKeyCode": vk,
        "nativeVirtualKeyCode": vk,
    }
    # The keyDown carries `text` so the browser inserts the character; the keyUp
    # carries no text.
    cdp("Input.dispatchKeyEvent", type="keyDown", text=ch, **fields)
    cdp("Input.dispatchKeyEvent", type="keyUp", **fields)
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def press(name, times=1):
    """Press a named key `times` times via CDP key events.

    Supported names are the keys of `_SPECIAL`: Enter, Tab, Escape, Backspace,
    Delete, Space, Arrow{Up,Down,Left,Right}. Each press is a keyDown (with text,
    for keys that produce text) followed by a keyUp and a 30ms pause.

    Raises KeyError, listing the supported names, when `name` is not known."""
    try:
        code, vk, txt = _SPECIAL[name]
    except KeyError:
        raise KeyError("press: unknown key %r (known: %s)" % (name, ", ".join(sorted(_SPECIAL)))) from None
    # The DOM `key` value for the space bar is " "; only its `code` is "Space".
    key = " " if name == "Space" else code
    base = dict(key=key, code=code, windowsVirtualKeyCode=vk, nativeVirtualKeyCode=vk)
    down = dict(base, text=txt) if txt else base
    for _ in range(times):
        cdp("Input.dispatchKeyEvent", type="keyDown", **down)
        cdp("Input.dispatchKeyEvent", type="keyUp", **base)
        _it.sleep(0.03)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _clear_focused():
    """Empty the focused field: select everything, then press Delete."""
    mods = 4 if _isys.platform == "darwin" else 2  # Cmd on macOS, Ctrl elsewhere
    sa = dict(key='a', code='KeyA', windowsVirtualKeyCode=65, nativeVirtualKeyCode=65, modifiers=mods)
    # A synthetic modifier+A is not guaranteed to run the browser's select-all
    # shortcut, so attach the editing command explicitly via CDP's `commands`
    # parameter; the Delete that follows then has a selection to remove.
    cdp("Input.dispatchKeyEvent", type="rawKeyDown", commands=["selectAll"], **sa)
    cdp("Input.dispatchKeyEvent", type="keyUp", **sa)
    press("Delete")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def type_into(css, text, per=0.0, clear=False, enter=False):
    """Focus the first `css` match and type `text` one key event pair per character.

    per   -- seconds to sleep after each character (0 means no pause).
    clear -- empty the field first via `_clear_focused`.
    enter -- press Enter after the text.

    Raises RuntimeError when no element matches `css`."""
    focused = _focus(css)
    if not focused:
        raise RuntimeError("type_into: no element " + css)
    if clear:
        _clear_focused()
    if per:
        # Paced typing: pause after every character.
        for char in text:
            _key(char)
            _it.sleep(per)
    else:
        for char in text:
            _key(char)
    if enter:
        press("Enter")
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def type_text(text):
    """Type `text` into whatever element currently has focus, one `_key` call per character.

    Overrides the stock browser-harness typer, which used Input.insertText and so
    fired no key events; this version sends a keyDown/keyUp pair per character."""
    for char in text:
        _key(char)
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def insert_text_fast(text):
    """Insert `text` in one shot via CDP `Input.insertText` (the old fast path).

    Fires NO key events — use ONLY for dumping into a plain <textarea> with no
    keyup/input listeners, where speed matters."""
    cdp("Input.insertText", text=text)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
# ── easy-challenge solving — a gesture, never perception ───────────────────────────
|
|
209
|
+
# Classify what's on the page. EASY = something a trusted gesture clears with no
|
|
210
|
+
# understanding of content (checkbox, press-&-hold, slide-to-verify). HARD = anything
|
|
211
|
+
# needing to perceive content (pick images, read distorted text, rotate, audio) — we
|
|
212
|
+
# NEVER guess at those; we say escalate. Detection is heuristic (best-effort DOM sniff).
|
|
213
|
+
# Page-side classifier evaluated by solve_challenge(). Returns an object of the form
# {kind: 'none'|'hard'|'hold'|'drag'|'checkbox', sel?: <css selector>, why?: <text>}.
_DETECT_JS = r"""
(() => {
  const q = (s) => document.querySelector(s);
  const txt = (document.body ? document.body.innerText : '').toLowerCase();
  // First selector in the list that matches something, so the selector handed back
  // resolves to exactly the element that was found.
  const seen = (...ss) => ss.find(s => q(s));
  // An image/interactive challenge popup that's OPEN — reCAPTCHA bframe or hCaptcha
  // challenge iframe. It's a top-document iframe (cross-origin, can't read inside) but
  // we can see it's visibly expanded. That means a checkbox already escalated to the
  // perception kind → hard, escalate (don't re-report the checkbox behind it).
  const pop = q('iframe[src*="recaptcha/api2/bframe"], iframe[src*="hcaptcha.com/captcha"][title*="hallenge"], iframe[title*="recaptcha challenge"]');
  if (pop) { const pb = pop.getBoundingClientRect(); if (pb.height > 120 && pb.width > 120 && getComputedStyle(pop).visibility !== 'hidden') return {kind:'hard', why:'image challenge popup is open'}; }
  // HARD first — if a perception challenge is present, don't attempt a gesture.
  const hardTxt = /select all|click each|images? (with|containing)|type the (characters|text)|what does this say|rotate|listen and|audio challenge/;
  if (hardTxt.test(txt) || q('table.rc-imageselect-table') || q('.geetest_item_wrap')) return {kind:'hard', why:'image/text/audio challenge'};
  // Press & Hold (PerimeterX / DataDome). Matches "press & hold", "press and hold", "press&hold".
  if (/press\s*(?:&|and)\s*hold/.test(txt) || q('#px-captcha') || q('[id*="px-captcha"]'))
    { return {kind:'hold', sel: seen('#px-captcha [role=button]', '#px-captcha', '[id*="px-captcha"]', '[id*="press"]') || '#px-captcha', why:'press & hold'}; }
  // Slider / slide-to-verify
  const slider = q('.slider, [class*="slide"] [class*="btn"], [class*="drag"][class*="btn"], .yidun_slider, .nc_iconfont');
  if (/slide to|drag the slider|slide right|slide to verify/.test(txt) || slider)
    return {kind:'drag', sel: slider ? _sel(slider) : null, why:'slide to verify'};
  // Checkbox captchas (reCAPTCHA / hCaptcha / Turnstile) — usually a cross-origin iframe.
  if (q('iframe[src*="recaptcha/api2/anchor"]') || q('iframe[title*="hCaptcha"]') || q('iframe[src*="challenges.cloudflare.com"]') || q('.cf-turnstile') || q('.g-recaptcha') || q('.h-captcha'))
    return {kind:'checkbox', why:'checkbox captcha (in an iframe — click its coords)'};
  function _sel(e){ if(e.id) return '#'+CSS.escape(e.id); if(e.className && typeof e.className==='string'){const c=e.className.trim().split(/\s+/)[0]; if(c) return e.tagName.toLowerCase()+'.'+CSS.escape(c);} return e.tagName.toLowerCase(); }
  return {kind:'none'};
})()
"""
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def solve_challenge(act=True, hold_seconds=6.0, drag_dx=320):
    """Classify the challenge on the page and perform the gesture for the easy kinds.

    act          -- False classifies only and performs no gesture.
    hold_seconds -- how long to hold for a press-and-hold challenge.
    drag_dx      -- how far right (px) to slide a slide-to-verify handle; the default
                    of 320 suits simple sliders that latch at the end of the track.

    Returns a short status string:
      'none'              no challenge detected (or an unrecognized kind)
      'escalate:<why>'    perception challenge, or the gesture raised — ask the operator
      'easy:<kind> ...'   classified but not acted on (act=False, checkbox, or a slider
                          that could not be located)
      'solved:<kind> ...' the gesture was performed; the caller should verify it cleared"""
    d = _eval(_DETECT_JS) or {"kind": "none"}
    kind, sel, why = d.get("kind"), d.get("sel"), d.get("why", "")
    if kind == "none":
        return "none"
    if kind == "hard":
        return "escalate:" + why + " — needs perception; ask the operator, don't guess"
    if not act:
        return "easy:%s (%s) sel=%s" % (kind, why, sel)
    try:
        if kind == "hold":
            press_hold(sel, seconds=hold_seconds)
            return "solved:hold — press-held %s (verify it cleared; retry once, else escalate)" % sel
        if kind == "drag":
            if not sel:
                return "easy:drag (%s) — found a slider but couldn't pin a selector; drag it by hand with drag(sel, dx=<track width>)" % why
            # Probe first so an unlocatable slider is reported as such rather than
            # as a failed gesture.
            c = _center(sel)
            if c:
                drag(sel, dx=drag_dx)
                return "solved:drag — slid %s right (verify; if it snapped back, escalate)" % sel
            return "easy:drag — slider not locatable; escalate if it blocks you"
        if kind == "checkbox":
            return ("easy:checkbox (%s) — it's in a cross-origin iframe. Screenshot, read the checkbox pixel, "
                    "then click_xy(x, y) (a trusted click passes through the iframe). If an image grid appears "
                    "after, that's the HARD kind — escalate." % why)
    except Exception as e:
        # Any failure while performing a gesture is reported, not raised.
        return "escalate:gesture failed (%r) — ask the operator" % (e,)
    return "none"
|
package/install.sh
CHANGED
|
@@ -84,6 +84,7 @@ if ! command -v browser-harness >/dev/null 2>&1; then
|
|
|
84
84
|
exit 1
|
|
85
85
|
fi
|
|
86
86
|
HELPERS_SRC="$HERE/agent-helpers.py"
|
|
87
|
+
INPUT_SRC="$HERE/agent-input.py" # Tier 2 trusted-input layer → workspace/horse_input.py
|
|
87
88
|
# Legacy marker: pre-0.4.1 installs appended the helpers INLINE under this line, and
|
|
88
89
|
# re-syncs replaced marker→EOF — silently eating anything a user had added below the
|
|
89
90
|
# block. Kept only so those files can be migrated once.
|
|
@@ -93,6 +94,7 @@ install_helpers_into() { # $1 = workspace dir; (re)syncs the helpers — idempo
|
|
|
93
94
|
local ws="$1" dst="$1/agent_helpers.py" # re-running install.sh deploys helper UPDATES too.
|
|
94
95
|
mkdir -p "$ws" 2>/dev/null || return 0
|
|
95
96
|
cp "$HELPERS_SRC" "$ws/horse_helpers.py"
|
|
97
|
+
cp "$INPUT_SRC" "$ws/horse_input.py" # loaded by horse_helpers.py (chain-exec)
|
|
96
98
|
# one-time migration of a legacy inline block: strip it ONLY on an exact byte match
|
|
97
99
|
# with the shipped source, so user additions below it survive. A modified/unknown
|
|
98
100
|
# block is left in place — harmless, the loader runs after it and its defs win.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pa1nd/horse-browser",
|
|
3
|
-
"version": "0.5.0",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
"bin",
|
|
13
13
|
"extension",
|
|
14
14
|
"agent-helpers.py",
|
|
15
|
+
"agent-input.py",
|
|
15
16
|
"scripts",
|
|
16
17
|
"install.sh",
|
|
17
18
|
"claude-md.sh",
|