esp-donut 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,30 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .venv/
6
+ .pytest_cache/
7
+ .ruff_cache/
8
+ build/
9
+ dist/
10
+
11
+ # Editors / tooling
12
+ .claude/
13
+ .idea/
14
+ .vscode/
15
+
16
+ # OS
17
+ .DS_Store
18
+
19
+ # Frontend
20
+ frontend/node_modules/
21
+ frontend/dist/
22
+
23
+ # Local env (real secrets; .env.sample is the committed template)
24
+ .env
25
+ .env.local
26
+
27
+ # Local SQLite database (created/migrated on hub startup; WAL sidecars too)
28
+ donut.db
29
+ donut.db-wal
30
+ donut.db-shm
@@ -0,0 +1,38 @@
1
+ Metadata-Version: 2.4
2
+ Name: esp-donut
3
+ Version: 0.1.0
4
+ Summary: Donut client tools for ESP devices: the donut CLI and the donut-hands daemon
5
+ Requires-Python: >=3.13
6
+ Requires-Dist: esptool>=5.0
7
+ Requires-Dist: nanoid<3,>=2
8
+ Requires-Dist: platformdirs<5,>=4
9
+ Requires-Dist: pyserial>=3.5
10
+ Requires-Dist: websockets>=14
11
+ Description-Content-Type: text/markdown
12
+
13
+ # esp-donut
14
+
15
+ Client tools for [Donut](https://donut.espressif.tools), the remote-hands
16
+ service for ESP devices:
17
+
18
+ - **`donut`** — the user-facing CLI. Lists devices exposed by connected
19
+ hands boxes and attaches a local rfc2217 port to a remote device.
20
+ - **`donut-hands`** — the daemon that runs on a lab box next to the
21
+ hardware, dials the hub, and serves its serial ports.
22
+
23
+ ## Install
24
+
25
+ ```sh
26
+ pip install esp-donut
27
+ ```
28
+
29
+ ## Quick start
30
+
31
+ ```sh
32
+ export DONUT_TOKEN=... # mint one with `donut-admin token mint --role agent`
33
+ donut ls
34
+ ```
35
+
36
+ For the hands daemon, create a config with `donut-hands --init` and run
37
+ `donut-hands`. See the hub's documentation for the config schema and the
38
+ wire protocol.
@@ -0,0 +1,26 @@
1
+ # esp-donut
2
+
3
+ Client tools for [Donut](https://donut.espressif.tools), the remote-hands
4
+ service for ESP devices:
5
+
6
+ - **`donut`** — the user-facing CLI. Lists devices exposed by connected
7
+ hands boxes and attaches a local rfc2217 port to a remote device.
8
+ - **`donut-hands`** — the daemon that runs on a lab box next to the
9
+ hardware, dials the hub, and serves its serial ports.
10
+
11
+ ## Install
12
+
13
+ ```sh
14
+ pip install esp-donut
15
+ ```
16
+
17
+ ## Quick start
18
+
19
+ ```sh
20
+ export DONUT_TOKEN=... # mint one with `donut-admin token mint --role agent`
21
+ donut ls
22
+ ```
23
+
24
+ For the hands daemon, create a config with `donut-hands --init` and run
25
+ `donut-hands`. See the hub's documentation for the config schema and the
26
+ wire protocol.
@@ -0,0 +1,34 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "esp-donut"
7
+ # Version is single-sourced from src/donut/__init__.py and kept in
8
+ # lockstep with donut-hub (enforced by tests/test_smoke.py).
9
+ dynamic = ["version"]
10
+ description = "Donut client tools for ESP devices: the donut CLI and the donut-hands daemon"
11
+ readme = "README.md"
12
+ requires-python = ">=3.13"
13
+ dependencies = [
14
+ "websockets>=14",
15
+ "pyserial>=3.5",
16
+ "esptool>=5.0",
17
+ "nanoid>=2,<3",
18
+ "platformdirs>=4,<5",
19
+ ]
20
+
21
+ [project.scripts]
22
+ donut = "donut.cli:main"
23
+ donut-hands = "donut.hands:main"
24
+
25
+ [tool.hatch.version]
26
+ path = "src/donut/__init__.py"
27
+
28
+ [tool.hatch.build.targets.wheel]
29
+ packages = ["src/donut"]
30
+
31
+ # Rebuild editable metadata when the version file changes, not just
32
+ # pyproject.toml — otherwise the installed version goes stale on bumps.
33
+ [tool.uv]
34
+ cache-keys = [{ file = "pyproject.toml" }, { file = "src/donut/__init__.py" }]
@@ -0,0 +1,3 @@
1
+ """Donut client — the ``donut`` CLI and the ``donut-hands`` daemon."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,254 @@
1
+ """Donut CLI — the user-facing ``donut`` command.
2
+
3
+ v0.1 ships two subcommands: ``ls`` lists the devices on every connected
4
+ hands box over ``/api/ws/control``; ``connect`` leases a device and exposes
5
+ it as a local rfc2217 port. Config comes from the environment: ``DONUT_TOKEN``
6
+ (required bearer) and ``DONUT_HUB_URL`` (optional, defaults to the production hub).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import asyncio
13
+ import json
14
+ import os
15
+ import sys
16
+ from collections.abc import Callable
17
+
18
+ from websockets.asyncio.client import connect as ws_connect
19
+ from websockets.exceptions import InvalidStatus, WebSocketException
20
+
21
+ DEFAULT_HUB_URL = "wss://donut.espressif.tools"
22
+ ENV_HUB_URL = "DONUT_HUB_URL"
23
+ ENV_TOKEN = "DONUT_TOKEN"
24
+
25
+ # Binary control sentinel sent on the data WS when a fresh local rfc2217
26
+ # client attaches, telling hands to start a new rfc2217 session. Must equal
27
+ # ``donut.hands.RFC2217_RESET``. See that module for the rationale.
28
+ RFC2217_RESET = b"\x00donut\x00rfc2217-reset\x00"
29
+
30
+
31
class CliError(Exception):
    """Error whose message is meant to be shown to the user as-is.

    Raised for bad configuration (e.g. a missing token) and for error
    frames returned by the hub.
    """
33
+
34
+
35
def resolve_hub_url() -> str:
    """Return the hub URL from ``DONUT_HUB_URL``, else the default hub.

    An unset variable and an empty one both fall back to
    ``DEFAULT_HUB_URL``.
    """
    configured = os.environ.get(ENV_HUB_URL)
    if configured:
        return configured
    return DEFAULT_HUB_URL
37
+
38
+
39
def resolve_token() -> str:
    """Return the bearer token from ``DONUT_TOKEN``.

    Raises:
        CliError: the variable is unset or empty.
    """
    token = os.environ.get(ENV_TOKEN)
    if token:
        return token
    hint = "(mint one with `donut-admin token mint --role agent`)"
    raise CliError(f"{ENV_TOKEN} is not set " + hint)
47
+
48
+
49
async def fetch_devices(hub_url: str, token: str) -> list[dict]:
    """List the devices the hub currently aggregates.

    Opens the control WS with *token* as the bearer, says hello as an
    agent, issues ``device.list`` and returns the ``devices`` array of the
    reply (an empty list when the reply carries none).
    """
    control_url = hub_url.rstrip("/") + "/api/ws/control"
    auth_headers = [("Authorization", f"Bearer {token}")]
    hello = json.dumps({"op": "hello", "role": "agent"})
    request = json.dumps({"op": "device.list", "id": "1"})

    async with ws_connect(control_url, additional_headers=auth_headers) as ws:
        await ws.send(hello)
        await _recv_op(ws, "hello-ok")
        await ws.send(request)
        reply = await _recv_op(ws, "device.list-ok")
        return reply.get("devices", [])
60
+
61
+
62
async def acquire_lease(hub_url: str, token: str, tags: list[str]) -> dict:
    """Request a lease for a device matching *tags*; return the reply frame.

    The returned dict is the hub's ``lease.acquire-ok`` frame. The control
    connection is closed as soon as the lease is granted: the lease lives
    in the hub, the data WS dialed afterwards carries the session, and
    dropping that data WS is what releases the lease.
    """
    control_url = hub_url.rstrip("/") + "/api/ws/control"
    auth_headers = [("Authorization", f"Bearer {token}")]
    hello = json.dumps({"op": "hello", "role": "agent"})
    request = json.dumps({"op": "lease.acquire", "id": "1", "tags": tags})

    async with ws_connect(control_url, additional_headers=auth_headers) as ws:
        await ws.send(hello)
        await _recv_op(ws, "hello-ok")
        await ws.send(request)
        granted = await _recv_op(ws, "lease.acquire-ok")
        return granted
77
+
78
+
79
async def serve_local_rfc2217(
    data_ws,
    *,
    host: str = "127.0.0.1",
    port: int = 0,
    on_listening: Callable[[int], None] | None = None,
) -> None:
    """Bridge a local TCP port to the data WS until the WS closes.

    This is the *transparent* half of rfc2217. The listener never parses
    the protocol: idf.py's pyserial rfc2217 client (dialing this port) and
    the hands-side ``PortManager`` are the real endpoints, so DTR/RTS, baud
    changes and modem state travel end-to-end through here untouched.

    One TCP client at a time. ``idf.py flash monitor`` opens the port twice
    (esptool, then the monitor) — both reuse the same data WS / lease, so
    we keep listening across client disconnects, sending ``RFC2217_RESET``
    on each new attach so hands starts a fresh rfc2217 session for it.

    Args:
        data_ws: Open data WebSocket; iterated for inbound frames and used
            to send outbound bytes.
        host: Local address to bind.
        port: Local port to bind (0 means the OS picks a free one).
        on_listening: Called once, after binding, with the chosen port.
    """
    current_writer: asyncio.StreamWriter | None = None

    async def handle_client(reader: asyncio.StreamReader, writer) -> None:
        nonlocal current_writer
        if current_writer is not None:
            # Single byte stream per lease; refuse a second concurrent client.
            writer.close()
            return
        current_writer = writer
        try:
            # Tell hands a fresh rfc2217 client attached so it greets us with
            # a new negotiation instead of the previous client's stale state.
            await data_ws.send(RFC2217_RESET)
            while chunk := await reader.read(4096):
                await data_ws.send(chunk)
        finally:
            current_writer = None
            writer.close()

    async def ws_to_tcp() -> None:
        async for msg in data_ws:
            data = msg.encode() if isinstance(msg, str) else msg
            writer = current_writer
            if writer is None or writer.is_closing():
                # No client attached → drop. This is the hands greeting
                # emitted before the first client, or device chatter between
                # clients; each client gets its own greeting via the
                # RFC2217_RESET sent on attach.
                continue
            try:
                writer.write(data)
                await writer.drain()
            except OSError:
                # The client vanished between the is_closing() check and the
                # write (e.g. esptool exiting while the device still talks).
                # That must not end the pump: the lease stays up for the next
                # client, so treat it like "no client attached".
                continue

    server = await asyncio.start_server(handle_client, host, port)
    async with server:
        if on_listening is not None:
            on_listening(server.sockets[0].getsockname()[1])
        pump = asyncio.create_task(ws_to_tcp())
        try:
            # Serve until the data WS closes (lease released / hub gone).
            await pump
        finally:
            # Leaving ``async with server`` waits for active connections to
            # finish; an idle attached client would otherwise keep us here
            # after the WS is gone. Closing its writer ends handle_client.
            if current_writer is not None:
                current_writer.close()
140
+
141
+
142
async def connect_session(hub_url: str, token: str, tags: list[str]) -> None:
    """Acquire a lease and expose it as a local rfc2217 port for idf.py.

    Prints ``ESPPORT=rfc2217://...`` on stdout once the local listener is
    bound (human-readable status goes to stderr), then serves until the
    data WS closes.

    Args:
        hub_url: Base ``ws://`` / ``wss://`` URL of the hub.
        token: Bearer token used on the control WS.
        tags: Device tags passed to ``lease.acquire``.
    """
    ok = await acquire_lease(hub_url, token, tags)

    def announce(port: int) -> None:
        # ?ign_set_control: the hands-side EspPortManager handles DTR/RTS
        # out of band (reset-into-bootloader) and never echoes them, so the
        # client must not wait for control acks. This is the URL form
        # esp_rfc2217_server documents for esptool / idf.py.
        #
        # flush=True: this process keeps running until the lease ends, so
        # when stdout is a pipe (a wrapper script capturing ESPPORT) the
        # line would otherwise sit in the block buffer and never be seen.
        print(f"ESPPORT=rfc2217://127.0.0.1:{port}?ign_set_control", flush=True)
        print(
            f"lease {ok['lease_id']} active — point idf.py at $ESPPORT; "
            "Ctrl-C to release",
            file=sys.stderr,
        )

    # Dial the data WS on the same hub we just leased from, built from our
    # own hub_url — not from ok["data_url"]. Behind a TLS-terminating proxy
    # the hub can't reliably know its own public scheme, so its data_url can
    # come back as ws/http and get 301-redirected to https (which the WS
    # client rejects). hands does the same: it uses its configured hub_url.
    data_ws_url = hub_url.rstrip("/") + "/api/ws/data"
    async with ws_connect(data_ws_url) as data_ws:
        # Ticket rides the first frame, not the URL (keeps it out of logs).
        await data_ws.send(json.dumps({"ticket": ok["ticket"]}))
        await serve_local_rfc2217(data_ws, on_listening=announce)
168
+
169
+
170
+ async def _recv_op(ws, expected: str) -> dict:
171
+ """Read frames until one with ``op == expected``; raise on error frame."""
172
+ while True:
173
+ msg = json.loads(await ws.recv())
174
+ op = msg.get("op")
175
+ if op == expected:
176
+ return msg
177
+ if op == "error":
178
+ raise CliError(msg.get("error", "hub returned an error"))
179
+ # Ignore any other frame (e.g. an unsolicited push) and keep reading.
180
+
181
+
182
def format_table(devices: list[dict]) -> str:
    """Render the device list as an aligned NAME / TAGS / HANDS table.

    Each device dict must carry ``name`` and ``hands``; ``tags`` is
    optional and may be missing or ``None`` (both render as an empty
    cell). Cell values are converted with ``str`` so a non-string value
    from the hub cannot break width computation.

    Returns:
        The table as one newline-joined string, or the literal
        ``"no devices available"`` for an empty list.
    """
    if not devices:
        return "no devices available"

    headers = ("NAME", "TAGS", "HANDS")
    rows = [
        (
            str(d["name"]),
            ", ".join(str(tag) for tag in (d.get("tags") or [])),
            str(d["hands"]),
        )
        for d in devices
    ]
    # Column width = widest cell in that column, header included.
    widths = [max(len(cell) for cell in column) for column in zip(headers, *rows)]
    return "\n".join(
        " ".join(cell.ljust(width) for cell, width in zip(line, widths))
        for line in (headers, *rows)
    )
198
+
199
+
200
def _cmd_ls(args: argparse.Namespace) -> None:
    """Handle ``donut ls``: fetch the device list and print it.

    Prints indented JSON when ``args.json`` is set, otherwise the aligned
    text table.
    """
    token = resolve_token()
    hub_url = resolve_hub_url()
    devices = asyncio.run(fetch_devices(hub_url, token))
    rendered = json.dumps(devices, indent=2) if args.json else format_table(devices)
    print(rendered)
208
+
209
+
210
def _cmd_connect(args: argparse.Namespace) -> None:
    """Handle ``donut connect``: lease a device matching ``args.tags``.

    Runs the session coroutine to completion on a fresh event loop.
    """
    token = resolve_token()
    hub_url = resolve_hub_url()
    session = connect_session(hub_url, token, args.tags)
    asyncio.run(session)
214
+
215
+
216
def main(argv: list[str] | None = None) -> None:
    """Console entry point for ``donut``.

    Parses *argv* (``sys.argv[1:]`` when ``None``), dispatches to the
    selected subcommand and maps failures to exit codes:

    * 2 — ``CliError`` (bad config, hub error frame)
    * 1 — the hub rejected the upgrade or could not be reached
    * 130 — interrupted with Ctrl-C
    """
    parser = argparse.ArgumentParser(prog="donut")
    sub = parser.add_subparsers(dest="command", required=True)

    ls = sub.add_parser("ls", help="list devices on connected hands boxes")
    ls.add_argument("--json", action="store_true", help="emit JSON instead of a table")
    ls.set_defaults(func=_cmd_ls)

    connect = sub.add_parser(
        "connect", help="lease a device and expose it as a local rfc2217 port"
    )
    connect.add_argument(
        "--tags",
        nargs="+",
        required=True,
        metavar="TAG",
        help="device tags to match (all must be present)",
    )
    connect.set_defaults(func=_cmd_connect)

    args = parser.parse_args(argv)
    try:
        args.func(args)
    except KeyboardInterrupt:
        # Ctrl-C is the documented way to release a `donut connect` lease;
        # exit quietly with the conventional SIGINT status instead of
        # dumping a traceback.
        sys.exit(130)
    except CliError as exc:
        print(f"donut: {exc}", file=sys.stderr)
        sys.exit(2)
    except InvalidStatus as exc:
        status = exc.response.status_code
        if status in (401, 403):
            print("donut: authentication failed — check DONUT_TOKEN", file=sys.stderr)
        else:
            print(
                f"donut: hub rejected connection (HTTP {status})",
                file=sys.stderr,
            )
        sys.exit(1)
    except (OSError, WebSocketException) as exc:
        print(f"donut: cannot reach hub: {exc}", file=sys.stderr)
        sys.exit(1)
@@ -0,0 +1,760 @@
1
+ """Donut hands daemon — dial the hub, send `hello`, run forever.
2
+
3
+ See ``docs/hands-setup.md`` for the config schema and
4
+ ``docs/design.md`` §"Wire protocol" for the `hello` frame.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import argparse
10
+ import asyncio
11
+ import json
12
+ import logging
13
+ import os
14
+ import random
15
+ import stat
16
+ import sys
17
+ import time
18
+ import tomllib
19
+ from collections.abc import Awaitable, Callable
20
+ from concurrent.futures import ThreadPoolExecutor
21
+ from dataclasses import dataclass, field
22
+ from pathlib import Path
23
+
24
+ import serial
25
+ import serial.tools.list_ports
26
+ from esp_rfc2217_server.esp_port_manager import EspPortManager
27
+ from esp_rfc2217_server.esp_port_manager import cfg as _esptool_cfg
28
+ from esptool.loader import ESPLoader
29
+ from esptool.reset import (
30
+ DEFAULT_RESET_DELAY,
31
+ ClassicReset,
32
+ CustomReset,
33
+ HardReset,
34
+ UnixTightReset,
35
+ USBJTAGSerialReset,
36
+ )
37
+ from nanoid import generate as nanoid_generate
38
+ from platformdirs import user_config_dir
39
+ from websockets.asyncio.client import connect as ws_connect
40
+ from websockets.exceptions import InvalidStatus, WebSocketException
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
+ DEFAULT_ETC_DIR = Path("/etc/donut")
46
+ CONFIG_FILENAME = "hands.toml"
47
+ ENV_CONFIG = "DONUT_HANDS_CONFIG"
48
+ DEFAULT_HUB_URL = "wss://donut.espressif.tools/"
49
+
50
+ # Reconnect backoff: exponential ceiling with full jitter. The ceiling
51
+ # resets to INITIAL only after a session that stayed up at least
52
+ # STABLE_SESSION_SECONDS — otherwise an accept-then-instant-close
53
+ # flapping hub would pin us to a sub-second redial loop forever. HTTP
54
+ # 401/403 on the upgrade is fatal (bad/revoked token — retrying can't
55
+ # help).
56
+ INITIAL_BACKOFF = 1.0
57
+ MAX_BACKOFF = 30.0
58
+ STABLE_SESSION_SECONDS = 5.0
59
+ FATAL_AUTH_STATUSES = frozenset({401, 403})
60
+ WS_SCHEMES = ("ws://", "wss://")
61
+
62
+ # Control sentinel the CLI sends on the data WS when a fresh local rfc2217
63
+ # client attaches, so we start a new rfc2217 session (esptool then the
64
+ # monitor are two clients on one lease, and telnet option state is sticky —
65
+ # a reused PortManager won't re-ACK the second client's negotiation). Sent
66
+ # as a binary frame so the hub's binary byte-pump relays it untouched; the
67
+ # exact-match magic can't collide with a real rfc2217/telnet frame.
68
+ # Must equal ``donut.cli.RFC2217_RESET``.
69
+ RFC2217_RESET = b"\x00donut\x00rfc2217-reset\x00"
70
+
71
+ # How long to keep reopening a serial port that dropped mid-session before
72
+ # giving up. Native-USB devkits re-enumerate during reset, so the port
73
+ # briefly disappears and returns at the same path.
74
+ SERIAL_REOPEN_TIMEOUT = 10.0
75
+
76
+
77
class ConfigError(Exception):
    """Signals that the hands config could not be read or resolved.

    Covers a missing file, invalid TOML, unknown or missing keys, and a
    refusal to overwrite an existing config.
    """
79
+
80
+
81
@dataclass(frozen=True)
class DeviceConfig:
    """One ``[[devices]]`` entry of ``hands.toml``."""

    # Device name; advertised to the hub in the ``hello`` frame.
    name: str
    # Serial port location handed to pyserial, e.g. ``/dev/ttyACM0``.
    path: str
    # Free-form labels; empty when the entry declares none.
    tags: list[str] = field(default_factory=list)
86
+
87
+
88
@dataclass(frozen=True)
class HandsConfig:
    """Parsed contents of ``hands.toml`` for one hands box."""

    # Human-readable name of this hands box.
    name: str
    # Identifier of this box; ``init_config`` mints it as ``h_<nanoid>``.
    hands_id: str
    # Base ``ws://`` / ``wss://`` URL of the hub.
    hub_url: str
    # Bearer token sent in the ``Authorization`` header.
    token: str
    # Serial devices this box serves; empty when none are declared.
    devices: list[DeviceConfig] = field(default_factory=list)
95
+
96
+
97
+ _REQUIRED_TOP_LEVEL = ("name", "hands_id", "hub_url", "token")
98
+ _ALLOWED_TOP_LEVEL = {*_REQUIRED_TOP_LEVEL, "devices"}
99
+ _REQUIRED_DEVICE = ("name", "path")
100
+ _ALLOWED_DEVICE = {*_REQUIRED_DEVICE, "tags"}
101
+
102
+
103
def load_config(path: Path) -> HandsConfig:
    """Read and validate ``hands.toml`` at *path*.

    The file is parsed as UTF-8 (as the TOML spec requires) regardless of
    the process locale. Unknown keys, missing keys, non-string values for
    the required keys, a non-WebSocket ``hub_url`` and a malformed
    ``devices`` array are all rejected.

    Raises:
        ConfigError: the file is missing, unreadable, not valid TOML, or
            fails validation.
    """
    if not path.exists():
        raise ConfigError(f"hands config not found: {path}")

    _warn_if_world_readable(path)

    try:
        # Binary mode: tomllib decodes as UTF-8 itself, so the result does
        # not depend on the locale's preferred encoding.
        with path.open("rb") as fp:
            raw = tomllib.load(fp)
    except (tomllib.TOMLDecodeError, UnicodeDecodeError) as exc:
        raise ConfigError(f"hands config is not valid TOML: {exc}") from exc
    except OSError as exc:
        raise ConfigError(f"cannot read hands config {path}: {exc}") from exc

    extra_top = set(raw) - _ALLOWED_TOP_LEVEL
    if extra_top:
        raise ConfigError(f"unknown key(s) in {path}: {', '.join(sorted(extra_top))}")

    missing = [k for k in _REQUIRED_TOP_LEVEL if k not in raw]
    if missing:
        raise ConfigError(f"missing required key(s) in {path}: {', '.join(missing)}")

    not_text = [k for k in _REQUIRED_TOP_LEVEL if not isinstance(raw[k], str)]
    if not_text:
        raise ConfigError(f"key(s) in {path} must be strings: {', '.join(not_text)}")

    if not raw["hub_url"].startswith(WS_SCHEMES):
        raise ConfigError(
            f"hub_url in {path} must start with ws:// or wss:// "
            f"(got {raw['hub_url']!r})"
        )

    entries = raw.get("devices", [])
    if not isinstance(entries, list):
        # e.g. a single ``[devices]`` table instead of ``[[devices]]``.
        raise ConfigError(f"devices in {path} must be an array of tables ([[devices]])")
    devices = [_parse_device(d, path=path) for d in entries]

    return HandsConfig(
        name=raw["name"],
        hands_id=raw["hands_id"],
        hub_url=raw["hub_url"],
        token=raw["token"],
        devices=devices,
    )
138
+
139
+
140
def _parse_device(raw: dict, *, path: Path) -> DeviceConfig:
    """Validate one ``[[devices]]`` entry and build a ``DeviceConfig``.

    Args:
        raw: The decoded TOML table for the entry.
        path: Config file location, used only in error messages.

    Raises:
        ConfigError: the entry is not a table, has unknown or missing
            keys, or a value has the wrong type.
    """
    if not isinstance(raw, dict):
        raise ConfigError(
            f"device entry in {path} must be a table (got {type(raw).__name__})"
        )
    extra = set(raw) - _ALLOWED_DEVICE
    if extra:
        raise ConfigError(
            f"unknown device key(s) in {path}: {', '.join(sorted(extra))}"
        )
    missing = [k for k in _REQUIRED_DEVICE if k not in raw]
    if missing:
        raise ConfigError(
            f"device entry in {path} missing key(s): {', '.join(missing)}"
        )
    not_text = [k for k in _REQUIRED_DEVICE if not isinstance(raw[k], str)]
    if not_text:
        raise ConfigError(
            f"device key(s) in {path} must be strings: {', '.join(not_text)}"
        )

    tags = raw.get("tags", [])
    # A bare string would otherwise be exploded into single characters by
    # list(), silently producing tags like ["e", "s", "p", ...].
    if not isinstance(tags, list) or not all(isinstance(t, str) for t in tags):
        raise ConfigError(
            f"device {raw['name']!r} in {path}: tags must be an array of strings"
        )

    return DeviceConfig(
        name=raw["name"],
        path=raw["path"],
        tags=list(tags),
    )
156
+
157
+
158
+ def _warn_if_world_readable(path: Path) -> None:
159
+ try:
160
+ mode = stat.S_IMODE(path.stat().st_mode)
161
+ except OSError:
162
+ return
163
+ if mode & 0o077:
164
+ logger.warning(
165
+ "hands config %s has loose permissions %o (recommend 0600)",
166
+ path,
167
+ mode,
168
+ )
169
+
170
+
171
def _ws_scheme_error(value: str) -> str | None:
    """Validator for hub URLs: ``None`` if acceptable, else the reason."""
    return None if value.startswith(WS_SCHEMES) else "must start with ws:// or wss://"
175
+
176
+
177
+ def _prompt_required(
178
+ prompt_fn: Callable[[str], str],
179
+ question: str,
180
+ *,
181
+ default: str | None = None,
182
+ validate: Callable[[str], str | None] | None = None,
183
+ ) -> str:
184
+ """Prompt until a valid answer is given.
185
+
186
+ Blank input takes ``default`` when one is set; otherwise it re-prompts.
187
+ ``validate`` returns an error string to reject + re-prompt, or ``None``
188
+ to accept.
189
+ """
190
+ hint = f" [{default}]" if default else ""
191
+ while True:
192
+ answer = prompt_fn(f"{question}{hint}: ").strip()
193
+ if not answer:
194
+ if default is not None:
195
+ return default
196
+ print(" value required", file=sys.stderr)
197
+ continue
198
+ if validate is not None and (err := validate(answer)):
199
+ print(f" {err}", file=sys.stderr)
200
+ continue
201
+ return answer
202
+
203
+
204
def init_config(
    path: Path,
    *,
    prompt_fn: Callable[[str], str],
) -> HandsConfig:
    """Interactively create a ``hands.toml`` at *path*.

    Refuses to clobber an existing file. Prompts for the operator-facing
    bits (``name``, ``hub_url``, ``token``), mints a fresh ``hands_id``,
    and writes the file at mode 0600.

    Args:
        path: Where to create the config; parent directories are created.
        prompt_fn: Called with each prompt string; returns the answer.

    Returns:
        The config that was written (with no devices declared).

    Raises:
        ConfigError: a file already exists at *path*, whether seen up
            front or created by someone else while we were prompting.
    """
    if path.exists():
        raise ConfigError(
            f"hands config already exists: {path} (refusing to overwrite)"
        )

    name = _prompt_required(
        prompt_fn, "Human-readable name for this hands box (e.g. lab-bench-1)"
    )
    hub_url = _prompt_required(
        prompt_fn,
        "Hub url",
        default=DEFAULT_HUB_URL,
        validate=_ws_scheme_error,
    )
    token = _prompt_required(
        prompt_fn, "Bearer token (from `donut-admin token mint --role hands`)"
    )

    hands_id = f"h_{nanoid_generate(size=21)}"

    cfg = HandsConfig(
        name=name,
        hands_id=hands_id,
        hub_url=hub_url,
        token=token,
        devices=[],
    )

    path.parent.mkdir(parents=True, exist_ok=True)
    # Open with O_CREAT|O_EXCL and the right mode to avoid a brief
    # world-readable window between create and chmod.
    try:
        fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
    except FileExistsError as exc:
        # The file appeared after the exists() check above; report it the
        # same way instead of leaking a raw OSError to the caller.
        raise ConfigError(
            f"hands config already exists: {path} (refusing to overwrite)"
        ) from exc
    # TOML files are UTF-8 by spec; don't depend on the locale encoding.
    with os.fdopen(fd, "w", encoding="utf-8") as fp:
        fp.write(_render_init_toml(cfg))

    return cfg
251
+
252
+
253
+ def _render_init_toml(cfg: HandsConfig) -> str:
254
+ # TOML basic strings share JSON's escape syntax for the cases we hit
255
+ # (", \, control chars), so json.dumps yields a safely-escaped,
256
+ # valid TOML right-hand side even when a bench name contains quotes.
257
+ return (
258
+ f"name = {json.dumps(cfg.name)}\n"
259
+ f"hands_id = {json.dumps(cfg.hands_id)}\n"
260
+ f"hub_url = {json.dumps(cfg.hub_url)}\n"
261
+ f"token = {json.dumps(cfg.token)}\n"
262
+ "\n"
263
+ "# Declare each USB devkit attached to this hands box:\n"
264
+ "# [[devices]]\n"
265
+ '# name = "c3-1"\n'
266
+ '# path = "/dev/ttyACM0"\n'
267
+ '# tags = ["esp32c3", "usb-jtag"]\n'
268
+ )
269
+
270
+
271
async def run(
    config: HandsConfig,
    *,
    on_connected: Callable[[], None] | None = None,
) -> None:
    """Dial the hub, send ``hello``, read frames until the socket closes.

    Returns on clean disconnect; raises on connect / handshake failures.
    ``on_connected`` (if given) fires once the WS handshake succeeds.
    Reconnection itself is the caller's job; this coroutine handles a
    single session.

    Each ``port.open`` frame spawns a port-session task. When the control
    WS ends, those tasks are cancelled *and awaited*, so their serial
    ports are closed before this coroutine returns and the caller redials.
    """
    url = config.hub_url.rstrip("/") + "/api/ws/control"
    hello = {
        "op": "hello",
        "role": "hands",
        "hands_id": config.hands_id,
        "name": config.name,
        "devices": [
            {"name": d.name, "path": d.path, "tags": list(d.tags)}
            for d in config.devices
        ],
    }

    # name → local device path, so we can resolve the hub-assigned
    # device_id (returned in hello-ok) back to the port a port.open names.
    name_to_path = {d.name: d.path for d in config.devices}
    id_to_path: dict[str, str] = {}
    sessions: set[asyncio.Task] = set()

    def _session_done(task: asyncio.Task) -> None:
        sessions.discard(task)
        # Retrieve the outcome so a failed session is logged here instead
        # of surfacing later as "Task exception was never retrieved".
        if not task.cancelled() and (exc := task.exception()) is not None:
            logger.warning("port session failed: %s", exc)

    logger.info("connecting to %s as %s (%s)", url, config.name, config.hands_id)
    async with ws_connect(
        url,
        additional_headers=[("Authorization", f"Bearer {config.token}")],
    ) as ws:
        if on_connected is not None:
            on_connected()
        logger.info(
            "connected to hub; sent hello for %d device(s)", len(config.devices)
        )
        await ws.send(json.dumps(hello))
        try:
            async for raw in ws:
                msg = _parse_frame(raw)
                op = msg.get("op") if msg else None
                if op == "hello-ok":
                    id_to_path = {
                        d["device_id"]: name_to_path.get(d["name"])
                        for d in msg.get("devices", [])
                    }
                    logger.info("registered with hub as %s", config.hands_id)
                elif op == "port.open":
                    task = asyncio.create_task(
                        _run_port_session(config, ws, id_to_path, msg)
                    )
                    sessions.add(task)
                    task.add_done_callback(_session_done)
                elif op == "lease.released":
                    # The hub force-closes the data WS too, which tears the
                    # bridge down; this is just an informational signal.
                    logger.info("lease released by hub: %s", msg.get("lease_id"))
                else:
                    logger.debug("ignored hub frame: %s", raw)
        finally:
            # Snapshot first: done callbacks mutate ``sessions``.
            pending = list(sessions)
            for task in pending:
                task.cancel()
            # Wait for the sessions' cleanup (serial close) to finish so a
            # port is not still held open when the next connection asks
            # for it.
            await asyncio.gather(*pending, return_exceptions=True)
            logger.info("hub closed control WS; will reconnect")
338
+
339
+
340
+ def _port_vid_pid(serial_port: serial.Serial) -> tuple[int | None, int | None] | None:
341
+ """USB ``(vid, pid)`` for an open pyserial port, or ``None``.
342
+
343
+ Back-port of esptool's ``get_port_vid_pid`` (added in MR 942, absent
344
+ from our 5.2.0 pin). Trimmed to the Linux lab-box case: resolve
345
+ ``/dev`` symlinks and match against ``list_ports.comports()``.
346
+ """
347
+ name = getattr(serial_port, "port", None)
348
+ if not name:
349
+ return None
350
+ target = os.path.realpath(name) if name.startswith("/dev/") else name
351
+ for p in serial.tools.list_ports.comports():
352
+ if p.device in (target, name):
353
+ return p.vid, p.pid
354
+ return None
355
+
356
+
357
class DonutPortManager(EspPortManager):
    """``EspPortManager`` with USB-Serial-JTAG aware resets.

    Back-ported from esptool MR 942 onto our released esptool pin. Stock
    ``EspPortManager`` (esptool 5.2.0) only knows the UART-bridge DTR/RTS
    reset sequence. On a native-USB devkit (e.g. esp32c3) that sequence
    fights the chip's own USB stack and causes spurious resets, so
    flashing over rfc2217 fails. This shim detects the USB-JTAG/Serial PID
    and swaps in esptool's ``USBJTAGSerialReset``.

    **Delete this class** once MR 942 lands in a released esptool and our
    pin moves past it — at that point stock ``EspPortManager`` does the
    right thing on its own.
    """

    def __init__(self, serial_port, connection, esp32r0_delay=False, logger=None):
        # Set the flag before delegating to the base constructor, keeping
        # the original ordering.
        ids = _port_vid_pid(serial_port)
        product_id = ids[1] if ids else None
        self.uses_usb_jtag_serial = product_id == ESPLoader.USB_JTAG_SERIAL_PID
        super().__init__(serial_port, connection, esp32r0_delay, logger)

    def _hard_reset_thread(self):
        """Run a hard reset, honouring a configured custom sequence."""
        if self.logger:
            self.logger.info("Activating hard reset in thread")
        sequence = _esptool_cfg.get("custom_hard_reset_sequence")
        if sequence is None:
            HardReset(self.serial, uses_usb=self.uses_usb_jtag_serial)()
            return
        CustomReset(self.serial, sequence)()

    def _reset_thread(self):
        """Run the reset sequence that fits this port.

        Precedence: a configured custom sequence, then USB-JTAG/Serial,
        then the platform default (tight reset off Windows, classic on it).
        """
        if self.logger:
            self.logger.info("Activating reset in thread")
        delay = DEFAULT_RESET_DELAY
        if self.esp32r0_delay:
            delay += 0.5

        sequence = _esptool_cfg.get("custom_reset_sequence")
        if sequence is not None:
            CustomReset(self.serial, sequence)()
            return
        if self.uses_usb_jtag_serial:
            USBJTAGSerialReset(self.serial)()
            return
        if os.name != "nt":
            UnixTightReset(self.serial, delay)()
            return
        ClassicReset(self.serial, delay)()
402
+
403
+
404
async def _run_port_session(
    config: HandsConfig,
    control_ws: object,
    id_to_path: dict[str, str],
    msg: dict,
) -> None:
    """Open the named port, ACK, then bridge it to the data WS.

    On any failure to open the port the hub is told ``port.open-failed``
    (which rolls back the pending lease) and the session ends. Otherwise
    we ACK ``port.open-ok`` so the hub can hand the agent its lease, dial
    the data WS, and shuttle bytes until either side closes.

    Args:
        config: Hands config; only ``hub_url`` is used here.
        control_ws: Control connection used for the ACK / failure frame.
        id_to_path: Hub-assigned device id → local serial path.
        msg: The ``port.open`` frame (``id``, ``device_id``, ``ticket``
            and optional ``baud``).
    """
    req_id = msg.get("id")
    device_id = msg.get("device_id")
    ticket = msg.get("ticket")
    baud = msg.get("baud", 115200)
    path = id_to_path.get(device_id)

    async def refuse(reason: str) -> None:
        await control_ws.send(
            json.dumps({"op": "port.open-failed", "id": req_id, "error": reason})
        )

    if path is None:
        await refuse(f"unknown device_id {device_id}")
        return

    loop = asyncio.get_running_loop()
    try:
        # Blocking pyserial port (not serial_asyncio): EspPortManager's reset
        # sequences toggle DTR/RTS on a plain serial.Serial. serial_for_url
        # opens real /dev paths and URL handlers (loop://, socket://) alike.
        serial_instance = await loop.run_in_executor(
            None, lambda: serial.serial_for_url(path, baudrate=baud, timeout=0.1)
        )
    except Exception as exc:
        logger.warning("could not open %s: %s", path, exc)
        await refuse(str(exc))
        return

    # From here on the port is open: everything, including the ACK, runs
    # under the finally so a failed or cancelled send cannot leak it.
    try:
        await control_ws.send(json.dumps({"op": "port.open-ok", "id": req_id}))
        data_url = config.hub_url.rstrip("/") + "/api/ws/data"
        logger.info("opened %s; bridging rfc2217 to data WS", path)
        async with ws_connect(data_url) as data_ws:
            # Ticket rides the first frame, not the URL (keeps it out of logs).
            await data_ws.send(json.dumps({"ticket": ticket}))
            await _bridge_rfc2217_ws(serial_instance, data_ws)
    finally:
        serial_instance.close()
        logger.info("closed %s", path)
461
+
462
+
463
+ def _parse_frame(raw: object) -> dict | None:
464
+ try:
465
+ msg = json.loads(raw)
466
+ except (TypeError, ValueError):
467
+ return None
468
+ return msg if isinstance(msg, dict) else None
469
+
470
+
471
async def _bridge_rfc2217_ws(
    serial_instance: serial.Serial,
    ws: object,
) -> None:
    """Run an rfc2217 server over the data WS against a real serial port.

    A ``DonutPortManager`` owns the rfc2217/telnet protocol: ``filter``
    extracts pure serial payload from inbound WS frames (handling option
    negotiation, baud changes, and DTR/RTS reset out of band), ``escape``
    re-frames outbound serial bytes. The agent's pyserial rfc2217 client
    and this server are the protocol endpoints; the WS and hub in between
    just carry bytes.

    Two real-world wrinkles are handled here:

    * **Per-client sessions.** ``RFC2217_RESET`` on the WS recreates the
      PortManager so each fresh local client (esptool, then the monitor)
      renegotiates cleanly instead of hitting sticky telnet option state.
    * **Re-enumeration.** Native-USB devkits drop off the bus during reset;
      a read that errors triggers a reopen-with-backoff rather than killing
      the session. Both ``serial.SerialException`` and bare ``OSError`` are
      treated as "the port went away": pyserial does not wrap every
      low-level call (e.g. the ``in_waiting`` and modem-line ioctls on
      POSIX), so a vanished device can surface as either.

    ``serial.Serial`` is blocking, so reads and writes run in a dedicated
    two-thread executor that is shut down when the bridge ends — never the
    asyncio default pool, whose threads would otherwise outlive the session
    and wedge interpreter shutdown. A single outbound queue + sender keeps
    the PortManager's negotiation writes ordered with escaped serial data.

    Args:
        serial_instance: The serial port to bridge. It is read, written,
            and closed/reopened during recovery.
        ws: The data WebSocket. It must support ``await ws.send(...)`` and
            ``async for msg in ws``.

    Returns:
        ``None`` once either direction of the bridge finishes (WS iteration
        ends, or the serial port cannot be reopened in time). All helper
        tasks are cancelled and the executor is released before returning.
    """
    # Transient "port is gone" failures. SerialException is listed
    # explicitly so the handlers do not rely on its base class.
    port_errors = (serial.SerialException, OSError)

    loop = asyncio.get_running_loop()
    executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="donut-serial")
    outbound: asyncio.Queue[bytes] = asyncio.Queue()

    class _WSConnection:
        """PortManager writes telnet negotiation here; we relay to the WS."""

        def write(self, data) -> None:
            outbound.put_nowait(bytes(data))

    connection = _WSConnection()

    def _new_port_manager() -> DonutPortManager:
        # Constructing the manager emits the initial telnet option requests
        # (via connection.write → outbound), so the client can negotiate.
        return DonutPortManager(serial_instance, connection)

    port_manager = _new_port_manager()

    def _reopen_serial() -> None:
        # Runs in the executor: close a stale handle, then open afresh.
        if serial_instance.is_open:
            serial_instance.close()
        serial_instance.open()

    async def _recover_serial() -> bool:
        """Retry reopening the port until SERIAL_REOPEN_TIMEOUT elapses."""
        deadline = loop.time() + SERIAL_REOPEN_TIMEOUT
        while loop.time() < deadline:
            try:
                await loop.run_in_executor(executor, _reopen_serial)
                logger.info("reopened %s after disconnect", serial_instance.port)
                return True
            except port_errors:
                # Device node not back yet (or half-enumerated); try again.
                await asyncio.sleep(0.2)
        return False

    async def sender() -> None:
        # Sole writer to the WS, so frames leave in queue order.
        while True:
            await ws.send(await outbound.get())

    async def serial_to_ws() -> None:
        while True:
            try:
                data = await loop.run_in_executor(
                    executor,
                    # Drain whatever is buffered, or block for one byte.
                    lambda: serial_instance.read(serial_instance.in_waiting or 1),
                )
            except port_errors as exc:
                logger.info(
                    "serial read on %s failed (%s); reopening",
                    serial_instance.port,
                    exc,
                )
                if not await _recover_serial():
                    logger.warning(
                        "could not reopen %s; ending bridge", serial_instance.port
                    )
                    return
                continue
            if data:
                outbound.put_nowait(b"".join(port_manager.escape(data)))

    async def ws_to_serial() -> None:
        nonlocal port_manager
        async for msg in ws:
            if isinstance(msg, bytes | bytearray) and bytes(msg) == RFC2217_RESET:
                # A fresh local rfc2217 client attached: start a new session
                # so its negotiation isn't ignored as already-active.
                port_manager = _new_port_manager()
                continue
            if isinstance(msg, str):
                msg = msg.encode()
            # filter() extracts serial payload and handles telnet/rfc2217
            # negotiation (baud, line params, DTR/RTS) out of band.
            payload = b"".join(port_manager.filter(msg))
            if payload:
                try:
                    await loop.run_in_executor(executor, serial_instance.write, payload)
                except port_errors:
                    # Best effort: the read side owns recovery of the port.
                    logger.debug("serial write dropped during reset", exc_info=True)

    async def modem_poll() -> None:
        while True:
            await asyncio.sleep(1.0)
            try:
                port_manager.check_modem_lines()
            except port_errors:
                # Skip this tick rather than let the poller die for the
                # rest of the session while the device re-enumerates.
                logger.debug("modem-line poll skipped during reset", exc_info=True)

    tasks = [
        asyncio.create_task(coro())
        for coro in (sender, serial_to_ws, ws_to_serial, modem_poll)
    ]
    try:
        # Only the two data directions decide when the bridge is over;
        # sender and modem_poll are helpers torn down alongside them.
        done, _ = await asyncio.wait(
            (tasks[1], tasks[2]), return_when=asyncio.FIRST_COMPLETED
        )
        for task in done:
            if (exc := task.exception()) is not None:
                logger.warning("rfc2217 bridge ended on error: %r", exc)
    finally:
        for task in tasks:
            task.cancel()
        await asyncio.gather(*tasks, return_exceptions=True)
        # Don't wait: a read may be blocked in-flight, but it returns within
        # the port's read timeout and the thread then exits. wait=True here
        # would stall teardown for that window.
        executor.shutdown(wait=False, cancel_futures=True)
606
+
607
+
608
async def run_forever(
    config: HandsConfig,
    *,
    sleep: Callable[[float], Awaitable[None]] = asyncio.sleep,
    jitter: Callable[[], float] = random.random,
    clock: Callable[[], float] = time.monotonic,
) -> None:
    """Keep a hub session alive, redialing with jittered exponential backoff.

    Each pass calls ``run`` once. Afterwards the loop waits ``jitter() *
    ceiling`` seconds (full jitter, so a fleet of hands boxes doesn't
    stampede a recovering hub) and then doubles the ceiling, capped at
    ``MAX_BACKOFF``. The ceiling drops back to ``INITIAL_BACKOFF`` only
    when the session that just ended had been connected for at least
    ``STABLE_SESSION_SECONDS``; a hub that accepts the upgrade and then
    drops the socket at once therefore cannot hold us in a sub-second
    redial loop.

    Retryable: a clean close by the hub (e.g. a deploy), any ``OSError`` or
    ``WebSocketException``, and an upgrade rejected with a non-auth HTTP
    status. Fatal: an upgrade rejected with a status in
    ``FATAL_AUTH_STATUSES`` — the token is bad or revoked, so the
    ``InvalidStatus`` is re-raised instead of spinning forever.

    The ``sleep``, ``jitter`` and ``clock`` hooks default to the real
    implementations and exist so callers can substitute their own.
    """
    ceiling = INITIAL_BACKOFF
    while True:
        session_start: float | None = None

        def _stamp_connected() -> None:
            # Invoked by ``run`` via on_connected; records when the
            # session came up so its lifetime can be measured below.
            nonlocal session_start
            session_start = clock()

        try:
            await run(config, on_connected=_stamp_connected)
        except InvalidStatus as exc:
            status = exc.response.status_code
            if status in FATAL_AUTH_STATUSES:
                logger.error("hub rejected auth (HTTP %s); not retrying", status)
                raise
            logger.warning("hub rejected upgrade (HTTP %s); reconnecting", status)
        except (OSError, WebSocketException) as exc:
            logger.warning("control connection lost (%s); reconnecting", exc)
        else:
            logger.info("hub closed connection; reconnecting")

        # Only a session that stayed up long enough earns a backoff reset.
        if (
            session_start is not None
            and (clock() - session_start) >= STABLE_SESSION_SECONDS
        ):
            ceiling = INITIAL_BACKOFF

        pause = jitter() * ceiling
        logger.info("reconnecting in %.1fs", pause)
        await sleep(pause)
        ceiling = min(ceiling * 2, MAX_BACKOFF)
664
+
665
+
666
def resolve_config_path(
    *,
    explicit: Path | None,
    etc_dir: Path = DEFAULT_ETC_DIR,
) -> Path:
    """Locate ``hands.toml`` following the documented search order.

    Precedence, highest first:

    1. ``explicit`` (the ``--config`` value);
    2. the path in the ``ENV_CONFIG`` environment variable, when set and
       non-empty;
    3. ``CONFIG_FILENAME`` inside ``platformdirs.user_config_dir('donut')``
       (on Linux ``$XDG_CONFIG_HOME/donut`` or ``~/.config/donut``; macOS
       and Windows use their own conventional location);
    4. ``<etc_dir>/CONFIG_FILENAME``.

    The first two are returned without checking that the file exists: the
    user named that path, so a later missing-file error should mention
    exactly what they typed. The last two are only returned if they exist.

    Raises:
        ConfigError: neither fallback location holds a config file.
    """
    if explicit is not None:
        return explicit

    from_env = os.environ.get(ENV_CONFIG)
    if from_env:
        return Path(from_env)

    per_user = Path(user_config_dir("donut")) / CONFIG_FILENAME
    system_wide = etc_dir / CONFIG_FILENAME
    search_order = (per_user, system_wide)

    found = next((path for path in search_order if path.exists()), None)
    if found is not None:
        return found

    tried = "\n ".join(map(str, search_order))
    raise ConfigError(
        f"no donut/hands.toml found; tried:\n {tried}\n"
        "run `donut-hands --init` to create one"
    )
706
+
707
+
708
def _default_init_path() -> Path:
    """Return the per-user path ``--init`` writes to when ``--config`` is absent."""
    user_dir = Path(user_config_dir("donut"))
    return user_dir / CONFIG_FILENAME
711
+
712
+
713
+ def main(argv: list[str] | None = None) -> None:
714
+ """Console entry point for ``donut-hands``.
715
+
716
+ Default: dial the hub from the resolved ``hands.toml`` and stay
717
+ connected, reconnecting with backoff across hub restarts and network
718
+ blips. Exits non-zero only on a fatal error — bad config, or an
719
+ auth rejection (revoked/invalid token) — so systemd can surface it.
720
+
721
+ ``--init``: interactively create a fresh ``hands.toml`` and exit 0.
722
+ """
723
+ parser = argparse.ArgumentParser(prog="donut-hands")
724
+ parser.add_argument(
725
+ "--init",
726
+ action="store_true",
727
+ help="interactively create hands.toml and exit",
728
+ )
729
+ parser.add_argument(
730
+ "--config",
731
+ type=str,
732
+ default=None,
733
+ help="explicit path to hands.toml (overrides search)",
734
+ )
735
+ args = parser.parse_args(argv)
736
+
737
+ explicit = Path(args.config) if args.config else None
738
+
739
+ try:
740
+ if args.init:
741
+ target = explicit if explicit is not None else _default_init_path()
742
+ init_config(target, prompt_fn=input)
743
+ print(f"wrote {target}")
744
+ return
745
+ cfg_path = resolve_config_path(explicit=explicit)
746
+ cfg = load_config(cfg_path)
747
+ except ConfigError as exc:
748
+ print(f"donut-hands: {exc}", file=sys.stderr)
749
+ sys.exit(1)
750
+
751
+ # The daemon is an application entry point, so it owns log config —
752
+ # without this, logger.info on (re)connect is invisible under systemd.
753
+ logging.basicConfig(level=logging.INFO)
754
+ try:
755
+ asyncio.run(run_forever(cfg))
756
+ except KeyboardInterrupt:
757
+ sys.exit(130)
758
+ except Exception as exc:
759
+ print(f"hands daemon exiting: {exc}", file=sys.stderr)
760
+ sys.exit(1)