@zhihand/mcp 0.32.0 → 0.32.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/zhihand +5 -30
- package/dist/core/device.js +1 -0
- package/dist/core/ws.d.ts +2 -0
- package/dist/core/ws.js +16 -6
- package/dist/daemon/index.js +1 -1
- package/dist/daemon/prompt-listener.js +22 -3
- package/dist/index.d.ts +1 -1
- package/dist/index.js +1 -1
- package/package.json +1 -1
- package/README.md +0 -359
package/bin/zhihand
CHANGED
|
@@ -30,7 +30,7 @@ import { fetchUserCredentials } from "../dist/core/ws.js";
|
|
|
30
30
|
import { configureMCP, displayName } from "../dist/cli/mcp-config.js";
|
|
31
31
|
|
|
32
32
|
const DEFAULT_ENDPOINT = "https://api.zhihand.com";
|
|
33
|
-
const VERSION = "0.32.
|
|
33
|
+
const VERSION = "0.32.3";
|
|
34
34
|
|
|
35
35
|
const CLI_TOOL_MAP = {
|
|
36
36
|
claude: "claudecode",
|
|
@@ -75,7 +75,6 @@ Usage:
|
|
|
75
75
|
zhihand claude Switch backend to Claude Code
|
|
76
76
|
zhihand codex Switch backend to Codex CLI
|
|
77
77
|
|
|
78
|
-
zhihand setup Interactive setup: pair + configure + start
|
|
79
78
|
zhihand pair [--label X] Pair new user + first device + auto-configure MCP
|
|
80
79
|
zhihand pair <user_id> Add device to existing user
|
|
81
80
|
zhihand list [<user_id>] List users/devices with real-time online status
|
|
@@ -510,7 +509,7 @@ switch (command) {
|
|
|
510
509
|
const daemonPid = isAlreadyRunning();
|
|
511
510
|
|
|
512
511
|
if (users.length === 0) {
|
|
513
|
-
console.log("No users configured. Run: zhihand
|
|
512
|
+
console.log("No users configured. Run: zhihand pair");
|
|
514
513
|
} else {
|
|
515
514
|
console.log(`Users: ${users.length}`);
|
|
516
515
|
for (const u of users) {
|
|
@@ -550,38 +549,14 @@ switch (command) {
|
|
|
550
549
|
}
|
|
551
550
|
|
|
552
551
|
case "setup": {
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
console.log("No users found. Starting pairing...\n");
|
|
556
|
-
const { userRecord } = await executePairingNewUser(values.label);
|
|
557
|
-
console.log(`\nUser created: ${userRecord.label} (${userRecord.user_id})\n`);
|
|
558
|
-
}
|
|
559
|
-
|
|
560
|
-
const tools = await detectCLITools();
|
|
561
|
-
console.log(formatDetectedTools(tools));
|
|
562
|
-
|
|
563
|
-
if (tools.length === 0) {
|
|
564
|
-
console.log("\nNo CLI tools detected. Install one of: Claude Code, Codex CLI, Gemini CLI.");
|
|
565
|
-
break;
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
const best = tools.find((t) => t.loggedIn) ?? tools[0];
|
|
569
|
-
const config = loadBackendConfig();
|
|
570
|
-
|
|
571
|
-
console.log(`\nAuto-selecting backend: ${displayName(best.name)}...`);
|
|
572
|
-
if (values.port) process.env.ZHIHAND_PORT = values.port;
|
|
573
|
-
configureMCP(best.name, config.activeBackend);
|
|
574
|
-
saveBackendConfig({ activeBackend: best.name });
|
|
575
|
-
|
|
576
|
-
console.log(`\nStarting daemon...\n`);
|
|
577
|
-
const port = values.port ? parseInt(values.port, 10) : undefined;
|
|
578
|
-
await startDaemon({ port });
|
|
552
|
+
console.log("'zhihand setup' has been merged into 'zhihand pair'.");
|
|
553
|
+
console.log("Run: zhihand pair");
|
|
579
554
|
break;
|
|
580
555
|
}
|
|
581
556
|
|
|
582
557
|
case "test": {
|
|
583
558
|
const { createControlCommand, createSystemCommand, enqueueCommand } = await import("../dist/core/command.js");
|
|
584
|
-
const { waitForCommandAck } = await import("../dist/core/
|
|
559
|
+
const { waitForCommandAck } = await import("../dist/core/ws.js");
|
|
585
560
|
const { fetchScreenshot, getSnapshotStaleThresholdMs } = await import("../dist/core/screenshot.js");
|
|
586
561
|
const { fetchDeviceProfileOnce, extractStatic, computeCapabilities, formatDeviceStatus } = await import("../dist/core/device.js");
|
|
587
562
|
|
package/dist/core/device.js
CHANGED
|
@@ -176,6 +176,7 @@ const RAW_ATTRIBUTE_ALLOWLIST = [
|
|
|
176
176
|
"brand", "manufacturer", "model", "rom_family", "rom_version",
|
|
177
177
|
"system_release", "api_level", "app_version", "app_build",
|
|
178
178
|
"display_width_px", "display_height_px", "density", "density_dpi",
|
|
179
|
+
"display_width_pixels", "display_height_pixels", "display_scale",
|
|
179
180
|
"screen_width_dp", "screen_height_dp", "smallest_width_dp",
|
|
180
181
|
"form_factor", "orientation", "touchscreen", "navigation_mode",
|
|
181
182
|
"locale", "language", "timezone", "rtl", "dark_mode", "font_scale",
|
package/dist/core/ws.d.ts
CHANGED
|
@@ -46,6 +46,7 @@ export interface WSEvent {
|
|
|
46
46
|
command?: QueuedCommandRecord;
|
|
47
47
|
device_profile?: Record<string, unknown>;
|
|
48
48
|
credential?: Record<string, unknown>;
|
|
49
|
+
payload?: Record<string, unknown>;
|
|
49
50
|
sequence: number;
|
|
50
51
|
}
|
|
51
52
|
export declare function handleWSEvent(event: WSEvent): void;
|
|
@@ -61,6 +62,7 @@ export interface UserEventStreamHandlers {
|
|
|
61
62
|
onDisconnected: () => void;
|
|
62
63
|
}
|
|
63
64
|
export declare class UserEventWebSocket {
|
|
65
|
+
private controllerToken;
|
|
64
66
|
private handlers;
|
|
65
67
|
private rws;
|
|
66
68
|
private lastProcessedSeq;
|
package/dist/core/ws.js
CHANGED
|
@@ -163,18 +163,26 @@ export function subscribeToCommandAck(commandId, callback) {
|
|
|
163
163
|
return () => { ackCallbacks.delete(commandId); };
|
|
164
164
|
}
|
|
165
165
|
export class UserEventWebSocket {
|
|
166
|
+
controllerToken;
|
|
166
167
|
handlers;
|
|
167
168
|
rws;
|
|
168
169
|
lastProcessedSeq = new Map();
|
|
169
170
|
constructor(userId, controllerToken, endpoint, handlers) {
|
|
171
|
+
this.controllerToken = controllerToken;
|
|
170
172
|
this.handlers = handlers;
|
|
171
|
-
const topics = "commands,device_profile,device.online,device.offline,credential.added,credential.removed";
|
|
172
|
-
const wsUrl = `${endpoint.replace(/^http/, "ws")}/v1/users/${encodeURIComponent(userId)}/ws
|
|
173
|
+
const topics = ["commands", "device_profile", "device.online", "device.offline", "credential.added", "credential.removed"];
|
|
174
|
+
const wsUrl = `${endpoint.replace(/^http/, "ws")}/v1/users/${encodeURIComponent(userId)}/ws`;
|
|
173
175
|
this.rws = new ReconnectingWebSocket({
|
|
174
176
|
url: wsUrl,
|
|
175
177
|
headers: { "Authorization": `Bearer ${controllerToken}` },
|
|
176
178
|
onOpen: () => {
|
|
177
|
-
|
|
179
|
+
// Send auth message as the server requires it as the first frame.
|
|
180
|
+
this.rws.send(JSON.stringify({
|
|
181
|
+
type: "auth",
|
|
182
|
+
bearer: this.controllerToken,
|
|
183
|
+
topics,
|
|
184
|
+
}));
|
|
185
|
+
// onConnected is called after auth_ok is received (see handleMessage)
|
|
178
186
|
},
|
|
179
187
|
onClose: (_code, _reason) => {
|
|
180
188
|
this.handlers.onDisconnected();
|
|
@@ -203,9 +211,11 @@ export class UserEventWebSocket {
|
|
|
203
211
|
this.rws.send(JSON.stringify({ type: "pong" }));
|
|
204
212
|
return;
|
|
205
213
|
}
|
|
206
|
-
// Auth responses
|
|
207
|
-
if (msg.type === "auth_ok")
|
|
214
|
+
// Auth responses
|
|
215
|
+
if (msg.type === "auth_ok") {
|
|
216
|
+
this.handlers.onConnected();
|
|
208
217
|
return;
|
|
218
|
+
}
|
|
209
219
|
if (msg.type === "auth_error") {
|
|
210
220
|
log.error(`[ws] Auth failed: ${msg.error}`);
|
|
211
221
|
this.rws.stop(); // Don't retry with invalid credentials
|
|
@@ -244,7 +254,7 @@ export class UserEventWebSocket {
|
|
|
244
254
|
this.handlers.onCommandAcked(ev);
|
|
245
255
|
break;
|
|
246
256
|
case "credential.added":
|
|
247
|
-
this.handlers.onCredentialAdded(ev.credential ?? { credential_id: ev.credential_id });
|
|
257
|
+
this.handlers.onCredentialAdded(ev.credential ?? ev.payload ?? { credential_id: ev.credential_id });
|
|
248
258
|
break;
|
|
249
259
|
case "credential.removed":
|
|
250
260
|
this.handlers.onCredentialRemoved(ev.credential_id);
|
package/dist/daemon/index.js
CHANGED
|
@@ -171,7 +171,7 @@ export async function startDaemon(options) {
|
|
|
171
171
|
}
|
|
172
172
|
catch (err) {
|
|
173
173
|
log(`Error: ${err.message}`);
|
|
174
|
-
log("Run 'zhihand
|
|
174
|
+
log("Run 'zhihand pair' to pair a device first.");
|
|
175
175
|
process.exit(1);
|
|
176
176
|
}
|
|
177
177
|
// Load backend + model
|
|
package/dist/daemon/prompt-listener.js
CHANGED
|
@@ -51,9 +51,13 @@ export class PromptListener {
|
|
|
51
51
|
"Authorization": `Bearer ${this.config.controllerToken}`,
|
|
52
52
|
},
|
|
53
53
|
onOpen: () => {
|
|
54
|
-
|
|
55
|
-
this.
|
|
56
|
-
|
|
54
|
+
// Send auth message as first frame (required by server).
|
|
55
|
+
this.rws.send(JSON.stringify({
|
|
56
|
+
type: "auth",
|
|
57
|
+
controller_token: this.config.controllerToken,
|
|
58
|
+
topics: ["prompts"],
|
|
59
|
+
}));
|
|
60
|
+
// onConnected deferred until auth_ok is received (see handleWSMessage)
|
|
57
61
|
},
|
|
58
62
|
onClose: (_code, _reason) => {
|
|
59
63
|
if (this.wsConnected) {
|
|
@@ -73,6 +77,21 @@ export class PromptListener {
|
|
|
73
77
|
}
|
|
74
78
|
handleWSMessage(data) {
|
|
75
79
|
const msg = data;
|
|
80
|
+
// Auth responses
|
|
81
|
+
if (msg.type === "auth_ok") {
|
|
82
|
+
this.wsConnected = true;
|
|
83
|
+
this.stopPolling();
|
|
84
|
+
this.log("[ws] Connected to prompt stream.");
|
|
85
|
+
return;
|
|
86
|
+
}
|
|
87
|
+
if (msg.type === "auth_error") {
|
|
88
|
+
this.log(`[ws] Auth failed: ${msg.error}`);
|
|
89
|
+
this.rws?.stop();
|
|
90
|
+
this.rws = null;
|
|
91
|
+
this.wsConnected = false;
|
|
92
|
+
this.startPolling();
|
|
93
|
+
return;
|
|
94
|
+
}
|
|
76
95
|
// Application-level ping (if server sends these alongside protocol pings)
|
|
77
96
|
if (msg.type === "ping") {
|
|
78
97
|
this.rws?.send(JSON.stringify({ type: "pong" }));
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
-
export declare const PACKAGE_VERSION = "0.
|
|
2
|
+
export declare const PACKAGE_VERSION = "0.32.3";
|
|
3
3
|
export declare function createServer(): McpServer;
|
|
4
4
|
export declare function startStdioServer(): Promise<void>;
|
package/dist/index.js
CHANGED
|
@@ -8,7 +8,7 @@ import { handlePair } from "./tools/pair.js";
|
|
|
8
8
|
import { resolveTargetDevice } from "./tools/resolve.js";
|
|
9
9
|
import { buildControlToolDescription, buildSystemToolDescription, buildScreenshotToolDescription, formatDeviceStatus, extractDynamic, } from "./core/device.js";
|
|
10
10
|
import { registry } from "./core/registry.js";
|
|
11
|
-
export const PACKAGE_VERSION = "0.
|
|
11
|
+
export const PACKAGE_VERSION = "0.32.3";
|
|
12
12
|
function errorResult(message) {
|
|
13
13
|
return { content: [{ type: "text", text: message }], isError: true };
|
|
14
14
|
}
|
package/package.json
CHANGED
package/README.md
DELETED
|
@@ -1,359 +0,0 @@
|
|
|
1
|
-
# @zhihand/mcp
|
|
2
|
-
|
|
3
|
-
ZhiHand MCP Server — let AI agents see and control your phone.
|
|
4
|
-
|
|
5
|
-
Version: `0.26.0`
|
|
6
|
-
|
|
7
|
-
## What is this?
|
|
8
|
-
|
|
9
|
-
`@zhihand/mcp` is the core integration layer for ZhiHand. It runs as a **persistent daemon** that exposes phone control tools to any compatible AI agent via [MCP (Model Context Protocol)](https://modelcontextprotocol.io/), including:
|
|
10
|
-
|
|
11
|
-
- **Claude Code**
|
|
12
|
-
- **Codex CLI**
|
|
13
|
-
- **Gemini CLI**
|
|
14
|
-
- **OpenClaw**
|
|
15
|
-
|
|
16
|
-
The daemon is a single persistent process that bundles three subsystems:
|
|
17
|
-
|
|
18
|
-
| Subsystem | Purpose |
|
|
19
|
-
|---|---|
|
|
20
|
-
| **MCP Server** | HTTP Streamable transport on `localhost:18686/mcp` — serves tool calls to AI agents |
|
|
21
|
-
| **Relay** | Brain heartbeat (30s), prompt listener (phone-initiated tasks), CLI dispatch |
|
|
22
|
-
| **Config API** | IPC endpoint for `zhihand gemini/claude/codex` backend switching |
|
|
23
|
-
|
|
24
|
-
Legacy entry points (backward compatible):
|
|
25
|
-
|
|
26
|
-
| Entry | Purpose |
|
|
27
|
-
|---|---|
|
|
28
|
-
| `zhihand serve` | MCP Server (stdio mode) — legacy, still works for direct CLI integration |
|
|
29
|
-
| `zhihand.openclaw` | OpenClaw Plugin entry — thin wrapper calling the same core |
|
|
30
|
-
|
|
31
|
-
## Requirements
|
|
32
|
-
|
|
33
|
-
- **Node.js >= 22**
|
|
34
|
-
- A **ZhiHand mobile app** (Android or iOS) installed on your phone
|
|
35
|
-
|
|
36
|
-
## Installation
|
|
37
|
-
|
|
38
|
-
```bash
|
|
39
|
-
npm install -g @zhihand/mcp
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
Or use directly with `npx`:
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
npx @zhihand/mcp serve
|
|
46
|
-
```
|
|
47
|
-
|
|
48
|
-
## Quick Start
|
|
49
|
-
|
|
50
|
-
### 1. Setup and pair
|
|
51
|
-
|
|
52
|
-
```bash
|
|
53
|
-
zhihand setup
|
|
54
|
-
```
|
|
55
|
-
|
|
56
|
-
This runs the full interactive setup:
|
|
57
|
-
|
|
58
|
-
1. Registers as a plugin with the ZhiHand server
|
|
59
|
-
2. Creates a pairing session and displays a QR code in the terminal
|
|
60
|
-
3. Waits for you to scan the QR code with the ZhiHand mobile app
|
|
61
|
-
4. Saves credentials to `~/.zhihand/credentials.json`
|
|
62
|
-
5. Detects installed CLI tools (Claude Code, Codex, Gemini CLI, OpenClaw)
|
|
63
|
-
6. Auto-selects the best available tool and configures MCP automatically
|
|
64
|
-
7. Starts the daemon (MCP Server + Relay + Config API)
|
|
65
|
-
|
|
66
|
-
No manual MCP configuration needed — `zhihand setup` handles everything.
|
|
67
|
-
|
|
68
|
-
### 2. Start the daemon
|
|
69
|
-
|
|
70
|
-
```bash
|
|
71
|
-
zhihand start # Start daemon in foreground
|
|
72
|
-
zhihand start -d # Start daemon in background (detached)
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
The daemon runs the MCP Server on `localhost:18686/mcp` (HTTP Streamable transport), maintains a brain heartbeat every 30 seconds (keeps the phone Brain indicator green), and listens for phone-initiated prompts.
|
|
76
|
-
|
|
77
|
-
When started with `-d`, daemon logs are written to `~/.zhihand/daemon.log`.
|
|
78
|
-
|
|
79
|
-
### 3. Start using it
|
|
80
|
-
|
|
81
|
-
Once configured, your AI agent can use ZhiHand tools directly. For example, in Claude Code:
|
|
82
|
-
|
|
83
|
-
```
|
|
84
|
-
> Take a screenshot of my phone
|
|
85
|
-
> Tap on the Settings icon
|
|
86
|
-
> Type "hello world" into the search box
|
|
87
|
-
> Scroll down to find the About section
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
## CLI Commands
|
|
91
|
-
|
|
92
|
-
```
|
|
93
|
-
zhihand setup Interactive setup: pair + detect tools + auto-select + configure MCP + start daemon
|
|
94
|
-
zhihand start Start daemon (MCP Server + Relay + Config API)
|
|
95
|
-
zhihand start -d Start daemon in background (logs to ~/.zhihand/daemon.log)
|
|
96
|
-
zhihand start --debug Start daemon with verbose debug logging
|
|
97
|
-
zhihand stop Stop the running daemon
|
|
98
|
-
zhihand status Show daemon status, pairing info, device, backend, and model
|
|
99
|
-
|
|
100
|
-
zhihand test Test device connectivity (screenshot, click, swipe, home, back)
|
|
101
|
-
zhihand pair Pair with a phone (QR code in terminal)
|
|
102
|
-
zhihand detect List detected CLI tools and their login status
|
|
103
|
-
zhihand serve Start MCP Server (stdio mode, backward compatible)
|
|
104
|
-
zhihand --help Show help
|
|
105
|
-
|
|
106
|
-
zhihand gemini Switch backend to Gemini CLI (default model: flash)
|
|
107
|
-
zhihand claude Switch backend to Claude Code (default model: sonnet)
|
|
108
|
-
zhihand codex Switch backend to Codex CLI (default model: gpt-5.4-mini)
|
|
109
|
-
zhihand gemini --model pro Switch backend with custom model
|
|
110
|
-
```
|
|
111
|
-
|
|
112
|
-
### Daemon Lifecycle
|
|
113
|
-
|
|
114
|
-
```bash
|
|
115
|
-
zhihand start # Start daemon in foreground
|
|
116
|
-
zhihand start -d # Start daemon in background
|
|
117
|
-
zhihand start --debug # Start with verbose debug logging
|
|
118
|
-
zhihand stop # Stop the daemon
|
|
119
|
-
zhihand status # Check if daemon is running, show device & backend info
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
The daemon is a single persistent process that runs:
|
|
123
|
-
- **MCP Server** on `localhost:18686/mcp` (HTTP Streamable transport)
|
|
124
|
-
- **Relay**: brain heartbeat every 30s (keeps phone Brain indicator green), prompt listener (phone-initiated tasks dispatched to CLI), CLI dispatch
|
|
125
|
-
- **Config API**: IPC endpoint for backend switching
|
|
126
|
-
|
|
127
|
-
### Switching Backends
|
|
128
|
-
|
|
129
|
-
Use `zhihand claude`, `zhihand codex`, or `zhihand gemini` to switch the active backend:
|
|
130
|
-
|
|
131
|
-
```bash
|
|
132
|
-
zhihand gemini # Switch to Gemini CLI (model: flash)
|
|
133
|
-
zhihand claude # Switch to Claude Code (model: sonnet)
|
|
134
|
-
zhihand codex # Switch to Codex CLI (model: gpt-5.4-mini)
|
|
135
|
-
zhihand gemini --model pro # Use a custom model
|
|
136
|
-
zhihand claude -m opus # Short flag form
|
|
137
|
-
```
|
|
138
|
-
|
|
139
|
-
Each backend has a **default model alias** that resolves to the latest version:
|
|
140
|
-
|
|
141
|
-
| Backend | Default | Alias examples | Resolution |
|
|
142
|
-
|---------|---------|---------------|------------|
|
|
143
|
-
| Gemini CLI | `flash` | `flash`, `pro` | Gemini CLI resolves natively (e.g. flash → gemini-2.5-flash) |
|
|
144
|
-
| Claude Code | `sonnet` | `sonnet`, `opus`, `haiku` | Claude Code resolves natively (e.g. sonnet → claude-sonnet-4) |
|
|
145
|
-
| Codex CLI | `gpt-5.4-mini` | any full model name | Codex requires full model names |
|
|
146
|
-
|
|
147
|
-
Model resolution priority: `--model` flag > `ZHIHAND_MODEL` env > `ZHIHAND_<BACKEND>_MODEL` env > default.
|
|
148
|
-
|
|
149
|
-
When you switch:
|
|
150
|
-
- The command sends an **IPC message to the running daemon**
|
|
151
|
-
- MCP config is **automatically added** to the new backend
|
|
152
|
-
- MCP config is **automatically removed** from the previous backend
|
|
153
|
-
- The model selection is **persisted** to `~/.zhihand/backend.json`
|
|
154
|
-
- If the tool is not installed, an error is shown
|
|
155
|
-
|
|
156
|
-
### Options
|
|
157
|
-
|
|
158
|
-
| Option | Description |
|
|
159
|
-
|---|---|
|
|
160
|
-
| `--device <name>` | Use a specific paired device (if you have multiple) |
|
|
161
|
-
| `--model, -m <name>` | Set model alias (e.g. `flash`, `pro`, `sonnet`, `opus`, `gpt-5.4-mini`) |
|
|
162
|
-
| `--port <port>` | Override daemon port (default: 18686) |
|
|
163
|
-
| `-d, --detach` | Run daemon in background |
|
|
164
|
-
| `--debug` | Enable verbose debug logging (all API requests, CLI args, SSE events) |
|
|
165
|
-
| `-h, --help` | Show help |
|
|
166
|
-
|
|
167
|
-
### Environment Variables
|
|
168
|
-
|
|
169
|
-
| Variable | Description |
|
|
170
|
-
|---|---|
|
|
171
|
-
| `ZHIHAND_DEVICE` | Default device name (same as `--device`) |
|
|
172
|
-
| `ZHIHAND_CLI` | Override CLI tool selection for mobile-initiated tasks |
|
|
173
|
-
| `ZHIHAND_MODEL` | Override model for all backends |
|
|
174
|
-
| `ZHIHAND_GEMINI_MODEL` | Override model for Gemini only |
|
|
175
|
-
| `ZHIHAND_CLAUDE_MODEL` | Override model for Claude only |
|
|
176
|
-
| `ZHIHAND_CODEX_MODEL` | Override model for Codex only |
|
|
177
|
-
|
|
178
|
-
## MCP Tools
|
|
179
|
-
|
|
180
|
-
The server exposes these tools to AI agents:
|
|
181
|
-
|
|
182
|
-
### `zhihand_control`
|
|
183
|
-
|
|
184
|
-
The main phone control tool. Supports these actions:
|
|
185
|
-
|
|
186
|
-
| Action | Parameters | Description |
|
|
187
|
-
|---|---|---|
|
|
188
|
-
| `click` | `xRatio`, `yRatio` | Tap at normalized coordinates [0,1] |
|
|
189
|
-
| `doubleclick` | `xRatio`, `yRatio` | Double-tap |
|
|
190
|
-
| `longclick` | `xRatio`, `yRatio`, `durationMs` | Long press (default 800ms) |
|
|
191
|
-
| `rightclick` | `xRatio`, `yRatio` | Right-click (desktop/BLE HID) |
|
|
192
|
-
| `middleclick` | `xRatio`, `yRatio` | Middle-click (desktop/BLE HID) |
|
|
193
|
-
| `type` | `text` | Type text into the focused field |
|
|
194
|
-
| `swipe` | `startXRatio`, `startYRatio`, `endXRatio`, `endYRatio`, `durationMs` | Swipe gesture (default 300ms) |
|
|
195
|
-
| `scroll` | `xRatio`, `yRatio`, `direction`, `amount` | Scroll up/down/left/right |
|
|
196
|
-
| `keycombo` | `keys` | Key combination (e.g. `"ctrl+c"`, `"alt+tab"`) |
|
|
197
|
-
| `back` | — | Press system Back button |
|
|
198
|
-
| `home` | — | Press system Home button |
|
|
199
|
-
| `enter` | — | Press Enter key |
|
|
200
|
-
| `open_app` | `appPackage`, `bundleId`, `urlScheme`, `appName` | Open an application |
|
|
201
|
-
| `clipboard` | `clipboardAction` (`get`/`set`), `text` | Read or write clipboard |
|
|
202
|
-
| `wait` | `durationMs` | Wait (local sleep, no server round-trip) |
|
|
203
|
-
| `screenshot` | — | Capture screen immediately |
|
|
204
|
-
|
|
205
|
-
Coordinates use **normalized ratios** (0.0 to 1.0), where `(0, 0)` is the top-left corner and `(1, 1)` is the bottom-right. This works across any screen resolution.
|
|
206
|
-
|
|
207
|
-
Every action returns a text summary and a screenshot of the result.
|
|
208
|
-
|
|
209
|
-
### `zhihand_screenshot`
|
|
210
|
-
|
|
211
|
-
Capture the current phone screen without performing any action. Returns an image.
|
|
212
|
-
|
|
213
|
-
No parameters required.
|
|
214
|
-
|
|
215
|
-
### `zhihand_status`
|
|
216
|
-
|
|
217
|
-
Get device status: platform, model, OS version, screen size, battery, network, BLE connection, dark mode, storage, and more. No parameters.
|
|
218
|
-
|
|
219
|
-
Tool description and `open_app` guidance are **automatically adapted** based on the connected device platform (Android/iOS), so AI agents always send correct platform-specific parameters.
|
|
220
|
-
|
|
221
|
-
### `zhihand_pair`
|
|
222
|
-
|
|
223
|
-
Pair with a phone device. Returns a QR code and pairing URL.
|
|
224
|
-
|
|
225
|
-
| Parameter | Type | Description |
|
|
226
|
-
|---|---|---|
|
|
227
|
-
| `forceNew` | `boolean` | Force new pairing even if already paired (default: `false`) |
|
|
228
|
-
|
|
229
|
-
### MCP Resource: `device://profile`
|
|
230
|
-
|
|
231
|
-
Provides full device context (static + dynamic) as JSON. Includes platform, model, OS version, screen size, battery, network, BLE, dark mode, storage, thermal state, locale, and more.
|
|
232
|
-
|
|
233
|
-
## How It Works
|
|
234
|
-
|
|
235
|
-
```
|
|
236
|
-
AI Agent ←HTTP Streamable→ Daemon (localhost:18686/mcp)
|
|
237
|
-
│
|
|
238
|
-
├── MCP Server ──→ ZhiHand Server ──→ Mobile App
|
|
239
|
-
│ (tool calls: control, screenshot, pair)
|
|
240
|
-
│
|
|
241
|
-
├── Relay
|
|
242
|
-
│ ├── Brain heartbeat (30s) ──→ Server
|
|
243
|
-
│ ├── Prompt listener (SSE) ←── Server ←── Phone
|
|
244
|
-
│ └── CLI dispatch ──→ spawn claude/codex/gemini
|
|
245
|
-
│
|
|
246
|
-
└── Config API
|
|
247
|
-
└── IPC from zhihand claude/codex/gemini
|
|
248
|
-
```
|
|
249
|
-
|
|
250
|
-
### Agent-initiated flow (tool calls)
|
|
251
|
-
|
|
252
|
-
1. AI agent calls a tool (e.g. `zhihand_control` with `action: "click"`)
|
|
253
|
-
2. MCP Server translates to a device command and enqueues it via the ZhiHand API
|
|
254
|
-
3. Mobile app picks up the command, executes it, and sends an ACK
|
|
255
|
-
4. MCP Server receives the ACK (via SSE or polling fallback)
|
|
256
|
-
5. MCP Server fetches a fresh screenshot and returns it to the AI agent
|
|
257
|
-
|
|
258
|
-
### Phone-initiated flow (prompt relay)
|
|
259
|
-
|
|
260
|
-
1. User speaks or types a prompt on the phone
|
|
261
|
-
2. Phone sends prompt to ZhiHand Server
|
|
262
|
-
3. Daemon receives prompt via SSE
|
|
263
|
-
4. Daemon spawns the active CLI tool (e.g. `claude`, `codex`, `gemini`) with the prompt
|
|
264
|
-
5. CLI tool executes, result is sent back to the phone
|
|
265
|
-
|
|
266
|
-
### Brain heartbeat
|
|
267
|
-
|
|
268
|
-
The daemon sends a heartbeat to the ZhiHand Server every 30 seconds. This keeps the **Brain indicator green** on the phone, showing the user that an AI backend is connected and ready.
|
|
269
|
-
|
|
270
|
-
Screenshots are transferred as raw JPEG binary and only base64-encoded at the LLM API boundary, minimizing bandwidth.
|
|
271
|
-
|
|
272
|
-
## Credential Storage
|
|
273
|
-
|
|
274
|
-
Pairing credentials are stored at:
|
|
275
|
-
|
|
276
|
-
```
|
|
277
|
-
~/.zhihand/
|
|
278
|
-
├── credentials.json # Device credentials (credentialId, controllerToken, endpoint)
|
|
279
|
-
├── backend.json # Active backend + model selection
|
|
280
|
-
├── daemon.pid # Daemon PID file (for zhihand stop)
|
|
281
|
-
├── daemon.log # Daemon log output (when started with -d)
|
|
282
|
-
└── state.json # Current pairing session state
|
|
283
|
-
```
|
|
284
|
-
|
|
285
|
-
You can manage multiple devices. The `credentials.json` file stores a `default` device name and a `devices` map:
|
|
286
|
-
|
|
287
|
-
```json
|
|
288
|
-
{
|
|
289
|
-
"default": "mcp-myhost",
|
|
290
|
-
"devices": {
|
|
291
|
-
"mcp-myhost": {
|
|
292
|
-
"credentialId": "cred_abc123",
|
|
293
|
-
"controllerToken": "tok_...",
|
|
294
|
-
"endpoint": "https://api.zhihand.com",
|
|
295
|
-
"deviceName": "mcp-myhost",
|
|
296
|
-
"pairedAt": "2026-04-01T00:00:00.000Z"
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
}
|
|
300
|
-
```
|
|
301
|
-
|
|
302
|
-
## Architecture
|
|
303
|
-
|
|
304
|
-
```
|
|
305
|
-
packages/mcp/
|
|
306
|
-
├── bin/
|
|
307
|
-
│ ├── zhihand # Main CLI entry (start/stop/status/setup/serve/pair/detect)
|
|
308
|
-
│ └── zhihand.openclaw # OpenClaw plugin entry
|
|
309
|
-
├── src/
|
|
310
|
-
│ ├── index.ts # MCP Server (stdio transport, legacy)
|
|
311
|
-
│ ├── openclaw.adapter.ts # OpenClaw Plugin adapter (thin wrapper)
|
|
312
|
-
│ ├── core/
|
|
313
|
-
│ │ ├── config.ts # Credential & config management (~/.zhihand/), default models
|
|
314
|
-
│ │ ├── resolve-path.ts # Platform-aware executable path resolution (gemini/claude/codex)
|
|
315
|
-
│ │ ├── device.ts # Device context: static/dynamic profile, fetch, SSE updates
|
|
316
|
-
│ │ ├── command.ts # Command creation, enqueue, ACK formatting
|
|
317
|
-
│ │ ├── screenshot.ts # Binary screenshot fetch (JPEG)
|
|
318
|
-
│ │ ├── sse.ts # SSE client + hybrid ACK (SSE push + polling fallback)
|
|
319
|
-
│ │ └── pair.ts # Plugin registration + device pairing flow
|
|
320
|
-
│ ├── daemon/
|
|
321
|
-
│ │ ├── index.ts # Daemon entry: HTTP server + MCP + Relay + Config API
|
|
322
|
-
│ │ ├── logger.ts # Debug logger (--debug flag)
|
|
323
|
-
│ │ ├── heartbeat.ts # Brain heartbeat loop (30s interval, 5s retry)
|
|
324
|
-
│ │ ├── prompt-listener.ts # SSE + polling prompt listener with dedup
|
|
325
|
-
│ │ └── dispatcher.ts # Async CLI dispatch (spawn + timeout + two-stage kill)
|
|
326
|
-
│ ├── tools/
|
|
327
|
-
│ │ ├── schemas.ts # Zod parameter schemas
|
|
328
|
-
│ │ ├── control.ts # zhihand_control handler
|
|
329
|
-
│ │ ├── screenshot.ts # zhihand_screenshot handler
|
|
330
|
-
│ │ └── pair.ts # zhihand_pair handler
|
|
331
|
-
│ └── cli/
|
|
332
|
-
│ ├── detect.ts # CLI tool detection (Claude Code, Codex, Gemini, OpenClaw)
|
|
333
|
-
│ ├── spawn.ts # CLI process spawning (for mobile-initiated tasks)
|
|
334
|
-
│ ├── mcp-config.ts # MCP auto-configuration (add/remove per backend)
|
|
335
|
-
│ └── openclaw.ts # OpenClaw auto-detect & plugin install
|
|
336
|
-
├── dist/ # Compiled JavaScript (shipped in npm package)
|
|
337
|
-
├── package.json
|
|
338
|
-
└── tsconfig.json
|
|
339
|
-
```
|
|
340
|
-
|
|
341
|
-
## Development
|
|
342
|
-
|
|
343
|
-
```bash
|
|
344
|
-
# Install dependencies
|
|
345
|
-
npm install
|
|
346
|
-
|
|
347
|
-
# Build (compiles TypeScript to dist/)
|
|
348
|
-
npm run build
|
|
349
|
-
|
|
350
|
-
# Run in development mode (uses --experimental-strip-types)
|
|
351
|
-
npm run dev
|
|
352
|
-
|
|
353
|
-
# Run tests
|
|
354
|
-
npm test
|
|
355
|
-
```
|
|
356
|
-
|
|
357
|
-
## License
|
|
358
|
-
|
|
359
|
-
MIT
|