@zhihand/mcp 0.24.1 → 0.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/bin/zhihand +12 -3
- package/dist/core/command.js +14 -5
- package/dist/core/device.d.ts +47 -0
- package/dist/core/device.js +171 -0
- package/dist/core/screenshot.js +11 -3
- package/dist/core/sse.js +4 -0
- package/dist/daemon/dispatcher.js +57 -7
- package/dist/daemon/heartbeat.js +7 -2
- package/dist/daemon/index.d.ts +1 -0
- package/dist/daemon/index.js +23 -0
- package/dist/daemon/logger.d.ts +10 -0
- package/dist/daemon/logger.js +22 -0
- package/dist/daemon/prompt-listener.js +23 -4
- package/dist/index.d.ts +1 -1
- package/dist/index.js +35 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
ZhiHand MCP Server — let AI agents see and control your phone.
|
|
4
4
|
|
|
5
|
-
Version: `0.24.1`
|
|
5
|
+
Version: `0.26.0`
|
|
6
6
|
|
|
7
7
|
## What is this?
|
|
8
8
|
|
|
@@ -93,6 +93,7 @@ Once configured, your AI agent can use ZhiHand tools directly. For example, in C
|
|
|
93
93
|
zhihand setup Interactive setup: pair + detect tools + auto-select + configure MCP + start daemon
|
|
94
94
|
zhihand start Start daemon (MCP Server + Relay + Config API)
|
|
95
95
|
zhihand start -d Start daemon in background (logs to ~/.zhihand/daemon.log)
|
|
96
|
+
zhihand start --debug Start daemon with verbose debug logging
|
|
96
97
|
zhihand stop Stop the running daemon
|
|
97
98
|
zhihand status Show daemon status, pairing info, device, backend, and model
|
|
98
99
|
|
|
@@ -113,6 +114,7 @@ zhihand gemini --model pro Switch backend with custom model
|
|
|
113
114
|
```bash
|
|
114
115
|
zhihand start # Start daemon in foreground
|
|
115
116
|
zhihand start -d # Start daemon in background
|
|
117
|
+
zhihand start --debug # Start with verbose debug logging
|
|
116
118
|
zhihand stop # Stop the daemon
|
|
117
119
|
zhihand status # Check if daemon is running, show device & backend info
|
|
118
120
|
```
|
|
@@ -159,6 +161,7 @@ When you switch:
|
|
|
159
161
|
| `--model, -m <name>` | Set model alias (e.g. `flash`, `pro`, `sonnet`, `opus`, `gpt-5.4-mini`) |
|
|
160
162
|
| `--port <port>` | Override daemon port (default: 18686) |
|
|
161
163
|
| `-d, --detach` | Run daemon in background |
|
|
164
|
+
| `--debug` | Enable verbose debug logging (all API requests, CLI args, SSE events) |
|
|
162
165
|
| `-h, --help` | Show help |
|
|
163
166
|
|
|
164
167
|
### Environment Variables
|
|
@@ -174,7 +177,7 @@ When you switch:
|
|
|
174
177
|
|
|
175
178
|
## MCP Tools
|
|
176
179
|
|
|
177
|
-
The server exposes
|
|
180
|
+
The server exposes these tools to AI agents:
|
|
178
181
|
|
|
179
182
|
### `zhihand_control`
|
|
180
183
|
|
|
@@ -209,6 +212,12 @@ Capture the current phone screen without performing any action. Returns an image
|
|
|
209
212
|
|
|
210
213
|
No parameters required.
|
|
211
214
|
|
|
215
|
+
### `zhihand_status`
|
|
216
|
+
|
|
217
|
+
Get device status: platform, model, OS version, screen size, battery, network, BLE connection, dark mode, storage, and more. No parameters.
|
|
218
|
+
|
|
219
|
+
Tool description and `open_app` guidance are **automatically adapted** based on the connected device platform (Android/iOS), so AI agents always send correct platform-specific parameters.
|
|
220
|
+
|
|
212
221
|
### `zhihand_pair`
|
|
213
222
|
|
|
214
223
|
Pair with a phone device. Returns a QR code and pairing URL.
|
|
@@ -217,6 +226,10 @@ Pair with a phone device. Returns a QR code and pairing URL.
|
|
|
217
226
|
|---|---|---|
|
|
218
227
|
| `forceNew` | `boolean` | Force new pairing even if already paired (default: `false`) |
|
|
219
228
|
|
|
229
|
+
### MCP Resource: `device://profile`
|
|
230
|
+
|
|
231
|
+
Provides full device context (static + dynamic) as JSON. Includes platform, model, OS version, screen size, battery, network, BLE, dark mode, storage, thermal state, locale, and more.
|
|
232
|
+
|
|
220
233
|
## How It Works
|
|
221
234
|
|
|
222
235
|
```
|
|
@@ -299,12 +312,14 @@ packages/mcp/
|
|
|
299
312
|
│ ├── core/
|
|
300
313
|
│ │ ├── config.ts # Credential & config management (~/.zhihand/), default models
|
|
301
314
|
│ │ ├── resolve-path.ts # Platform-aware executable path resolution (gemini/claude/codex)
|
|
315
|
+
│ │ ├── device.ts # Device context: static/dynamic profile, fetch, SSE updates
|
|
302
316
|
│ │ ├── command.ts # Command creation, enqueue, ACK formatting
|
|
303
317
|
│ │ ├── screenshot.ts # Binary screenshot fetch (JPEG)
|
|
304
318
|
│ │ ├── sse.ts # SSE client + hybrid ACK (SSE push + polling fallback)
|
|
305
319
|
│ │ └── pair.ts # Plugin registration + device pairing flow
|
|
306
320
|
│ ├── daemon/
|
|
307
321
|
│ │ ├── index.ts # Daemon entry: HTTP server + MCP + Relay + Config API
|
|
322
|
+
│ │ ├── logger.ts # Debug logger (--debug flag)
|
|
308
323
|
│ │ ├── heartbeat.ts # Brain heartbeat loop (30s interval, 5s retry)
|
|
309
324
|
│ │ ├── prompt-listener.ts # SSE + polling prompt listener with dedup
|
|
310
325
|
│ │ └── dispatcher.ts # Async CLI dispatch (spawn + timeout + two-stage kill)
|
package/bin/zhihand
CHANGED
|
@@ -26,6 +26,7 @@ const { positionals, values } = parseArgs({
|
|
|
26
26
|
model: { type: "string", short: "m" },
|
|
27
27
|
help: { type: "boolean", short: "h", default: false },
|
|
28
28
|
detach: { type: "boolean", short: "d", default: false },
|
|
29
|
+
debug: { type: "boolean", default: false },
|
|
29
30
|
port: { type: "string" },
|
|
30
31
|
},
|
|
31
32
|
});
|
|
@@ -39,6 +40,7 @@ zhihand — MCP Server and Relay for phone control
|
|
|
39
40
|
Usage:
|
|
40
41
|
zhihand start Start daemon (MCP Server + Relay, foreground)
|
|
41
42
|
zhihand start -d Start daemon in background (detach)
|
|
43
|
+
zhihand start --debug Start daemon with verbose debug logging
|
|
42
44
|
zhihand stop Stop daemon
|
|
43
45
|
zhihand status Show status (pairing, backend, brain)
|
|
44
46
|
|
|
@@ -59,6 +61,7 @@ Options:
|
|
|
59
61
|
--model, -m <name> Set model alias (e.g. flash, pro, sonnet, opus, gpt-5.4-mini)
|
|
60
62
|
--port <port> Override daemon port (default: 18686)
|
|
61
63
|
-d, --detach Run daemon in background
|
|
64
|
+
--debug Enable verbose debug logging
|
|
62
65
|
-h, --help Show this help
|
|
63
66
|
`);
|
|
64
67
|
process.exit(0);
|
|
@@ -145,6 +148,7 @@ switch (command) {
|
|
|
145
148
|
const args = [process.argv[1], "start"];
|
|
146
149
|
if (values.port) args.push("--port", values.port);
|
|
147
150
|
if (values.device) args.push("--device", values.device);
|
|
151
|
+
if (values.debug) args.push("--debug");
|
|
148
152
|
|
|
149
153
|
// Write daemon logs to ~/.zhihand/daemon.log
|
|
150
154
|
const zhihandDir = pathMod.default.join(osMod.default.homedir(), ".zhihand");
|
|
@@ -167,6 +171,7 @@ switch (command) {
|
|
|
167
171
|
await startDaemon({
|
|
168
172
|
port,
|
|
169
173
|
deviceName: values.device ?? process.env.ZHIHAND_DEVICE,
|
|
174
|
+
debug: values.debug,
|
|
170
175
|
});
|
|
171
176
|
break;
|
|
172
177
|
}
|
|
@@ -306,8 +311,9 @@ switch (command) {
|
|
|
306
311
|
{ label: "3. Swipe up", type: "hid", params: { action: "swipe", startXRatio: 0.5, startYRatio: 0.7, endXRatio: 0.5, endYRatio: 0.3, durationMs: 300 } },
|
|
307
312
|
{ label: "4. Swipe down", type: "hid", params: { action: "swipe", startXRatio: 0.5, startYRatio: 0.3, endXRatio: 0.5, endYRatio: 0.7, durationMs: 300 } },
|
|
308
313
|
{ label: "5. Press Home", type: "hid", params: { action: "home" } },
|
|
309
|
-
{ label: "6.
|
|
310
|
-
{ label: "7.
|
|
314
|
+
{ label: "6. Open WeChat", type: "hid", params: { action: "open_app", appPackage: "com.tencent.mm" } },
|
|
315
|
+
{ label: "7. Press Back", type: "hid", params: { action: "back" } },
|
|
316
|
+
{ label: "8. Screenshot", type: "screenshot" },
|
|
311
317
|
];
|
|
312
318
|
|
|
313
319
|
let passed = 0;
|
|
@@ -337,7 +343,10 @@ switch (command) {
|
|
|
337
343
|
const ack = await waitForCommandAck(testConfig, { commandId: queued.id, timeoutMs: 10_000 });
|
|
338
344
|
const ms = Date.now() - t0;
|
|
339
345
|
if (ack.acked) {
|
|
340
|
-
|
|
346
|
+
const ackStatus = ack.command?.ack_status ?? "ok";
|
|
347
|
+
const detail = ackStatus !== "ok" ? ` [${ackStatus}]` : "";
|
|
348
|
+
const resultInfo = ack.command?.ack_result ? ` ${JSON.stringify(ack.command.ack_result)}` : "";
|
|
349
|
+
console.log(`✅ (${ms}ms)${detail}${resultInfo}`);
|
|
341
350
|
passed++;
|
|
342
351
|
} else {
|
|
343
352
|
console.log(`⏱️ Timeout (${ms}ms)`);
|
package/dist/core/command.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { dbg } from "../daemon/logger.js";
|
|
1
2
|
let messageCounter = 0;
|
|
2
3
|
function nextMessageId() {
|
|
3
4
|
messageCounter = (messageCounter + 1) % 1000;
|
|
@@ -73,30 +74,38 @@ export function createControlCommand(params) {
|
|
|
73
74
|
}
|
|
74
75
|
}
|
|
75
76
|
export async function enqueueCommand(config, command) {
|
|
76
|
-
const
|
|
77
|
+
const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/commands`;
|
|
78
|
+
const body = { command: { ...command, message_id: command.messageId ?? nextMessageId() } };
|
|
79
|
+
dbg(`[cmd] POST ${url} type=${command.type} payload=${JSON.stringify(command.payload ?? {})}`);
|
|
80
|
+
const response = await fetch(url, {
|
|
77
81
|
method: "POST",
|
|
78
82
|
headers: {
|
|
79
83
|
"Content-Type": "application/json",
|
|
80
84
|
"x-zhihand-controller-token": config.controllerToken,
|
|
81
85
|
},
|
|
82
|
-
body: JSON.stringify(
|
|
83
|
-
command: { ...command, message_id: command.messageId ?? nextMessageId() },
|
|
84
|
-
}),
|
|
86
|
+
body: JSON.stringify(body),
|
|
85
87
|
});
|
|
86
88
|
if (!response.ok) {
|
|
89
|
+
dbg(`[cmd] Enqueue failed: ${response.status} ${response.statusText}`);
|
|
87
90
|
throw new Error(`Enqueue command failed: ${response.status}`);
|
|
88
91
|
}
|
|
89
92
|
const payload = (await response.json());
|
|
93
|
+
dbg(`[cmd] Enqueued: id=${payload.command.id}, status=${payload.command.status}`);
|
|
90
94
|
return payload.command;
|
|
91
95
|
}
|
|
92
96
|
export async function getCommand(config, commandId) {
|
|
93
|
-
const
|
|
97
|
+
const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/commands/${encodeURIComponent(commandId)}`;
|
|
98
|
+
dbg(`[cmd] GET ${url}`);
|
|
99
|
+
const response = await fetch(url, {
|
|
94
100
|
headers: { "x-zhihand-controller-token": config.controllerToken },
|
|
95
101
|
});
|
|
96
102
|
if (!response.ok) {
|
|
103
|
+
dbg(`[cmd] Get failed: ${response.status}`);
|
|
97
104
|
throw new Error(`Get command failed: ${response.status}`);
|
|
98
105
|
}
|
|
99
106
|
const payload = (await response.json());
|
|
107
|
+
const cmd = payload.command;
|
|
108
|
+
dbg(`[cmd] Got: id=${cmd.id}, status=${cmd.status}, acked=${!!cmd.acked_at}, ack_status=${cmd.ack_status ?? "-"}, ack_result=${JSON.stringify(cmd.ack_result ?? null)}`);
|
|
100
109
|
return payload.command;
|
|
101
110
|
}
|
|
102
111
|
export function formatAckSummary(action, result) {
|
package/dist/core/device.d.ts
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Device Context — static + dynamic device info fetched from control plane.
|
|
3
|
+
*
|
|
4
|
+
* Static info (platform, model, screen size) is set once after pairing and
|
|
5
|
+
* injected into MCP tool descriptions so the LLM always knows the device.
|
|
6
|
+
*
|
|
7
|
+
* Dynamic info (battery, network, BLE) is updated via SSE push and exposed
|
|
8
|
+
* through the zhihand_status tool and device://profile resource.
|
|
9
|
+
*/
|
|
10
|
+
import type { ZhiHandConfig } from "./config.ts";
|
|
11
|
+
export interface StaticContext {
|
|
12
|
+
platform: string;
|
|
13
|
+
model: string;
|
|
14
|
+
osVersion: string;
|
|
15
|
+
screenWidthPx: number;
|
|
16
|
+
screenHeightPx: number;
|
|
17
|
+
density: number;
|
|
18
|
+
formFactor: string;
|
|
19
|
+
locale: string;
|
|
20
|
+
textDirection: string;
|
|
21
|
+
timezone: string;
|
|
22
|
+
navigationMode?: string;
|
|
23
|
+
romFamily?: string;
|
|
24
|
+
}
|
|
25
|
+
export interface DynamicContext {
|
|
26
|
+
batteryLevel: number;
|
|
27
|
+
batteryState: string;
|
|
28
|
+
networkType: string;
|
|
29
|
+
bleRssi: number | null;
|
|
30
|
+
darkMode: boolean;
|
|
31
|
+
hidConnected: boolean;
|
|
32
|
+
recordingActive: boolean;
|
|
33
|
+
appInForeground: boolean;
|
|
34
|
+
availableStorageMb: number;
|
|
35
|
+
thermalState?: string;
|
|
36
|
+
fontScale: number;
|
|
37
|
+
}
|
|
38
|
+
export declare function getStaticContext(): StaticContext;
|
|
39
|
+
export declare function getDynamicContext(): DynamicContext;
|
|
40
|
+
export declare function isDeviceProfileLoaded(): boolean;
|
|
41
|
+
export declare function extractStatic(profile: Record<string, unknown>): StaticContext;
|
|
42
|
+
export declare function extractDynamic(profile: Record<string, unknown>): DynamicContext;
|
|
43
|
+
export declare function updateDeviceProfile(profile: Record<string, unknown>): void;
|
|
44
|
+
export declare function fetchDeviceProfile(config: ZhiHandConfig): Promise<void>;
|
|
45
|
+
export declare function buildControlToolDescription(): string;
|
|
46
|
+
export declare function buildScreenshotToolDescription(): string;
|
|
47
|
+
export declare function formatDeviceStatus(): Record<string, unknown>;
|
package/dist/core/device.js
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Device Context — static + dynamic device info fetched from control plane.
|
|
3
|
+
*
|
|
4
|
+
* Static info (platform, model, screen size) is set once after pairing and
|
|
5
|
+
* injected into MCP tool descriptions so the LLM always knows the device.
|
|
6
|
+
*
|
|
7
|
+
* Dynamic info (battery, network, BLE) is updated via SSE push and exposed
|
|
8
|
+
* through the zhihand_status tool and device://profile resource.
|
|
9
|
+
*/
|
|
10
|
+
import { dbg } from "../daemon/logger.js";
|
|
11
|
+
// ── Default values ────────────────────────────────────────
|
|
12
|
+
const DEFAULT_STATIC = {
|
|
13
|
+
platform: "unknown",
|
|
14
|
+
model: "unknown",
|
|
15
|
+
osVersion: "unknown",
|
|
16
|
+
screenWidthPx: 0,
|
|
17
|
+
screenHeightPx: 0,
|
|
18
|
+
density: 1,
|
|
19
|
+
formFactor: "phone",
|
|
20
|
+
locale: "en-US",
|
|
21
|
+
textDirection: "ltr",
|
|
22
|
+
timezone: "UTC",
|
|
23
|
+
};
|
|
24
|
+
const DEFAULT_DYNAMIC = {
|
|
25
|
+
batteryLevel: -1,
|
|
26
|
+
batteryState: "unknown",
|
|
27
|
+
networkType: "unknown",
|
|
28
|
+
bleRssi: null,
|
|
29
|
+
darkMode: false,
|
|
30
|
+
hidConnected: false,
|
|
31
|
+
recordingActive: false,
|
|
32
|
+
appInForeground: false,
|
|
33
|
+
availableStorageMb: -1,
|
|
34
|
+
fontScale: 1,
|
|
35
|
+
};
|
|
36
|
+
// ── Module state ──────────────────────────────────────────
|
|
37
|
+
let staticCtx = { ...DEFAULT_STATIC };
|
|
38
|
+
let dynamicCtx = { ...DEFAULT_DYNAMIC };
|
|
39
|
+
let loaded = false;
|
|
40
|
+
export function getStaticContext() {
|
|
41
|
+
return staticCtx;
|
|
42
|
+
}
|
|
43
|
+
export function getDynamicContext() {
|
|
44
|
+
return dynamicCtx;
|
|
45
|
+
}
|
|
46
|
+
export function isDeviceProfileLoaded() {
|
|
47
|
+
return loaded;
|
|
48
|
+
}
|
|
49
|
+
// ── Extract helpers ───────────────────────────────────────
|
|
50
|
+
function str(v, fallback) {
|
|
51
|
+
return typeof v === "string" && v ? v : fallback;
|
|
52
|
+
}
|
|
53
|
+
function num(v, fallback) {
|
|
54
|
+
return typeof v === "number" && !isNaN(v) ? v : fallback;
|
|
55
|
+
}
|
|
56
|
+
function bool(v, fallback) {
|
|
57
|
+
return typeof v === "boolean" ? v : fallback;
|
|
58
|
+
}
|
|
59
|
+
export function extractStatic(profile) {
|
|
60
|
+
return {
|
|
61
|
+
platform: str(profile.platform, DEFAULT_STATIC.platform),
|
|
62
|
+
model: str(profile.model, DEFAULT_STATIC.model),
|
|
63
|
+
osVersion: str(profile.os_version, DEFAULT_STATIC.osVersion),
|
|
64
|
+
screenWidthPx: num(profile.screen_width_px, DEFAULT_STATIC.screenWidthPx),
|
|
65
|
+
screenHeightPx: num(profile.screen_height_px, DEFAULT_STATIC.screenHeightPx),
|
|
66
|
+
density: num(profile.density, DEFAULT_STATIC.density),
|
|
67
|
+
formFactor: str(profile.form_factor, DEFAULT_STATIC.formFactor),
|
|
68
|
+
locale: str(profile.locale, DEFAULT_STATIC.locale),
|
|
69
|
+
textDirection: str(profile.text_direction, DEFAULT_STATIC.textDirection),
|
|
70
|
+
timezone: str(profile.timezone, DEFAULT_STATIC.timezone),
|
|
71
|
+
navigationMode: typeof profile.navigation_mode === "string" ? profile.navigation_mode : undefined,
|
|
72
|
+
romFamily: typeof profile.rom_family === "string" ? profile.rom_family : undefined,
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
export function extractDynamic(profile) {
|
|
76
|
+
return {
|
|
77
|
+
batteryLevel: num(profile.battery_level, DEFAULT_DYNAMIC.batteryLevel),
|
|
78
|
+
batteryState: str(profile.battery_state, DEFAULT_DYNAMIC.batteryState),
|
|
79
|
+
networkType: str(profile.network_type, DEFAULT_DYNAMIC.networkType),
|
|
80
|
+
bleRssi: typeof profile.ble_rssi === "number" ? profile.ble_rssi : null,
|
|
81
|
+
darkMode: bool(profile.dark_mode, DEFAULT_DYNAMIC.darkMode),
|
|
82
|
+
hidConnected: bool(profile.hid_connected, DEFAULT_DYNAMIC.hidConnected),
|
|
83
|
+
recordingActive: bool(profile.recording_active, DEFAULT_DYNAMIC.recordingActive),
|
|
84
|
+
appInForeground: bool(profile.app_in_foreground, DEFAULT_DYNAMIC.appInForeground),
|
|
85
|
+
availableStorageMb: num(profile.available_storage_mb, DEFAULT_DYNAMIC.availableStorageMb),
|
|
86
|
+
thermalState: typeof profile.thermal_state === "string" ? profile.thermal_state : undefined,
|
|
87
|
+
fontScale: num(profile.font_scale, DEFAULT_DYNAMIC.fontScale),
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
// ── Update from SSE event ─────────────────────────────────
|
|
91
|
+
export function updateDeviceProfile(profile) {
|
|
92
|
+
staticCtx = extractStatic(profile);
|
|
93
|
+
dynamicCtx = extractDynamic(profile);
|
|
94
|
+
loaded = true;
|
|
95
|
+
dbg(`[device] Profile updated: platform=${staticCtx.platform}, model=${staticCtx.model}, screen=${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}`);
|
|
96
|
+
}
|
|
97
|
+
// ── Fetch initial profile from API ────────────────────────
|
|
98
|
+
export async function fetchDeviceProfile(config) {
|
|
99
|
+
const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/device-profile`;
|
|
100
|
+
dbg(`[device] Fetching profile: GET ${url}`);
|
|
101
|
+
try {
|
|
102
|
+
const response = await fetch(url, {
|
|
103
|
+
headers: { "x-zhihand-controller-token": config.controllerToken },
|
|
104
|
+
signal: AbortSignal.timeout(10_000),
|
|
105
|
+
});
|
|
106
|
+
if (!response.ok) {
|
|
107
|
+
dbg(`[device] Profile fetch failed: ${response.status} ${response.statusText}`);
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
const data = await response.json();
|
|
111
|
+
// API may wrap in { device_profile: {...} } or return flat
|
|
112
|
+
const profile = (typeof data.device_profile === "object" && data.device_profile !== null)
|
|
113
|
+
? data.device_profile
|
|
114
|
+
: data;
|
|
115
|
+
updateDeviceProfile(profile);
|
|
116
|
+
}
|
|
117
|
+
catch (err) {
|
|
118
|
+
dbg(`[device] Profile fetch error: ${err.message}`);
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
// ── Build tool description with device info ───────────────
|
|
122
|
+
export function buildControlToolDescription() {
|
|
123
|
+
if (!loaded || staticCtx.platform === "unknown") {
|
|
124
|
+
return "Control the connected mobile device. Supports click, swipe, type, scroll, open_app, back, home, and more. All coordinates use normalized ratios [0,1].";
|
|
125
|
+
}
|
|
126
|
+
const parts = [
|
|
127
|
+
`Control a ${staticCtx.platform} device`,
|
|
128
|
+
`(${staticCtx.model}, ${staticCtx.osVersion}`,
|
|
129
|
+
`${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}`,
|
|
130
|
+
`${staticCtx.formFactor}, ${staticCtx.locale})`,
|
|
131
|
+
];
|
|
132
|
+
let desc = parts.join(", ") + ".";
|
|
133
|
+
desc += " All coordinates use normalized ratios [0,1].";
|
|
134
|
+
// Platform-specific open_app guidance
|
|
135
|
+
if (staticCtx.platform === "android") {
|
|
136
|
+
desc += " For open_app, use appPackage (e.g. 'com.tencent.mm'). Do NOT send bundleId or urlScheme.";
|
|
137
|
+
}
|
|
138
|
+
else if (staticCtx.platform === "ios") {
|
|
139
|
+
desc += " For open_app, use bundleId (e.g. 'com.tencent.xin') or urlScheme (e.g. 'weixin://'). Do NOT send appPackage.";
|
|
140
|
+
}
|
|
141
|
+
return desc;
|
|
142
|
+
}
|
|
143
|
+
export function buildScreenshotToolDescription() {
|
|
144
|
+
if (!loaded || staticCtx.platform === "unknown") {
|
|
145
|
+
return "Take a screenshot of the phone screen.";
|
|
146
|
+
}
|
|
147
|
+
return `Take a screenshot of the ${staticCtx.platform} device (${staticCtx.model}, ${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}).`;
|
|
148
|
+
}
|
|
149
|
+
// ── Format status for zhihand_status tool ─────────────────
|
|
150
|
+
export function formatDeviceStatus() {
|
|
151
|
+
return {
|
|
152
|
+
platform: staticCtx.platform,
|
|
153
|
+
model: staticCtx.model,
|
|
154
|
+
os_version: staticCtx.osVersion,
|
|
155
|
+
screen: `${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}`,
|
|
156
|
+
density: staticCtx.density,
|
|
157
|
+
form_factor: staticCtx.formFactor,
|
|
158
|
+
locale: staticCtx.locale,
|
|
159
|
+
timezone: staticCtx.timezone,
|
|
160
|
+
navigation_mode: staticCtx.navigationMode ?? null,
|
|
161
|
+
battery: `${dynamicCtx.batteryLevel}% (${dynamicCtx.batteryState})`,
|
|
162
|
+
network: dynamicCtx.networkType,
|
|
163
|
+
ble: dynamicCtx.hidConnected
|
|
164
|
+
? `connected${dynamicCtx.bleRssi !== null ? ` (RSSI: ${dynamicCtx.bleRssi})` : ""}`
|
|
165
|
+
: "disconnected",
|
|
166
|
+
dark_mode: dynamicCtx.darkMode,
|
|
167
|
+
storage_available_mb: dynamicCtx.availableStorageMb,
|
|
168
|
+
thermal: dynamicCtx.thermalState ?? "normal",
|
|
169
|
+
font_scale: dynamicCtx.fontScale,
|
|
170
|
+
};
|
|
171
|
+
}
|
package/dist/core/screenshot.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
|
+
import { dbg } from "../daemon/logger.js";
|
|
1
2
|
export async function fetchScreenshotBinary(config) {
|
|
2
3
|
const controller = new AbortController();
|
|
3
|
-
const
|
|
4
|
+
const timeoutMs = config.timeoutMs ?? 10_000;
|
|
5
|
+
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
|
6
|
+
const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/screen`;
|
|
7
|
+
dbg(`[screenshot] GET ${url} timeout=${timeoutMs}ms`);
|
|
8
|
+
const t0 = Date.now();
|
|
4
9
|
try {
|
|
5
|
-
const response = await fetch(
|
|
10
|
+
const response = await fetch(url, {
|
|
6
11
|
method: "GET",
|
|
7
12
|
headers: {
|
|
8
13
|
"x-zhihand-controller-token": config.controllerToken,
|
|
@@ -11,9 +16,12 @@ export async function fetchScreenshotBinary(config) {
|
|
|
11
16
|
signal: controller.signal,
|
|
12
17
|
});
|
|
13
18
|
if (!response.ok) {
|
|
19
|
+
dbg(`[screenshot] Failed: ${response.status} ${response.statusText}`);
|
|
14
20
|
throw new Error(`Screenshot fetch failed: ${response.status}`);
|
|
15
21
|
}
|
|
16
|
-
|
|
22
|
+
const buf = Buffer.from(await response.arrayBuffer());
|
|
23
|
+
dbg(`[screenshot] OK: ${(buf.length / 1024).toFixed(0)}KB in ${Date.now() - t0}ms`);
|
|
24
|
+
return buf;
|
|
17
25
|
}
|
|
18
26
|
finally {
|
|
19
27
|
clearTimeout(timeout);
|
package/dist/core/sse.js
CHANGED
|
@@ -1,13 +1,16 @@
|
|
|
1
1
|
import { getCommand } from "./command.js";
|
|
2
|
+
import { dbg } from "../daemon/logger.js";
|
|
2
3
|
// Per-commandId callback registry for SSE-based ACK
|
|
3
4
|
const ackCallbacks = new Map();
|
|
4
5
|
// Active SSE connection state
|
|
5
6
|
let sseAbortController = null;
|
|
6
7
|
let sseConnected = false;
|
|
7
8
|
export function handleSSEEvent(event) {
|
|
9
|
+
dbg(`[sse-cmd] Event: kind=${event.kind}, command=${event.command?.id ?? "-"}`);
|
|
8
10
|
if (event.kind === "command.acked" && event.command) {
|
|
9
11
|
const callback = ackCallbacks.get(event.command.id);
|
|
10
12
|
if (callback) {
|
|
13
|
+
dbg(`[sse-cmd] ACK callback for ${event.command.id}, ack_status=${event.command.ack_status}, ack_result=${JSON.stringify(event.command.ack_result ?? null)}`);
|
|
11
14
|
callback(event.command);
|
|
12
15
|
ackCallbacks.delete(event.command.id);
|
|
13
16
|
}
|
|
@@ -103,6 +106,7 @@ export function isSSEConnected() {
|
|
|
103
106
|
*/
|
|
104
107
|
export async function waitForCommandAck(config, options) {
|
|
105
108
|
const timeoutMs = options.timeoutMs ?? 15_000;
|
|
109
|
+
dbg(`[sse-cmd] Waiting for ACK: commandId=${options.commandId}, timeout=${timeoutMs}ms`);
|
|
106
110
|
// Ensure SSE is connected for real-time ACKs
|
|
107
111
|
connectSSE(config);
|
|
108
112
|
return new Promise((resolve, reject) => {
|
package/dist/daemon/dispatcher.js
CHANGED
|
@@ -5,6 +5,8 @@ import os from "node:os";
|
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
6
|
import { DEFAULT_MODELS } from "../core/config.js";
|
|
7
7
|
import { resolveGemini, resolveClaude, resolveCodex } from "../core/resolve-path.js";
|
|
8
|
+
import { getStaticContext, isDeviceProfileLoaded } from "../core/device.js";
|
|
9
|
+
import { dbg } from "./logger.js";
|
|
8
10
|
const CLI_TIMEOUT = 300_000; // 300s (5min) per prompt — MCP tool chains need multiple turns
|
|
9
11
|
const SIGKILL_DELAY = 2_000; // 2s after SIGTERM
|
|
10
12
|
const MCP_PORT = parseInt(process.env.ZHIHAND_PORT ?? "", 10) || 18686;
|
|
@@ -263,6 +265,7 @@ function pollGeminiSession(child, startTime, promptText, log, knownSessionFile,
|
|
|
263
265
|
}
|
|
264
266
|
if (Date.now() - outcomeAt >= SESSION_STABILITY_DELAY) {
|
|
265
267
|
const [status, text] = finalResult ?? outcome;
|
|
268
|
+
dbg(`[gemini] Session outcome: status=${status}, text (${text.length} chars): ${text.slice(0, 200)}${text.length > 200 ? "..." : ""}`);
|
|
266
269
|
settle({
|
|
267
270
|
text,
|
|
268
271
|
success: status === "success",
|
|
@@ -349,7 +352,30 @@ export async function killActiveChild() {
|
|
|
349
352
|
conversationHistory.length = 0;
|
|
350
353
|
}
|
|
351
354
|
// ── System Prompt ─────────────────────────────────────────
|
|
352
|
-
|
|
355
|
+
/**
|
|
356
|
+
* Build system context dynamically — injects device platform info when available
|
|
357
|
+
* so the AI sends correct platform-specific parameters (e.g. appPackage vs bundleId).
|
|
358
|
+
*/
|
|
359
|
+
function buildSystemContext() {
|
|
360
|
+
const static_ = isDeviceProfileLoaded() ? getStaticContext() : null;
|
|
361
|
+
const deviceLine = static_
|
|
362
|
+
? `Connected device: ${static_.platform} ${static_.model} (${static_.osVersion}), ${static_.screenWidthPx}x${static_.screenHeightPx}, ${static_.formFactor}, ${static_.locale}`
|
|
363
|
+
: "Connected device: unknown platform";
|
|
364
|
+
// Platform-specific open_app guidance
|
|
365
|
+
let openAppDoc;
|
|
366
|
+
if (static_?.platform === "android") {
|
|
367
|
+
openAppDoc = "- open_app: Open an app. Params: appPackage (e.g. 'com.tencent.mm'). Do NOT send bundleId or urlScheme on Android.";
|
|
368
|
+
}
|
|
369
|
+
else if (static_?.platform === "ios") {
|
|
370
|
+
openAppDoc = "- open_app: Open an app. Params: bundleId (e.g. 'com.tencent.xin') or urlScheme (e.g. 'weixin://'). Do NOT send appPackage on iOS.";
|
|
371
|
+
}
|
|
372
|
+
else {
|
|
373
|
+
openAppDoc = "- open_app: Open an app. Params: appPackage (Android, e.g. 'com.tencent.mm'), bundleId (iOS), urlScheme (e.g. 'weixin://')";
|
|
374
|
+
}
|
|
375
|
+
return `You are ZhiHand, an AI assistant connected to the user's mobile phone via MCP tools.
|
|
376
|
+
|
|
377
|
+
## Device
|
|
378
|
+
${deviceLine}
|
|
353
379
|
|
|
354
380
|
## Available MCP Tools
|
|
355
381
|
|
|
@@ -370,22 +396,26 @@ Control the phone. Requires "action" parameter. All coordinates use normalized r
|
|
|
370
396
|
- back: Press Back button (no params)
|
|
371
397
|
- home: Press Home button (no params)
|
|
372
398
|
- enter: Press Enter key (no params)
|
|
373
|
-
|
|
399
|
+
${openAppDoc}
|
|
374
400
|
- clipboard: Read/write clipboard. Params: clipboardAction ("get"/"set"), text
|
|
375
401
|
- screenshot: Capture screen via control (same as zhihand_screenshot)
|
|
376
402
|
- wait: Wait before next action. Params: durationMs (default 1000)
|
|
377
403
|
|
|
404
|
+
### zhihand_status
|
|
405
|
+
Get device status: platform, battery, network, BLE connection, dark mode, storage, etc.
|
|
406
|
+
|
|
378
407
|
## Rules
|
|
379
408
|
- When the user asks to see their screen, ALWAYS call zhihand_screenshot first.
|
|
380
409
|
- When the user asks to open an app (e.g. WeChat, Settings), use open_app action.
|
|
381
410
|
- When the user asks to go back/home, use back/home actions.
|
|
382
411
|
- For all tap/click operations, use xRatio and yRatio (0-1 normalized coordinates based on the screenshot).`;
|
|
412
|
+
}
|
|
383
413
|
/**
|
|
384
414
|
* Build the full system prompt with optional conversation history.
|
|
385
415
|
* Used for first prompt in persistent sessions and all one-shot calls.
|
|
386
416
|
*/
|
|
387
417
|
function wrapPrompt(userPrompt, history) {
|
|
388
|
-
let result =
|
|
418
|
+
let result = buildSystemContext();
|
|
389
419
|
if (history && history.length > 0) {
|
|
390
420
|
result += "\n\n## Recent Conversation\n";
|
|
391
421
|
for (const turn of history) {
|
|
@@ -419,6 +449,7 @@ export function dispatchToCLI(backend, prompt, log, model) {
|
|
|
419
449
|
}
|
|
420
450
|
const sessionLabel = canReuse ? `#${session.promptCount + 1}` : "new";
|
|
421
451
|
log(`[dispatch] Backend: ${backend}, Model: ${resolvedModel}, Session: ${sessionLabel}`);
|
|
452
|
+
dbg(`[dispatch] Prompt (${prompt.length} chars): ${prompt.slice(0, 200)}${prompt.length > 200 ? "..." : ""}`);
|
|
422
453
|
if (backend === "gemini") {
|
|
423
454
|
return dispatchGeminiPersistent(prompt, startTime, log, resolvedModel);
|
|
424
455
|
}
|
|
@@ -461,6 +492,7 @@ async function dispatchGeminiPersistent(prompt, startTime, log, model) {
|
|
|
461
492
|
session.promptCount++;
|
|
462
493
|
const turnNum = session.promptCount;
|
|
463
494
|
log(`[gemini] Reusing session — sending prompt #${turnNum}`);
|
|
495
|
+
dbg(`[gemini] Writing to PTY stdin: ${prompt.slice(0, 200)}${prompt.length > 200 ? "..." : ""}`);
|
|
464
496
|
// Write raw prompt to PTY stdin (gemini already has system context from first prompt)
|
|
465
497
|
session.child.stdin?.write(prompt + "\n");
|
|
466
498
|
const result = await pollGeminiSession(session.child, startTime, prompt, log, session.geminiSessionFile, turnNum);
|
|
@@ -483,6 +515,10 @@ async function dispatchGeminiPersistent(prompt, startTime, log, model) {
|
|
|
483
515
|
};
|
|
484
516
|
const geminiPath = resolveGemini();
|
|
485
517
|
log(`[gemini] Starting new persistent session (model: ${model})`);
|
|
518
|
+
dbg(`[gemini] Executable: ${geminiPath}`);
|
|
519
|
+
dbg(`[gemini] PTY wrap: python3 ${PTY_WRAP_SCRIPT}`);
|
|
520
|
+
dbg(`[gemini] Args: ${JSON.stringify(cliArgs)}`);
|
|
521
|
+
dbg(`[gemini] Wrapped prompt (${wrappedPrompt.length} chars): ${wrappedPrompt.slice(0, 300)}...`);
|
|
486
522
|
const child = spawn("python3", [PTY_WRAP_SCRIPT, geminiPath, ...cliArgs], {
|
|
487
523
|
env,
|
|
488
524
|
stdio: ["pipe", "pipe", "pipe"], // stdin=pipe for subsequent prompts
|
|
@@ -522,6 +558,9 @@ async function dispatchCodexWithHistory(prompt, startTime, log, model) {
|
|
|
522
558
|
args.push("-");
|
|
523
559
|
const codexPath = resolveCodex();
|
|
524
560
|
log(`[codex] One-shot dispatch (history: ${conversationHistory.length} turns)`);
|
|
561
|
+
dbg(`[codex] Executable: ${codexPath}`);
|
|
562
|
+
dbg(`[codex] Args: ${JSON.stringify(args)}`);
|
|
563
|
+
dbg(`[codex] Stdin prompt (${fullPrompt.length} chars): ${fullPrompt.slice(0, 300)}...`);
|
|
525
564
|
const child = spawn(codexPath, args, {
|
|
526
565
|
env: process.env,
|
|
527
566
|
stdio: ["pipe", "pipe", "pipe"],
|
|
@@ -531,6 +570,7 @@ async function dispatchCodexWithHistory(prompt, startTime, log, model) {
|
|
|
531
570
|
child.stdin?.write(fullPrompt);
|
|
532
571
|
child.stdin?.end();
|
|
533
572
|
const result = await collectCodexOutput(child, startTime);
|
|
573
|
+
dbg(`[codex] Output: success=${result.success}, duration=${result.durationMs}ms, text (${result.text.length} chars): ${result.text.slice(0, 300)}${result.text.length > 300 ? "..." : ""}`);
|
|
534
574
|
recordTurn("user", prompt);
|
|
535
575
|
recordTurn("assistant", result.text);
|
|
536
576
|
return result;
|
|
@@ -544,13 +584,17 @@ async function dispatchClaudeWithHistory(prompt, startTime, log, model) {
|
|
|
544
584
|
// --permission-mode bypassPermissions: auto-approve all tool calls (like gemini's --approval-mode yolo)
|
|
545
585
|
// --mcp-config: explicitly pass MCP server URL so Claude finds it regardless of cwd
|
|
546
586
|
const mcpConfig = JSON.stringify({ mcpServers: { zhihand: { type: "http", url: MCP_URL } } });
|
|
547
|
-
const child = spawn(claudePath, [
|
|
587
|
+
const claudeArgs = [
|
|
548
588
|
"-p", "-",
|
|
549
589
|
"--model", model,
|
|
550
590
|
"--output-format", "json",
|
|
551
591
|
"--permission-mode", "bypassPermissions",
|
|
552
592
|
"--mcp-config", mcpConfig,
|
|
553
|
-
], {
|
|
593
|
+
];
|
|
594
|
+
dbg(`[claude] Executable: ${claudePath}`);
|
|
595
|
+
dbg(`[claude] Args: ${JSON.stringify(claudeArgs)}`);
|
|
596
|
+
dbg(`[claude] Stdin prompt (${fullPrompt.length} chars): ${fullPrompt.slice(0, 300)}...`);
|
|
597
|
+
const child = spawn(claudePath, claudeArgs, {
|
|
554
598
|
env: process.env,
|
|
555
599
|
stdio: ["pipe", "pipe", "pipe"],
|
|
556
600
|
detached: false,
|
|
@@ -559,8 +603,10 @@ async function dispatchClaudeWithHistory(prompt, startTime, log, model) {
|
|
|
559
603
|
child.stdin?.write(fullPrompt);
|
|
560
604
|
child.stdin?.end();
|
|
561
605
|
const raw = await collectChildOutput(child, startTime);
|
|
606
|
+
dbg(`[claude] Raw output (${raw.text.length} chars): ${raw.text.slice(0, 500)}${raw.text.length > 500 ? "..." : ""}`);
|
|
562
607
|
// Claude --output-format json wraps the result in a JSON envelope — extract the actual text
|
|
563
608
|
const result = extractClaudeResult(raw);
|
|
609
|
+
dbg(`[claude] Parsed result: success=${result.success}, text (${result.text.length} chars)`);
|
|
564
610
|
recordTurn("user", prompt);
|
|
565
611
|
recordTurn("assistant", result.text);
|
|
566
612
|
return result;
|
|
@@ -691,8 +737,10 @@ function collectChildOutput(child, startTime) {
|
|
|
691
737
|
}
|
|
692
738
|
// ── Reply ──────────────────────────────────────────────────
|
|
693
739
|
export async function postReply(config, promptId, text) {
|
|
740
|
+
const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/prompts/${encodeURIComponent(promptId)}/reply`;
|
|
741
|
+
dbg(`[reply] POST ${url}`);
|
|
742
|
+
dbg(`[reply] Body (${text.length} chars): ${text.slice(0, 300)}${text.length > 300 ? "..." : ""}`);
|
|
694
743
|
try {
|
|
695
|
-
const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/prompts/${encodeURIComponent(promptId)}/reply`;
|
|
696
744
|
const response = await fetch(url, {
|
|
697
745
|
method: "POST",
|
|
698
746
|
headers: {
|
|
@@ -702,9 +750,11 @@ export async function postReply(config, promptId, text) {
|
|
|
702
750
|
body: JSON.stringify({ role: "assistant", text }),
|
|
703
751
|
signal: AbortSignal.timeout(30_000),
|
|
704
752
|
});
|
|
753
|
+
dbg(`[reply] Response: ${response.status} ${response.statusText}`);
|
|
705
754
|
return response.ok || (response.status >= 400 && response.status < 500);
|
|
706
755
|
}
|
|
707
|
-
catch {
|
|
756
|
+
catch (err) {
|
|
757
|
+
dbg(`[reply] Error: ${err.message}`);
|
|
708
758
|
return false;
|
|
709
759
|
}
|
|
710
760
|
}
|
package/dist/daemon/heartbeat.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { dbg } from "./logger.js";
|
|
1
2
|
const HEARTBEAT_INTERVAL = 30_000; // 30s
|
|
2
3
|
const HEARTBEAT_RETRY_INTERVAL = 5_000; // 5s on failure
|
|
3
4
|
let heartbeatTimer;
|
|
@@ -18,7 +19,9 @@ async function sendHeartbeat(config, online) {
|
|
|
18
19
|
body.backend = currentMeta.backend;
|
|
19
20
|
if (currentMeta.model)
|
|
20
21
|
body.model = currentMeta.model;
|
|
21
|
-
const response = await fetch(buildUrl(config), {
|
|
22
|
+
const url = buildUrl(config);
|
|
23
|
+
dbg(`[heartbeat] POST ${url} body=${JSON.stringify(body)}`);
|
|
24
|
+
const response = await fetch(url, {
|
|
22
25
|
method: "POST",
|
|
23
26
|
headers: {
|
|
24
27
|
"Content-Type": "application/json",
|
|
@@ -27,9 +30,11 @@ async function sendHeartbeat(config, online) {
|
|
|
27
30
|
body: JSON.stringify(body),
|
|
28
31
|
signal: AbortSignal.timeout(10_000),
|
|
29
32
|
});
|
|
33
|
+
dbg(`[heartbeat] Response: ${response.status} ${response.statusText}`);
|
|
30
34
|
return response.ok;
|
|
31
35
|
}
|
|
32
|
-
catch {
|
|
36
|
+
catch (err) {
|
|
37
|
+
dbg(`[heartbeat] Error: ${err.message}`);
|
|
33
38
|
return false;
|
|
34
39
|
}
|
|
35
40
|
}
|
package/dist/daemon/index.d.ts
CHANGED
package/dist/daemon/index.js
CHANGED
|
@@ -10,6 +10,8 @@ import { PACKAGE_VERSION } from "../index.js";
|
|
|
10
10
|
import { startHeartbeatLoop, stopHeartbeatLoop, sendBrainOffline, setBrainMeta } from "./heartbeat.js";
|
|
11
11
|
import { PromptListener } from "./prompt-listener.js";
|
|
12
12
|
import { dispatchToCLI, postReply, killActiveChild } from "./dispatcher.js";
|
|
13
|
+
import { setDebugEnabled, dbg } from "./logger.js";
|
|
14
|
+
import { fetchDeviceProfile, getStaticContext, isDeviceProfileLoaded } from "../core/device.js";
|
|
13
15
|
const DEFAULT_PORT = 18686;
|
|
14
16
|
const PID_FILE = "daemon.pid";
|
|
15
17
|
// ── State ──────────────────────────────────────────────────
|
|
@@ -30,7 +32,11 @@ async function processPrompt(config, prompt) {
|
|
|
30
32
|
}
|
|
31
33
|
const preview = prompt.text.length > 40 ? prompt.text.slice(0, 40) + "..." : prompt.text;
|
|
32
34
|
log(`[relay] Prompt: "${preview}" → dispatching to ${activeBackend}...`);
|
|
35
|
+
dbg(`[relay] Prompt ID: ${prompt.id}, full text (${prompt.text.length} chars): ${prompt.text}`);
|
|
36
|
+
dbg(`[relay] Prompt metadata: status=${prompt.status}, edge_id=${prompt.edge_id}, created_at=${prompt.created_at}`);
|
|
33
37
|
const result = await dispatchToCLI(activeBackend, prompt.text, log, activeModel ?? undefined);
|
|
38
|
+
dbg(`[relay] Dispatch result: success=${result.success}, duration=${result.durationMs}ms, text length=${result.text.length}`);
|
|
39
|
+
dbg(`[relay] Reply text: ${result.text.slice(0, 500)}${result.text.length > 500 ? "..." : ""}`);
|
|
34
40
|
const ok = await postReply(config, prompt.id, result.text);
|
|
35
41
|
const dur = (result.durationMs / 1000).toFixed(1);
|
|
36
42
|
if (ok) {
|
|
@@ -50,6 +56,7 @@ async function processQueue(config) {
|
|
|
50
56
|
}
|
|
51
57
|
function onPromptReceived(config, prompt) {
|
|
52
58
|
promptQueue.push(prompt);
|
|
59
|
+
dbg(`[queue] Enqueued prompt ${prompt.id}, queue length: ${promptQueue.length}, processing: ${isProcessing}`);
|
|
53
60
|
if (!isProcessing) {
|
|
54
61
|
processQueue(config);
|
|
55
62
|
}
|
|
@@ -58,6 +65,7 @@ function onPromptReceived(config, prompt) {
|
|
|
58
65
|
function handleInternalAPI(req, res) {
|
|
59
66
|
const url = req.url ?? "";
|
|
60
67
|
if (url === "/internal/backend" && req.method === "POST") {
|
|
68
|
+
dbg(`[api] POST /internal/backend from ${req.socket.remoteAddress}`);
|
|
61
69
|
let body = "";
|
|
62
70
|
const MAX_BODY = 10 * 1024; // 10KB
|
|
63
71
|
req.on("data", (chunk) => {
|
|
@@ -95,6 +103,7 @@ function handleInternalAPI(req, res) {
|
|
|
95
103
|
return true;
|
|
96
104
|
}
|
|
97
105
|
if (url === "/internal/status" && req.method === "GET") {
|
|
106
|
+
dbg(`[api] GET /internal/status`);
|
|
98
107
|
const effectiveModel = activeBackend ? (activeModel ?? DEFAULT_MODELS[activeBackend]) : null;
|
|
99
108
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
100
109
|
res.end(JSON.stringify({
|
|
@@ -146,6 +155,8 @@ export function isAlreadyRunning() {
|
|
|
146
155
|
}
|
|
147
156
|
// ── Main Daemon Entry ──────────────────────────────────────
|
|
148
157
|
export async function startDaemon(options) {
|
|
158
|
+
if (options?.debug)
|
|
159
|
+
setDebugEnabled(true);
|
|
149
160
|
const port = options?.port ?? (parseInt(process.env.ZHIHAND_PORT ?? "", 10) || DEFAULT_PORT);
|
|
150
161
|
// Check if already running
|
|
151
162
|
const existingPid = readPid();
|
|
@@ -169,6 +180,8 @@ export async function startDaemon(options) {
|
|
|
169
180
|
activeModel = backendConfig.model ?? null;
|
|
170
181
|
// Log startup info + set brain meta for heartbeat
|
|
171
182
|
log(`ZhiHand v${PACKAGE_VERSION} starting...`);
|
|
183
|
+
if (options?.debug)
|
|
184
|
+
log(`[config] Debug mode enabled — verbose logging active`);
|
|
172
185
|
if (activeBackend) {
|
|
173
186
|
const effectiveModel = activeModel ?? DEFAULT_MODELS[activeBackend];
|
|
174
187
|
log(`[config] Backend: ${activeBackend}, Model: ${effectiveModel}`);
|
|
@@ -177,6 +190,15 @@ export async function startDaemon(options) {
|
|
|
177
190
|
else {
|
|
178
191
|
log(`[config] No backend configured. Use: zhihand gemini / zhihand claude / zhihand codex`);
|
|
179
192
|
}
|
|
193
|
+
// Fetch device profile (platform, model, screen size) — non-blocking, best-effort
|
|
194
|
+
await fetchDeviceProfile(config);
|
|
195
|
+
if (isDeviceProfileLoaded()) {
|
|
196
|
+
const s = getStaticContext();
|
|
197
|
+
log(`[device] ${s.platform} ${s.model} (${s.osVersion}), ${s.screenWidthPx}x${s.screenHeightPx}, ${s.locale}`);
|
|
198
|
+
}
|
|
199
|
+
else {
|
|
200
|
+
log(`[device] Device profile not available — tool descriptions will use generic defaults`);
|
|
201
|
+
}
|
|
180
202
|
// MCP sessions: each client gets its own McpServer + Transport pair
|
|
181
203
|
// because McpServer.connect() can only be called once per instance
|
|
182
204
|
const MAX_MCP_SESSIONS = 20;
|
|
@@ -207,6 +229,7 @@ export async function startDaemon(options) {
|
|
|
207
229
|
if (req.url === "/mcp" || req.url?.startsWith("/mcp")) {
|
|
208
230
|
try {
|
|
209
231
|
const sessionId = req.headers["mcp-session-id"];
|
|
232
|
+
dbg(`[mcp] ${req.method} /mcp session=${sessionId?.slice(0, 8) ?? "(new)"} sessions=${mcpSessions.size}`);
|
|
210
233
|
if (sessionId && mcpSessions.has(sessionId)) {
|
|
211
234
|
// Existing session
|
|
212
235
|
const session = mcpSessions.get(sessionId);
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Debug logger for ZhiHand daemon.
|
|
3
|
+
*
|
|
4
|
+
* Enable with `zhihand start --debug` to see detailed request/response,
|
|
5
|
+
* CLI spawn args, stdin/stdout data, SSE events, and timing information.
|
|
6
|
+
*/
|
|
7
|
+
export declare function setDebugEnabled(enabled: boolean): void;
|
|
8
|
+
export declare function isDebugEnabled(): boolean;
|
|
9
|
+
/** Debug log — only outputs when --debug is active. */
|
|
10
|
+
export declare function dbg(msg: string): void;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Debug logger for ZhiHand daemon.
|
|
3
|
+
*
|
|
4
|
+
* Enable with `zhihand start --debug` to see detailed request/response,
|
|
5
|
+
* CLI spawn args, stdin/stdout data, SSE events, and timing information.
|
|
6
|
+
*/
|
|
7
|
+
let debugEnabled = false;
|
|
8
|
+
export function setDebugEnabled(enabled) {
|
|
9
|
+
debugEnabled = enabled;
|
|
10
|
+
}
|
|
11
|
+
export function isDebugEnabled() {
|
|
12
|
+
return debugEnabled;
|
|
13
|
+
}
|
|
14
|
+
function ts() {
|
|
15
|
+
return new Date().toLocaleTimeString();
|
|
16
|
+
}
|
|
17
|
+
/** Debug log — only outputs when --debug is active. */
|
|
18
|
+
export function dbg(msg) {
|
|
19
|
+
if (!debugEnabled)
|
|
20
|
+
return;
|
|
21
|
+
process.stdout.write(`[${ts()}] [DEBUG] ${msg}\n`);
|
|
22
|
+
}
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { updateDeviceProfile } from "../core/device.js";
|
|
2
|
+
import { dbg } from "./logger.js";
|
|
1
3
|
const SSE_WATCHDOG_TIMEOUT = 120_000; // 120s no data → reconnect (servers may not send keepalive frequently)
|
|
2
4
|
const SSE_RECONNECT_DELAY = 3_000;
|
|
3
5
|
const POLL_INTERVAL = 2_000;
|
|
@@ -29,9 +31,12 @@ export class PromptListener {
|
|
|
29
31
|
}
|
|
30
32
|
}
|
|
31
33
|
dispatchPrompt(prompt) {
|
|
32
|
-
if (this.processedIds.has(prompt.id))
|
|
34
|
+
if (this.processedIds.has(prompt.id)) {
|
|
35
|
+
dbg(`[prompt] Skipping duplicate prompt: ${prompt.id}`);
|
|
33
36
|
return;
|
|
37
|
+
}
|
|
34
38
|
this.processedIds.add(prompt.id);
|
|
39
|
+
dbg(`[prompt] Dispatching prompt: id=${prompt.id}, status=${prompt.status}, text="${prompt.text.slice(0, 100)}${prompt.text.length > 100 ? "..." : ""}"`);
|
|
35
40
|
// Prevent unbounded growth
|
|
36
41
|
if (this.processedIds.size > 500) {
|
|
37
42
|
const arr = [...this.processedIds];
|
|
@@ -44,6 +49,7 @@ export class PromptListener {
|
|
|
44
49
|
try {
|
|
45
50
|
this.sseAbort = new AbortController();
|
|
46
51
|
const url = `${this.config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(this.config.credentialId)}/events/stream?topic=prompts`;
|
|
52
|
+
dbg(`[sse] Connecting to ${url}`);
|
|
47
53
|
const response = await fetch(url, {
|
|
48
54
|
headers: {
|
|
49
55
|
"Accept": "text/event-stream",
|
|
@@ -52,6 +58,7 @@ export class PromptListener {
|
|
|
52
58
|
signal: this.sseAbort.signal,
|
|
53
59
|
});
|
|
54
60
|
if (!response.ok) {
|
|
61
|
+
dbg(`[sse] Connect failed: ${response.status} ${response.statusText}`);
|
|
55
62
|
throw new Error(`SSE connect failed: ${response.status}`);
|
|
56
63
|
}
|
|
57
64
|
this.sseConnected = true;
|
|
@@ -114,6 +121,7 @@ export class PromptListener {
|
|
|
114
121
|
}, SSE_WATCHDOG_TIMEOUT);
|
|
115
122
|
}
|
|
116
123
|
handleSSEEvent(event) {
|
|
124
|
+
dbg(`[sse] Event: kind=${event.kind}, prompt=${event.prompt?.id ?? "-"}, prompts=${event.prompts?.length ?? 0}`);
|
|
117
125
|
if (event.kind === "prompt.queued" && event.prompt) {
|
|
118
126
|
this.dispatchPrompt(event.prompt);
|
|
119
127
|
}
|
|
@@ -124,6 +132,13 @@ export class PromptListener {
|
|
|
124
132
|
}
|
|
125
133
|
}
|
|
126
134
|
}
|
|
135
|
+
else if (event.kind === "device_profile.updated" && event.device_profile) {
|
|
136
|
+
// NOTE: This event may only arrive if the server sends cross-topic events on
|
|
137
|
+
// the prompts stream, or if the API is updated to support multi-topic SSE.
|
|
138
|
+
// If not received, device profile is still fetched at daemon startup.
|
|
139
|
+
this.log("[device] Device profile updated via SSE");
|
|
140
|
+
updateDeviceProfile(event.device_profile);
|
|
141
|
+
}
|
|
127
142
|
}
|
|
128
143
|
startPolling() {
|
|
129
144
|
if (this.pollTimer)
|
|
@@ -152,19 +167,23 @@ export class PromptListener {
|
|
|
152
167
|
async poll() {
|
|
153
168
|
try {
|
|
154
169
|
const url = `${this.config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(this.config.credentialId)}/prompts?limit=5`;
|
|
170
|
+
dbg(`[poll] GET ${url}`);
|
|
155
171
|
const response = await fetch(url, {
|
|
156
172
|
headers: { "x-zhihand-controller-token": this.config.controllerToken },
|
|
157
173
|
signal: AbortSignal.timeout(10_000),
|
|
158
174
|
});
|
|
159
|
-
if (!response.ok)
|
|
175
|
+
if (!response.ok) {
|
|
176
|
+
dbg(`[poll] Response: ${response.status}`);
|
|
160
177
|
return;
|
|
178
|
+
}
|
|
161
179
|
const data = (await response.json());
|
|
180
|
+
dbg(`[poll] Got ${data.items?.length ?? 0} prompt(s)`);
|
|
162
181
|
for (const prompt of data.items ?? []) {
|
|
163
182
|
this.dispatchPrompt(prompt);
|
|
164
183
|
}
|
|
165
184
|
}
|
|
166
|
-
catch {
|
|
167
|
-
|
|
185
|
+
catch (err) {
|
|
186
|
+
dbg(`[poll] Error: ${err.message}`);
|
|
168
187
|
}
|
|
169
188
|
}
|
|
170
189
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
-
export declare const PACKAGE_VERSION = "0.24.1";
|
|
2
|
+
export declare const PACKAGE_VERSION = "0.26.0";
|
|
3
3
|
export declare function createServer(deviceName?: string): McpServer;
|
|
4
4
|
export declare function startStdioServer(deviceName?: string): Promise<void>;
|
package/dist/index.js
CHANGED
|
@@ -5,29 +5,60 @@ import { controlSchema, screenshotSchema, pairSchema } from "./tools/schemas.js"
|
|
|
5
5
|
import { executeControl } from "./tools/control.js";
|
|
6
6
|
import { handleScreenshot } from "./tools/screenshot.js";
|
|
7
7
|
import { handlePair } from "./tools/pair.js";
|
|
8
|
-
|
|
8
|
+
import { getStaticContext, getDynamicContext, fetchDeviceProfile, buildControlToolDescription, buildScreenshotToolDescription, formatDeviceStatus, } from "./core/device.js";
|
|
9
|
+
export const PACKAGE_VERSION = "0.26.0";
|
|
9
10
|
export function createServer(deviceName) {
|
|
10
11
|
const server = new McpServer({
|
|
11
12
|
name: "zhihand",
|
|
12
13
|
version: PACKAGE_VERSION,
|
|
13
14
|
});
|
|
14
15
|
// zhihand_control — main phone control tool
|
|
15
|
-
|
|
16
|
+
// Description includes device info (platform, model, screen size) when available
|
|
17
|
+
server.tool("zhihand_control", buildControlToolDescription(), controlSchema, async (params) => {
|
|
16
18
|
const config = resolveConfig(deviceName);
|
|
17
19
|
return await executeControl(config, params);
|
|
18
20
|
});
|
|
19
21
|
// zhihand_screenshot — capture current screen without any action
|
|
20
|
-
server.tool("zhihand_screenshot", screenshotSchema, async () => {
|
|
22
|
+
server.tool("zhihand_screenshot", buildScreenshotToolDescription(), screenshotSchema, async () => {
|
|
21
23
|
const config = resolveConfig(deviceName);
|
|
22
24
|
return await handleScreenshot(config);
|
|
23
25
|
});
|
|
26
|
+
// zhihand_status — return device context for LLM to query on demand
|
|
27
|
+
server.tool("zhihand_status", "Get device status: platform, model, OS version, screen size, battery, network, BLE, dark mode, storage, and more.", {}, async () => {
|
|
28
|
+
return {
|
|
29
|
+
content: [{
|
|
30
|
+
type: "text",
|
|
31
|
+
text: JSON.stringify(formatDeviceStatus(), null, 2),
|
|
32
|
+
}],
|
|
33
|
+
};
|
|
34
|
+
});
|
|
24
35
|
// zhihand_pair — device pairing
|
|
25
|
-
server.tool("zhihand_pair", pairSchema, async (params) => {
|
|
36
|
+
server.tool("zhihand_pair", "Pair a new mobile device via QR code.", pairSchema, async (params) => {
|
|
26
37
|
return await handlePair(params);
|
|
27
38
|
});
|
|
39
|
+
// device://profile — MCP resource for device profile
|
|
40
|
+
server.resource("device-profile", "device://profile", { description: "Device static and dynamic context (platform, model, screen, battery, network, etc.)" }, async () => {
|
|
41
|
+
const staticCtx = getStaticContext();
|
|
42
|
+
const dynamicCtx = getDynamicContext();
|
|
43
|
+
return {
|
|
44
|
+
contents: [{
|
|
45
|
+
uri: "device://profile",
|
|
46
|
+
mimeType: "application/json",
|
|
47
|
+
text: JSON.stringify({ ...staticCtx, ...dynamicCtx }, null, 2),
|
|
48
|
+
}],
|
|
49
|
+
};
|
|
50
|
+
});
|
|
28
51
|
return server;
|
|
29
52
|
}
|
|
30
53
|
export async function startStdioServer(deviceName) {
|
|
54
|
+
// Fetch device profile before creating server so tool descriptions have platform info
|
|
55
|
+
try {
|
|
56
|
+
const config = resolveConfig(deviceName);
|
|
57
|
+
await fetchDeviceProfile(config);
|
|
58
|
+
}
|
|
59
|
+
catch {
|
|
60
|
+
// Non-fatal — server will use generic descriptions
|
|
61
|
+
}
|
|
31
62
|
const server = createServer(deviceName);
|
|
32
63
|
const transport = new StdioServerTransport();
|
|
33
64
|
await server.connect(transport);
|