@zhihand/mcp 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/zhihand ADDED
@@ -0,0 +1,104 @@
1
#!/usr/bin/env -S node --experimental-strip-types

/**
 * `zhihand` command-line entry point.
 *
 * Dispatches on the first positional argument (`serve` when omitted) to start
 * the MCP server, pair a device, print pairing status, detect CLI tools, or run
 * the combined setup flow.
 *
 * The shebang uses `env -S` so that `node` and its flag are passed as separate
 * arguments; without `-S`, Linux hands the whole "node --experimental-strip-types"
 * string to `env` as a single program name and the script cannot start.
 */
import os from "node:os";
import { parseArgs } from "node:util";
import { startStdioServer } from "../src/index.ts";
import { detectCLITools, formatDetectedTools } from "../src/cli/detect.ts";
import { detectAndSetupOpenClaw } from "../src/cli/openclaw.ts";
import { loadDefaultCredential } from "../src/core/config.ts";
import { executePairing } from "../src/core/pair.ts";

const DEFAULT_ENDPOINT = "https://api.zhihand.com";

const { positionals, values } = parseArgs({
  allowPositionals: true,
  options: {
    device: { type: "string" },
    http: { type: "boolean", default: false },
    help: { type: "boolean", short: "h", default: false },
  },
});

const command = positionals[0] ?? "serve";

/**
 * Runs the pairing flow against the default endpoint.
 * Shared by the `pair` and `setup` commands so both derive the edge ID and
 * device name the same way.
 */
async function pairNewDevice(): Promise<void> {
  const edgeId = `mcp-${Date.now().toString(36)}`;
  const deviceName = values.device ?? `mcp-${os.hostname()}`;
  await executePairing(DEFAULT_ENDPOINT, edgeId, deviceName);
}

if (values.help) {
  console.log(`
zhihand — MCP Server for phone control

Usage:
  zhihand serve          Start MCP Server (stdio mode)
  zhihand serve --http   Start MCP Server (HTTP mode)
  zhihand pair           Pair with a phone device
  zhihand status         Show pairing status and device info
  zhihand detect         Detect available CLI tools
  zhihand setup          Interactive setup: pair + configure

Options:
  --device <name>        Use a specific paired device
  -h, --help             Show this help
`);
  process.exit(0);
}

switch (command) {
  case "serve": {
    if (values.http) {
      // Only the stdio transport is wired up in this entry point. Say so on
      // stderr (stdout carries the MCP protocol) instead of ignoring the flag.
      process.stderr.write(
        "[zhihand] --http is not supported by this build; starting the stdio server instead.\n"
      );
    }
    await startStdioServer(values.device ?? process.env.ZHIHAND_DEVICE);
    break;
  }

  case "pair": {
    await pairNewDevice();
    break;
  }

  case "status": {
    const cred = loadDefaultCredential();
    if (cred) {
      // deviceName and pairedAt are optional on the stored credential.
      console.log(`Paired device: ${cred.deviceName ?? "unknown"}`);
      console.log(`Credential ID: ${cred.credentialId}`);
      console.log(`Endpoint: ${cred.endpoint}`);
      console.log(`Paired at: ${cred.pairedAt ?? "unknown"}`);
    } else {
      console.log("No paired device. Run: zhihand pair");
    }
    break;
  }

  case "detect": {
    const tools = await detectCLITools();
    console.log(formatDetectedTools(tools));
    break;
  }

  case "setup": {
    // 1. Check/create pairing
    let cred = loadDefaultCredential();
    if (!cred) {
      console.log("No paired device found. Starting pairing...\n");
      await pairNewDevice();
      cred = loadDefaultCredential();
    }
    if (cred) {
      console.log(`\nPaired: ${cred.deviceName ?? "unknown"} (${cred.credentialId})\n`);
    }

    // 2. Detect CLI tools
    const tools = await detectCLITools();
    console.log(formatDetectedTools(tools));

    // 3. Setup OpenClaw if present
    await detectAndSetupOpenClaw();

    console.log("\nSetup complete. Add to your CLI tool's MCP config:");
    console.log(' { "mcpServers": { "zhihand": { "command": "zhihand", "args": ["serve"] } } }');
    break;
  }

  default:
    console.error(`Unknown command: ${command}. Run 'zhihand --help' for usage.`);
    process.exit(1);
}
@@ -0,0 +1,13 @@
1
#!/usr/bin/env -S node --experimental-strip-types

/**
 * OpenClaw Plugin entry point.
 * This script is invoked by OpenClaw when the zhihand plugin is loaded.
 * It bridges the OpenClaw Plugin API to MCP core logic via the adapter.
 *
 * The shebang uses `env -S` so that `node` and its flag are split into
 * separate arguments; Linux otherwise treats them as one program name.
 */
import { registerOpenClawTools } from "../src/openclaw.adapter.ts";

/**
 * Registers the ZhiHand tools and commands on the plugin API that OpenClaw
 * passes in.
 *
 * @param api - Plugin API object supplied by OpenClaw; typed from the
 *   adapter's own first parameter so the two cannot drift apart.
 */
export default function activate(api: Parameters<typeof registerOpenClawTools>[0]): void {
  registerOpenClawTools(api);
}
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "@zhihand/mcp",
3
+ "version": "0.12.0",
4
+ "private": false,
5
+ "type": "module",
6
+ "description": "ZhiHand MCP Server — phone control tools for Claude Code, Codex, Gemini CLI, and OpenClaw",
7
+ "license": "MIT",
8
+ "homepage": "https://github.com/handgpt/zhihand/tree/main/packages/mcp",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "git+https://github.com/handgpt/zhihand.git",
12
+ "directory": "packages/mcp"
13
+ },
14
+ "bin": {
15
+ "zhihand": "./bin/zhihand",
16
+ "zhihand.openclaw": "./bin/zhihand.openclaw"
17
+ },
18
+ "exports": {
19
+ ".": "./src/index.ts",
20
+ "./openclaw": "./src/openclaw.adapter.ts"
21
+ },
22
+ "files": [
23
+ "README.md",
24
+ "bin/",
25
+ "src/"
26
+ ],
27
+ "publishConfig": {
28
+ "access": "public"
29
+ },
30
+ "scripts": {
31
+ "start": "node --experimental-strip-types src/index.ts",
32
+ "test": "node --test --experimental-strip-types src/**/*.test.ts"
33
+ },
34
+ "dependencies": {
35
+ "@modelcontextprotocol/sdk": "^1.12.1",
36
+ "qrcode": "^1.5.4",
37
+ "zod": "^3.24.0"
38
+ },
39
+ "engines": {
40
+ "node": ">=22"
41
+ }
42
+ }
@@ -0,0 +1,90 @@
1
+ import { execSync } from "node:child_process";
2
+
3
/**
 * Description of one CLI agent found on this machine.
 */
export interface CLITool {
  /** Internal identifier of the tool. */
  name: "claudecode" | "codex" | "gemini" | "openclaw";
  /** Executable name used to invoke the tool. */
  command: string;
  /** Output of `<command> --version`, or "unknown" when that call failed. */
  version: string;
  /** Result of the tool-specific login/configuration probe. */
  loggedIn: boolean;
  /** Sort key; lower numbers are listed (and preferred) first. */
  priority: number;
}
10
+
11
/**
 * Runs a shell command synchronously and returns its trimmed stdout.
 *
 * @param cmd - Command line handed to the shell.
 * @returns Trimmed stdout, or `null` when the command exits non-zero, cannot
 *   be started, or exceeds the 5 second timeout.
 */
function tryExec(cmd: string): string | null {
  let stdout: string;
  try {
    stdout = execSync(cmd, {
      encoding: "utf8",
      timeout: 5000,
      stdio: ["pipe", "pipe", "pipe"],
    });
  } catch {
    return null;
  }
  return stdout.trim();
}
18
+
19
/**
 * Reports whether an executable with the given name can be resolved.
 *
 * Uses the POSIX `command -v` shell builtin rather than `which`, which is a
 * separate program that is not installed everywhere, and `where` on Windows.
 *
 * @param cmd - Bare executable name. It is interpolated into a shell command,
 *   so callers must pass trusted, fixed names only.
 * @returns `true` when the lookup command succeeds.
 */
function isCommandAvailable(cmd: string): boolean {
  const lookup = process.platform === "win32" ? `where ${cmd}` : `command -v ${cmd}`;
  return tryExec(lookup) !== null;
}
22
+
23
/**
 * Probes for the Claude Code CLI (`claude`).
 *
 * @returns A tool record with priority 1, or `null` when `claude` is not
 *   available. `loggedIn` reflects whether `~/.claude/settings.json` can be listed.
 */
async function detectClaudeCode(): Promise<CLITool | null> {
  if (!isCommandAvailable("claude")) {
    return null;
  }
  const reportedVersion = tryExec("claude --version");
  // Check login: claude has config in ~/.claude/
  const settingsListing = tryExec("ls ~/.claude/settings.json");
  return {
    name: "claudecode",
    command: "claude",
    version: reportedVersion ?? "unknown",
    loggedIn: settingsListing !== null,
    priority: 1,
  };
}
30
+
31
/**
 * Probes for the Codex CLI (`codex`).
 *
 * @returns A tool record with priority 2, or `null` when `codex` is not
 *   available. `loggedIn` is true when `OPENAI_API_KEY` is set or `~/.codex/`
 *   can be listed.
 */
async function detectCodex(): Promise<CLITool | null> {
  if (!isCommandAvailable("codex")) {
    return null;
  }
  const reportedVersion = tryExec("codex --version");
  // Check login: OPENAI_API_KEY env var or config. The directory probe only
  // runs when the environment variable is absent.
  let loggedIn = !!process.env.OPENAI_API_KEY;
  if (!loggedIn) {
    loggedIn = tryExec("ls ~/.codex/") !== null;
  }
  return {
    name: "codex",
    command: "codex",
    version: reportedVersion ?? "unknown",
    loggedIn,
    priority: 2,
  };
}
38
+
39
/**
 * Probes for the Gemini CLI (`gemini`).
 *
 * @returns A tool record with priority 3, or `null` when `gemini` is not
 *   available. `loggedIn` is true when `gemini auth status` succeeds.
 */
async function detectGemini(): Promise<CLITool | null> {
  if (!isCommandAvailable("gemini")) {
    return null;
  }
  const reportedVersion = tryExec("gemini --version");
  // Check login: Google Cloud auth
  const authStatus = tryExec("gemini auth status");
  return {
    name: "gemini",
    command: "gemini",
    version: reportedVersion ?? "unknown",
    loggedIn: authStatus !== null,
    priority: 3,
  };
}
46
+
47
/**
 * Probes for the OpenClaw CLI (`openclaw`).
 *
 * @returns A tool record with priority 4, or `null` when `openclaw` is not
 *   available. `loggedIn` reflects whether `~/.openclaw/openclaw.json` can be listed.
 */
async function detectOpenClaw(): Promise<CLITool | null> {
  if (!isCommandAvailable("openclaw")) {
    return null;
  }
  const reportedVersion = tryExec("openclaw --version");
  const configListing = tryExec("ls ~/.openclaw/openclaw.json");
  return {
    name: "openclaw",
    command: "openclaw",
    version: reportedVersion ?? "unknown",
    loggedIn: configListing !== null,
    priority: 4,
  };
}
53
+
54
/**
 * Runs every tool detector and collects the tools that were found.
 *
 * A detector that rejects is ignored rather than failing the whole scan.
 *
 * @returns Detected tools ordered by ascending `priority`.
 */
export async function detectCLITools(): Promise<CLITool[]> {
  const outcomes = await Promise.allSettled([
    detectClaudeCode(),
    detectCodex(),
    detectGemini(),
    detectOpenClaw(),
  ]);

  const found: CLITool[] = [];
  for (const outcome of outcomes) {
    if (outcome.status === "fulfilled" && outcome.value !== null) {
      found.push(outcome.value);
    }
  }
  found.sort((left, right) => left.priority - right.priority);
  return found;
}
68
+
69
/**
 * Picks the CLI tool to use.
 *
 * Selection order: a detected tool whose name or command equals the
 * `ZHIHAND_CLI` environment variable, then the highest-priority logged-in
 * tool, then the highest-priority tool of any kind.
 *
 * @returns The chosen tool, or `null` when nothing was detected.
 */
export async function detectBestCLI(): Promise<CLITool | null> {
  const requested = process.env.ZHIHAND_CLI;
  const detected = await detectCLITools();

  if (requested) {
    for (const tool of detected) {
      if (tool.name === requested || tool.command === requested) {
        return tool;
      }
    }
    // An override that matches nothing falls through to the default choice.
  }

  // Return best available tool (logged in + highest priority)
  for (const tool of detected) {
    if (tool.loggedIn) {
      return tool;
    }
  }
  return detected[0] ?? null;
}
81
+
82
/**
 * Renders detected tools as a human-readable, newline-separated list.
 *
 * @param tools - Tools to list, in display order.
 * @returns A heading followed by one line per tool, or a fixed message when
 *   the list is empty.
 */
export function formatDetectedTools(tools: CLITool[]): string {
  if (tools.length === 0) {
    return "No CLI tools detected.";
  }
  const lines: string[] = ["Detected CLI tools:"];
  for (const tool of tools) {
    const mark = tool.loggedIn ? "✓" : "✗";
    const suffix = tool.loggedIn ? "" : " — not logged in";
    lines.push(` ${mark} ${tool.name} (${tool.command} ${tool.version})${suffix}`);
  }
  return lines.join("\n");
}
@@ -0,0 +1,50 @@
1
+ import { execSync } from "node:child_process";
2
+
3
/**
 * Runs a shell command synchronously and returns its trimmed stdout.
 *
 * @param cmd - Command line handed to the shell.
 * @returns Trimmed stdout, or `null` when the command exits non-zero, cannot
 *   be started, or exceeds the 30 second timeout.
 */
function tryExec(cmd: string): string | null {
  let stdout: string;
  try {
    stdout = execSync(cmd, {
      encoding: "utf8",
      timeout: 30_000,
      stdio: ["pipe", "pipe", "pipe"],
    });
  } catch {
    return null;
  }
  return stdout.trim();
}
10
+
11
/**
 * Reports whether an executable with the given name can be resolved.
 *
 * Uses the POSIX `command -v` shell builtin rather than `which`, which is a
 * separate program that is not installed everywhere, and `where` on Windows.
 *
 * @param cmd - Bare executable name. It is interpolated into a shell command,
 *   so callers must pass trusted, fixed names only.
 * @returns `true` when the lookup command succeeds.
 */
function isCommandAvailable(cmd: string): boolean {
  const lookup = process.platform === "win32" ? `where ${cmd}` : `command -v ${cmd}`;
  return tryExec(lookup) !== null;
}
14
+
15
/**
 * Checks whether the output of `openclaw plugins list` mentions the ZhiHand plugin.
 *
 * @returns `false` when the command fails or prints nothing; otherwise whether
 *   the listing contains "zhihand" or "@zhihand/mcp".
 */
export async function isZhiHandPluginInstalled(): Promise<boolean> {
  const listing = tryExec("openclaw plugins list");
  if (listing === null || listing === "") {
    return false;
  }
  const mentionsShortName = listing.includes("zhihand");
  return mentionsShortName || listing.includes("@zhihand/mcp");
}
20
+
21
/**
 * Installs the `@zhihand/mcp` plugin through the OpenClaw CLI.
 *
 * @param options.timeoutMs - Maximum time for the install command (default 30 000 ms).
 * @param options.autoConfirm - When true the child's stdio is piped; otherwise
 *   it inherits this process's stdio so the user can interact with it.
 * @returns `true` when the install command exits successfully, `false` otherwise.
 */
export async function installZhiHandPlugin(
  options: { timeoutMs?: number; autoConfirm?: boolean } = {}
): Promise<boolean> {
  const { timeoutMs = 30_000, autoConfirm } = options;
  let installed = true;
  try {
    execSync("openclaw plugins install @zhihand/mcp", {
      encoding: "utf8",
      timeout: timeoutMs,
      stdio: autoConfirm ? ["pipe", "pipe", "pipe"] : "inherit",
    });
  } catch {
    installed = false;
  }
  return installed;
}
36
+
37
/**
 * Installs the ZhiHand plugin into OpenClaw when OpenClaw is present and the
 * plugin is not yet listed. Progress is reported on stderr.
 *
 * Does nothing when `openclaw` is unavailable or the plugin is already installed.
 */
export async function detectAndSetupOpenClaw(): Promise<void> {
  if (!isCommandAvailable("openclaw")) {
    return;
  }
  if (await isZhiHandPluginInstalled()) {
    return;
  }

  process.stderr.write("[zhihand] Detected OpenClaw without ZhiHand plugin. Installing...\n");
  const installed = await installZhiHandPlugin({ timeoutMs: 30_000, autoConfirm: true });
  const outcomeMessage = installed
    ? "[zhihand] ZhiHand plugin installed to OpenClaw.\n"
    : "[zhihand] Failed to install ZhiHand plugin to OpenClaw.\n";
  process.stderr.write(outcomeMessage);
}
@@ -0,0 +1,34 @@
1
+ import { execSync } from "node:child_process";
2
+ import type { CLITool } from "./detect.ts";
3
+
4
/**
 * Quotes a string for use as a single POSIX shell word.
 *
 * The value is wrapped in single quotes; each embedded single quote is
 * rewritten as `'\''` (close quote, escaped quote, reopen quote).
 *
 * @param s - Raw text.
 * @returns The single-quoted form of `s`.
 */
function shellEscape(s: string): string {
  const segments = s.split("'");
  return "'" + segments.join("'\\''") + "'";
}
7
+
8
/**
 * Runs a prompt through the given CLI tool and returns the tool's stdout.
 *
 * The prompt is shell-quoted and passed with the flags each tool expects.
 * NOTE(review): `execSync` blocks the event loop until the child exits
 * (up to the 300 second timeout) even though this function is `async`.
 *
 * @param tool - Detected tool to invoke; `tool.command` is used verbatim.
 * @param prompt - Prompt text for the tool.
 * @returns The child's stdout as UTF-8 text.
 * @throws Error for an unsupported tool name, or whatever `execSync` throws
 *   on a non-zero exit or timeout.
 */
export async function spawnCLITask(tool: CLITool, prompt: string): Promise<string> {
  const quotedPrompt = shellEscape(prompt);

  let commandLine: string;
  switch (tool.name) {
    case "claudecode":
      commandLine = `${tool.command} -p ${quotedPrompt} --output-format json`;
      break;
    case "codex":
      commandLine = `${tool.command} -q ${quotedPrompt} --json`;
      break;
    case "gemini":
      commandLine = `${tool.command} -p ${quotedPrompt}`;
      break;
    case "openclaw":
      commandLine = `${tool.command} run ${quotedPrompt}`;
      break;
    default:
      throw new Error(`Unsupported CLI tool: ${(tool as CLITool).name}`);
  }

  return execSync(commandLine, { encoding: "utf8", timeout: 300_000 });
}
@@ -0,0 +1,144 @@
1
+ import type { ZhiHandConfig } from "./config.ts";
2
+
3
/** Direction accepted by the scroll action. */
export type ScrollDirection = "up" | "down" | "left" | "right";

/** Operation accepted by the clipboard action. */
export type ClipboardAction = "get" | "set";

/**
 * Parameters of one control request. Only `action` is required; which of the
 * remaining fields are read depends on the action.
 */
export interface ControlParams {
  action: string;
  /** Pointer position, used by the click variants and scroll. */
  xRatio?: number;
  yRatio?: number;
  /** Text for the type action and for clipboard writes. */
  text?: string;
  direction?: ScrollDirection;
  /** Scroll amount; the command builder defaults it to 3. */
  amount?: number;
  keys?: string;
  clipboardAction?: ClipboardAction;
  durationMs?: number;
  /** Swipe start and end points. */
  startXRatio?: number;
  startYRatio?: number;
  endXRatio?: number;
  endYRatio?: number;
}

/** Command body submitted to the control plane queue. */
export interface QueuedControlCommand {
  type: string;
  payload?: Record<string, unknown>;
  messageId?: number;
}

/** Queue record returned by the control plane for a submitted command. */
export interface QueuedCommandRecord {
  id: string;
  credential_id: string;
  status: string;
  command: QueuedControlCommand;
  created_at: string;
  acked_at?: string;
  ack_status?: string;
  ack_result?: Record<string, unknown>;
}

/** Outcome of waiting for a command acknowledgement. */
export interface WaitForCommandAckResult {
  acked: boolean;
  command?: QueuedCommandRecord;
}
43
+
44
// Rolling per-process sequence in the range 0..999, used as the low three
// decimal digits of generated message IDs.
let messageCounter = 0;

/**
 * Produces a numeric message ID: the current epoch milliseconds multiplied by
 * 1000, plus the rolling counter.
 *
 * @returns The generated ID.
 */
function nextMessageId(): number {
  messageCounter += 1;
  if (messageCounter === 1000) {
    messageCounter = 0;
  }
  const millisecondBase = Date.now() * 1000;
  return millisecondBase + messageCounter;
}
50
+
51
/**
 * Translates control parameters into the command object that is queued for
 * the device.
 *
 * @param params - Requested action and its arguments.
 * @returns A command whose `type` is `receive_<action>` and whose payload
 *   carries the fields that action uses.
 * @throws Error when `params.action` is not one of the supported actions.
 */
export function createControlCommand(params: ControlParams): QueuedControlCommand {
  const { action } = params;

  // The four click variants share one payload shape: a single point.
  if (
    action === "click" ||
    action === "doubleclick" ||
    action === "rightclick" ||
    action === "middleclick"
  ) {
    return { type: `receive_${action}`, payload: { x: params.xRatio, y: params.yRatio } };
  }

  if (action === "type") {
    return { type: "receive_type", payload: { text: params.text } };
  }

  if (action === "swipe") {
    const swipePayload = {
      startX: params.startXRatio,
      startY: params.startYRatio,
      endX: params.endXRatio,
      endY: params.endYRatio,
    };
    return { type: "receive_swipe", payload: swipePayload };
  }

  if (action === "scroll") {
    const scrollPayload = {
      x: params.xRatio,
      y: params.yRatio,
      direction: params.direction,
      amount: params.amount ?? 3,
    };
    return { type: "receive_scroll", payload: scrollPayload };
  }

  if (action === "keycombo") {
    return { type: "receive_keycombo", payload: { keys: params.keys } };
  }

  if (action === "clipboard") {
    return {
      type: "receive_clipboard",
      payload: { action: params.clipboardAction, text: params.text },
    };
  }

  if (action === "screenshot") {
    return { type: "receive_screenshot", payload: {} };
  }

  throw new Error(`Unsupported action: ${params.action}`);
}
96
+
97
/**
 * Submits a control command to the control plane queue for the configured
 * credential.
 *
 * A `message_id` is attached to the command: the caller's `messageId` when
 * present, otherwise a freshly generated one.
 *
 * The request is bounded by `config.timeoutMs` (default 10 000 ms), matching
 * the screenshot request, so an unresponsive control plane cannot hang the
 * caller indefinitely.
 *
 * @param config - Endpoint, credential ID and controller token to use.
 * @param command - Command to enqueue.
 * @returns The queue record reported by the control plane.
 * @throws Error when the response status is not OK, or the fetch rejection
 *   when the request fails or times out.
 */
export async function enqueueCommand(
  config: ZhiHandConfig,
  command: QueuedControlCommand
): Promise<QueuedCommandRecord> {
  const response = await fetch(
    `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/commands`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-zhihand-controller-token": config.controllerToken,
      },
      body: JSON.stringify({
        command: { ...command, message_id: command.messageId ?? nextMessageId() },
      }),
      signal: AbortSignal.timeout(config.timeoutMs ?? 10_000),
    }
  );
  if (!response.ok) {
    throw new Error(`Enqueue command failed: ${response.status}`);
  }
  const payload = (await response.json()) as { command: QueuedCommandRecord };
  return payload.command;
}
120
+
121
/**
 * Fetches the current queue record of a previously enqueued command.
 *
 * The request is bounded by `config.timeoutMs` (default 10 000 ms), matching
 * the screenshot request, so a stalled connection cannot leave a poll pending
 * forever.
 *
 * @param config - Endpoint, credential ID and controller token to use.
 * @param commandId - ID of the command to look up.
 * @returns The queue record reported by the control plane.
 * @throws Error when the response status is not OK, or the fetch rejection
 *   when the request fails or times out.
 */
export async function getCommand(
  config: ZhiHandConfig,
  commandId: string
): Promise<QueuedCommandRecord> {
  const response = await fetch(
    `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/commands/${encodeURIComponent(commandId)}`,
    {
      headers: { "x-zhihand-controller-token": config.controllerToken },
      signal: AbortSignal.timeout(config.timeoutMs ?? 10_000),
    }
  );
  if (!response.ok) {
    throw new Error(`Get command failed: ${response.status}`);
  }
  const payload = (await response.json()) as { command: QueuedCommandRecord };
  return payload.command;
}
137
+
138
/**
 * Builds the one-line status text reported after sending an action.
 *
 * @param action - Name of the action that was sent.
 * @param result - Outcome of waiting for the acknowledgement.
 * @returns A timed-out notice when no ACK arrived; otherwise the ACK status,
 *   defaulting to "ok" when the record carries none.
 */
export function formatAckSummary(action: string, result: WaitForCommandAckResult): string {
  if (result.acked) {
    const status = result.command?.ack_status ?? "ok";
    return `Sent ${action}. ACK: ${status}`;
  }
  return `Sent ${action}, waiting for ACK (timed out).`;
}
@@ -0,0 +1,91 @@
1
+ import fs from "node:fs";
2
+ import path from "node:path";
3
+ import os from "node:os";
4
+
5
/** Credential stored for one paired device. */
export interface DeviceCredential {
  credentialId: string;
  /** Token sent in the `x-zhihand-controller-token` header. */
  controllerToken: string;
  /** Control plane base URL this credential belongs to. */
  endpoint: string;
  deviceName?: string;
  /** Pairing time; written as an ISO-8601 string by the pairing flow. */
  pairedAt?: string;
}

/** On-disk credential file: named device credentials plus the default name. */
export interface CredentialStore {
  /** Key into `devices` that is used when no device is named. */
  default: string;
  devices: Record<string, DeviceCredential>;
}

/** Runtime configuration for talking to the control plane. */
export interface ZhiHandConfig {
  controlPlaneEndpoint: string;
  credentialId: string;
  controllerToken: string;
  edgeId?: string;
  /** Request timeout in milliseconds. */
  timeoutMs?: number;
}
25
+
26
// All persistent data lives under ~/.zhihand.
const ZHIHAND_DIR = path.join(os.homedir(), ".zhihand");
// Paired-device credentials.
const CREDENTIALS_PATH = path.join(ZHIHAND_DIR, "credentials.json");
// Free-form state written by saveState.
const STATE_PATH = path.join(ZHIHAND_DIR, "state.json");

/**
 * Returns the directory that holds ZhiHand's credential and state files.
 *
 * @returns Absolute path of `~/.zhihand`.
 */
export function resolveZhiHandDir(): string {
  return ZHIHAND_DIR;
}
33
+
34
/**
 * Creates the ZhiHand data directory if it does not exist yet.
 *
 * The directory holds controller tokens, so it is created readable by the
 * owner only (mode 0700). The mode applies only when the directory is
 * created; an existing directory keeps its current permissions.
 */
export function ensureZhiHandDir(): void {
  fs.mkdirSync(ZHIHAND_DIR, { recursive: true, mode: 0o700 });
}
37
+
38
/**
 * Reads the credential store from disk.
 *
 * @returns The parsed store, or `null` when the file is missing, unreadable,
 *   not valid JSON, or does not contain a `devices` object. The shape check
 *   keeps callers that index `store.devices` from crashing on a corrupt file.
 */
export function loadCredentialStore(): CredentialStore | null {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(CREDENTIALS_PATH, "utf8"));
  } catch {
    // Missing file, read error, or malformed JSON: treat as "no store".
    return null;
  }

  if (typeof parsed !== "object" || parsed === null) {
    return null;
  }
  const devices = (parsed as { devices?: unknown }).devices;
  if (typeof devices !== "object" || devices === null || Array.isArray(devices)) {
    return null;
  }
  return parsed as CredentialStore;
}
46
+
47
/**
 * Looks up the credential that the store marks as default.
 *
 * @returns The default device's credential, or `null` when there is no store
 *   or the default name has no entry.
 */
export function loadDefaultCredential(): DeviceCredential | null {
  const store = loadCredentialStore();
  if (store) {
    const defaultCredential = store.devices[store.default];
    return defaultCredential ?? null;
  }
  return null;
}
52
+
53
/**
 * Adds or replaces a device credential in the store and writes the store to disk.
 *
 * The file contains controller tokens, so it is created with mode 0600
 * (owner read/write only). The mode applies only when the file is created;
 * an existing file keeps its current permissions.
 *
 * @param name - Key under which the credential is stored.
 * @param cred - Credential to store.
 * @param setDefault - When true (the default), `name` becomes the default device.
 */
export function saveCredential(name: string, cred: DeviceCredential, setDefault: boolean = true): void {
  ensureZhiHandDir();
  // A fresh store starts with this device as its default.
  const store: CredentialStore = loadCredentialStore() ?? { default: name, devices: {} };
  store.devices[name] = cred;
  if (setDefault) store.default = name;
  fs.writeFileSync(CREDENTIALS_PATH, JSON.stringify(store, null, 2), { mode: 0o600 });
}
60
+
61
/**
 * Builds the runtime configuration for a paired device.
 *
 * @param deviceName - Device to use; the store's default device when omitted.
 * @returns Configuration derived from the stored credential, with a
 *   10 second request timeout.
 * @throws Error when no credential store exists or the device is not in it.
 */
export function resolveConfig(deviceName?: string): ZhiHandConfig {
  const store = loadCredentialStore();
  if (store === null) {
    throw new Error("No ZhiHand credentials found. Run 'zhihand pair' first.");
  }

  const selectedName = deviceName ?? store.default;
  const credential = store.devices[selectedName];
  if (!credential) {
    const knownDevices = Object.keys(store.devices).join(", ");
    throw new Error(`Device '${selectedName}' not found. Available: ${knownDevices}`);
  }

  const { endpoint, credentialId, controllerToken } = credential;
  return {
    controlPlaneEndpoint: endpoint,
    credentialId,
    controllerToken,
    timeoutMs: 10_000,
  };
}
78
+
79
/**
 * Reads the saved state file.
 *
 * @typeParam T - Shape the caller expects; the content is not validated.
 * @returns The parsed state, or `null` when the file is missing, unreadable,
 *   or not valid JSON.
 */
export function loadState<T = unknown>(): T | null {
  if (!fs.existsSync(STATE_PATH)) {
    return null;
  }
  let state: T | null = null;
  try {
    const raw = fs.readFileSync(STATE_PATH, "utf8");
    state = JSON.parse(raw) as T;
  } catch {
    state = null;
  }
  return state;
}
87
+
88
/**
 * Writes the given state object to the state file as pretty-printed JSON.
 *
 * The pairing flow stores the controller token in this file, so it is created
 * with mode 0600 (owner read/write only). The mode applies only when the file
 * is created; an existing file keeps its current permissions.
 *
 * @param state - JSON-serialisable value to persist.
 */
export function saveState(state: unknown): void {
  ensureZhiHandDir();
  fs.writeFileSync(STATE_PATH, JSON.stringify(state, null, 2), { mode: 0o600 });
}
@@ -0,0 +1,143 @@
1
+ import QRCode from "qrcode";
2
+ import type { ZhiHandConfig, DeviceCredential } from "./config.ts";
3
+ import { saveCredential, loadDefaultCredential, ensureZhiHandDir, saveState } from "./config.ts";
4
+
5
/** Pairing session as reported by the control plane. */
export interface PairingSession {
  id: string;
  pair_url: string;
  qr_payload: string;
  controller_token?: string;
  edge_id: string;
  /** Known values are listed explicitly; any other string is accepted too. */
  status: "pending" | "claimed" | "expired" | string;
  /** Read by the pairing flow once the session is "claimed". */
  credential_id?: string;
  expires_at: string;
  requested_scopes?: string[];
}

/** Options for creating a pairing session. */
export interface CreatePairingOptions {
  edgeId: string;
  /** Session lifetime in seconds; 600 when omitted. */
  ttlSeconds?: number;
  /** Scopes to request; `DEFAULT_SCOPES` when omitted. */
  requestedScopes?: string[];
}

// Scopes requested when the caller does not specify any.
const DEFAULT_SCOPES = [
  "observe",
  "session.control",
  "screen.read",
  "screen.capture",
  "ble.control",
];
30
+
31
/**
 * Creates a new pairing session on the control plane.
 *
 * @param endpoint - Control plane base URL.
 * @param options - Edge ID plus optional TTL (default 600 s) and scopes
 *   (default `DEFAULT_SCOPES`).
 * @returns The created session. Its `controller_token` is taken from the top
 *   level of the response when present, otherwise from the session object.
 * @throws Error when the response status is not OK.
 */
export async function createPairingSession(
  endpoint: string,
  options: CreatePairingOptions
): Promise<PairingSession> {
  const requestBody = {
    edge_id: options.edgeId,
    ttl_seconds: options.ttlSeconds ?? 600,
    requested_scopes: options.requestedScopes ?? DEFAULT_SCOPES,
  };
  const response = await fetch(`${endpoint}/v1/pairing/sessions`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(requestBody),
  });
  if (!response.ok) {
    throw new Error(`Create pairing session failed: ${response.status}`);
  }

  const { session, controller_token: topLevelToken } = (await response.json()) as {
    session: PairingSession;
    controller_token?: string;
  };
  return { ...session, controller_token: topLevelToken ?? session.controller_token };
}
53
+
54
/**
 * Fetches the current state of a pairing session.
 *
 * @param endpoint - Control plane base URL.
 * @param sessionId - ID of the session; URL-encoded before use.
 * @returns The session object from the response.
 * @throws Error when the response status is not OK.
 */
export async function getPairingSession(
  endpoint: string,
  sessionId: string
): Promise<PairingSession> {
  const sessionUrl = `${endpoint}/v1/pairing/sessions/${encodeURIComponent(sessionId)}`;
  const response = await fetch(sessionUrl);
  if (response.ok) {
    const body = (await response.json()) as { session: PairingSession };
    return body.session;
  }
  throw new Error(`Get pairing session failed: ${response.status}`);
}
67
+
68
/**
 * Polls a pairing session until it has been claimed.
 *
 * The session is fetched, then re-fetched after a 2 second pause, until it is
 * "claimed" with a credential ID, it expires, or the deadline passes.
 *
 * @param endpoint - Control plane base URL.
 * @param sessionId - ID of the session to watch.
 * @param timeoutMs - Maximum time to keep polling (default 600 000 ms).
 * @returns The claimed session.
 * @throws Error when the session expires, the deadline passes, or a fetch fails.
 */
export async function waitForPairingClaim(
  endpoint: string,
  sessionId: string,
  timeoutMs: number = 600_000
): Promise<PairingSession> {
  const pause = (): Promise<void> => new Promise((done) => setTimeout(done, 2000));
  const stopAt = Date.now() + timeoutMs;

  for (;;) {
    if (Date.now() >= stopAt) {
      throw new Error("Pairing timeout.");
    }
    const current = await getPairingSession(endpoint, sessionId);
    const isClaimed = current.status === "claimed" && Boolean(current.credential_id);
    if (isClaimed) {
      return current;
    }
    if (current.status === "expired") {
      throw new Error("Pairing session expired.");
    }
    await pause();
  }
}
86
+
87
/**
 * Renders text as a QR code made of UTF-8 characters.
 *
 * @param url - Text to encode.
 * @returns The QR code as a multi-line string with a margin of 1.
 */
export async function renderPairingQRCode(url: string): Promise<string> {
  const rendered = await QRCode.toString(url, { type: "utf8", margin: 1 });
  return rendered;
}
90
+
91
/**
 * Runs the complete pairing flow: create a session, show it to the user, wait
 * for the phone to claim it, and persist the resulting credential.
 *
 * The pairing URL and a QR code are written to stderr right after the session
 * is created; without them the user has nothing to scan while this function
 * waits. stderr is used so that stdout stays free for protocol traffic.
 *
 * @param endpoint - Control plane base URL.
 * @param edgeId - Edge ID to register the session under.
 * @param deviceName - Name to store the credential under; a generated
 *   `device_<timestamp>` name is used when omitted.
 * @returns The claimed session and the credential that was saved as default.
 * @throws Error when session creation or polling fails, the session expires
 *   or times out, or the claimed session lacks a credential ID or token.
 */
export async function executePairing(
  endpoint: string,
  edgeId: string,
  deviceName?: string
): Promise<{ session: PairingSession; credential: DeviceCredential }> {
  const session = await createPairingSession(endpoint, { edgeId });

  // Save pending state
  saveState({
    sessionId: session.id,
    controllerToken: session.controller_token,
    edgeId: session.edge_id,
    pairUrl: session.pair_url,
    status: "pending",
    expiresAt: session.expires_at,
  });

  // Show the user what to scan before blocking on the claim.
  process.stderr.write(`Open this link on your phone to pair: ${session.pair_url}\n`);
  try {
    // NOTE(review): assumes qr_payload is the string the phone app expects to
    // scan; pair_url is used when the payload is empty. Confirm with the app.
    const qr = await renderPairingQRCode(session.qr_payload || session.pair_url);
    process.stderr.write(`${qr}\n`);
  } catch {
    // The QR code is a convenience; the URL above is already enough to pair.
  }

  // Wait for phone to scan
  const claimed = await waitForPairingClaim(endpoint, session.id);

  const credentialId = claimed.credential_id;
  const controllerToken = claimed.controller_token ?? session.controller_token;
  if (!credentialId || !controllerToken) {
    // Persisting a credential without these would only fail later and less clearly.
    throw new Error("Pairing completed without a credential ID or controller token.");
  }

  const name = deviceName ?? `device_${Date.now()}`;
  const credential: DeviceCredential = {
    credentialId,
    controllerToken,
    endpoint,
    deviceName: name,
    pairedAt: new Date().toISOString(),
  };
  saveCredential(name, credential, true);

  // Update state
  saveState({
    sessionId: session.id,
    controllerToken: credential.controllerToken,
    edgeId: session.edge_id,
    credentialId: credential.credentialId,
    pairUrl: session.pair_url,
    status: "claimed",
  });

  return { session: claimed, credential };
}
134
+
135
/**
 * Renders a credential as a multi-line pairing status report.
 *
 * @param cred - Credential to describe, or `null` when nothing is paired.
 * @returns Four newline-separated lines, or a hint to run `zhihand pair`.
 */
export function formatPairingStatus(cred: DeviceCredential | null): string {
  if (cred === null) {
    return "Not paired. Run 'zhihand pair' to connect a device.";
  }
  const deviceLabel = cred.deviceName ?? "unknown device";
  const pairedLabel = cred.pairedAt ?? "unknown";
  return (
    `Paired to: ${deviceLabel}` +
    "\n" +
    `Endpoint: ${cred.endpoint}` +
    "\n" +
    `Credential: ${cred.credentialId}` +
    "\n" +
    `Paired at: ${pairedLabel}`
  );
}
@@ -0,0 +1,28 @@
1
+ import type { ZhiHandConfig } from "./config.ts";
2
+
3
/**
 * Downloads the current screen image for the configured credential.
 *
 * The request asks for `image/jpeg` and is aborted after `config.timeoutMs`
 * (default 10 000 ms).
 *
 * @param config - Endpoint, credential ID, controller token and timeout.
 * @returns The response body as a Buffer.
 * @throws Error when the response status is not OK, or the fetch rejection
 *   when the request fails or is aborted.
 */
export async function fetchScreenshotBinary(config: ZhiHandConfig): Promise<Buffer> {
  const abort = new AbortController();
  const timer = setTimeout(() => abort.abort(), config.timeoutMs ?? 10_000);

  try {
    const screenUrl = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/screen`;
    const response = await fetch(screenUrl, {
      method: "GET",
      headers: {
        "x-zhihand-controller-token": config.controllerToken,
        "Accept": "image/jpeg",
      },
      signal: abort.signal,
    });

    if (response.ok) {
      const bytes = await response.arrayBuffer();
      return Buffer.from(bytes);
    }
    throw new Error(`Screenshot fetch failed: ${response.status}`);
  } finally {
    // Always release the timer, whether the request succeeded or threw.
    clearTimeout(timer);
  }
}
@@ -0,0 +1,88 @@
1
+ import type { ZhiHandConfig } from "./config.ts";
2
+ import type { QueuedCommandRecord, WaitForCommandAckResult } from "./command.ts";
3
+ import { getCommand } from "./command.ts";
4
+
5
/** Event delivered to `handleSSEEvent`. */
export interface SSEEvent {
  id: string;
  topic: string;
  /** Event kind; "command.acked" is the one handled in this module. */
  kind: string;
  credential_id: string;
  /** Command record attached to the event, when there is one. */
  command?: QueuedCommandRecord;
  sequence: number;
}

// Pending ACK listeners keyed by command ID. At most one listener is kept per
// command; registering again for the same ID replaces the previous one.
const ackCallbacks: Map<string, (command: QueuedCommandRecord) => void> = new Map();
16
+
17
/**
 * Dispatches an incoming event to the listener waiting for that command.
 *
 * Only "command.acked" events that carry a command are considered. The
 * matching listener is invoked with the command and then removed.
 *
 * @param event - Event to process.
 */
export function handleSSEEvent(event: SSEEvent): void {
  const ackedCommand = event.command;
  if (event.kind !== "command.acked" || !ackedCommand) {
    return;
  }
  const listener = ackCallbacks.get(ackedCommand.id);
  if (!listener) {
    return;
  }
  listener(ackedCommand);
  ackCallbacks.delete(ackedCommand.id);
}
26
+
27
/**
 * Registers a listener that is called when the given command is acknowledged.
 *
 * Only one listener is kept per command ID; a later registration for the same
 * ID replaces the earlier one.
 *
 * @param commandId - ID of the command to watch.
 * @param callback - Invoked with the acknowledged command record.
 * @returns A function that removes this registration. It leaves the entry
 *   alone when a newer listener has since been registered for the same ID, so
 *   one waiter cannot cancel another's subscription.
 */
export function subscribeToCommandAck(
  commandId: string,
  callback: (cmd: QueuedCommandRecord) => void
): () => void {
  ackCallbacks.set(commandId, callback);
  return () => {
    if (ackCallbacks.get(commandId) === callback) {
      ackCallbacks.delete(commandId);
    }
  };
}
34
+
35
/**
 * Waits for a command to be acknowledged.
 *
 * Two sources are watched in parallel: an ACK listener (fired when
 * `handleSSEEvent` receives the matching event) and a 500 ms poll of the
 * command record, which covers the case where no event stream is active.
 * Whichever reports first settles the wait; every exit path releases the
 * timer, the poll interval, the listener and the abort handler exactly once.
 *
 * @param config - Control plane configuration used for polling.
 * @param options.commandId - ID of the command to wait for.
 * @param options.timeoutMs - Maximum wait (default 15 000 ms).
 * @param options.signal - Optional signal that cancels the wait.
 * @returns `{ acked: true, command }` on acknowledgement, `{ acked: false }`
 *   on timeout.
 * @throws Error("The operation was aborted") when the signal is aborted,
 *   including when it is already aborted at call time.
 */
export async function waitForCommandAck(
  config: ZhiHandConfig,
  options: { commandId: string; timeoutMs?: number; signal?: AbortSignal }
): Promise<WaitForCommandAckResult> {
  const timeoutMs = options.timeoutMs ?? 15_000;
  const { commandId, signal } = options;

  return new Promise<WaitForCommandAckResult>((resolve, reject) => {
    // An already-aborted signal never fires "abort" again, so check up front.
    if (signal?.aborted) {
      reject(new Error("The operation was aborted"));
      return;
    }

    let settled = false;
    let pollInFlight = false;

    // Single exit point: the first caller wins and releases every resource.
    const settle = (outcome: () => void): void => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      clearInterval(pollInterval);
      unsubscribe();
      signal?.removeEventListener("abort", onAbort);
      outcome();
    };

    const onAbort = (): void => {
      settle(() => reject(new Error("The operation was aborted")));
    };

    const timeout = setTimeout(() => {
      settle(() => resolve({ acked: false }));
    }, timeoutMs);

    // Pushed ACK, delivered through handleSSEEvent.
    const unsubscribe = subscribeToCommandAck(commandId, (ackedCommand) => {
      settle(() => resolve({ acked: true, command: ackedCommand }));
    });

    // Also poll as fallback (SSE may not be active)
    const pollInterval = setInterval(() => {
      // Skip this tick while the previous request is still outstanding so
      // slow responses do not pile up overlapping polls.
      if (settled || pollInFlight) return;
      pollInFlight = true;
      getCommand(config, commandId)
        .then((cmd) => {
          if (cmd.acked_at) {
            settle(() => resolve({ acked: true, command: cmd }));
          }
        })
        .catch(() => {
          // Polling failure is non-fatal; SSE or next poll may succeed
        })
        .finally(() => {
          pollInFlight = false;
        });
    }, 500);

    signal?.addEventListener("abort", onAbort, { once: true });
  });
}
package/src/index.ts ADDED
@@ -0,0 +1,53 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { z } from "zod";
4
+
5
+ import { resolveConfig } from "./core/config.ts";
6
+ import { controlSchema, screenshotSchema, pairSchema } from "./tools/schemas.ts";
7
+ import { executeControl } from "./tools/control.ts";
8
+ import { handleScreenshot } from "./tools/screenshot.ts";
9
+ import { handlePair } from "./tools/pair.ts";
10
+
11
// Version reported to MCP clients. Must match "version" in package.json;
// it was left at 0.11.0 when the package was released as 0.12.0.
const PACKAGE_VERSION = "0.12.0";

/**
 * Builds the ZhiHand MCP server and registers its three tools.
 *
 * The device configuration is resolved on every tool call rather than once at
 * start-up.
 *
 * @param deviceName - Paired device to control; the stored default device
 *   when omitted.
 * @returns The configured server, not yet connected to a transport.
 */
export function createServer(deviceName?: string): McpServer {
  const server = new McpServer({
    name: "zhihand",
    version: PACKAGE_VERSION,
  });

  // zhihand_control — main phone control tool
  server.tool("zhihand_control", controlSchema, async (params) => {
    const config = resolveConfig(deviceName);
    return await executeControl(config, params);
  });

  // zhihand_screenshot — capture current screen without any action
  server.tool("zhihand_screenshot", screenshotSchema, async () => {
    const config = resolveConfig(deviceName);
    return await handleScreenshot(config);
  });

  // zhihand_pair — device pairing
  server.tool("zhihand_pair", pairSchema, async (params) => {
    return await handlePair(params);
  });

  return server;
}
38
+
39
/**
 * Creates the MCP server and connects it to a stdio transport.
 *
 * @param deviceName - Paired device to control; forwarded to `createServer`.
 * @returns A promise that settles once the server is connected.
 */
export async function startStdioServer(deviceName?: string): Promise<void> {
  const stdioTransport = new StdioServerTransport();
  await createServer(deviceName).connect(stdioTransport);
}
44
+
45
// Direct execution: start stdio server.
// The check is a heuristic on the script path: it is true whenever the entry
// script's name ends in index.ts or index.js.
const isDirectRun = process.argv[1]?.endsWith("index.ts") || process.argv[1]?.endsWith("index.js");
if (isDirectRun) {
  const DEVICE_FLAG = "--device=";
  // Take everything after the first "=" so device names that contain "=" are
  // kept intact; an empty value falls back to the environment variable.
  const deviceArg =
    process.argv.find((a) => a.startsWith(DEVICE_FLAG))?.slice(DEVICE_FLAG.length) || undefined;
  startStdioServer(deviceArg ?? process.env.ZHIHAND_DEVICE).catch((err: unknown) => {
    // Rejections are not guaranteed to be Error instances.
    const reason = err instanceof Error ? err.message : String(err);
    process.stderr.write(`ZhiHand MCP Server failed: ${reason}\n`);
    process.exit(1);
  });
}
@@ -0,0 +1,116 @@
1
+ /**
2
+ * OpenClaw Plugin adapter — thin wrapper that bridges OpenClaw Plugin API
3
+ * to MCP core logic. All business logic lives in core/ and tools/.
4
+ */
5
+ import { resolveConfig } from "./core/config.ts";
6
+ import { executeControl } from "./tools/control.ts";
7
+ import { handleScreenshot } from "./tools/screenshot.ts";
8
+ import { handlePair } from "./tools/pair.ts";
9
+ import { detectCLITools, formatDetectedTools } from "./cli/detect.ts";
10
+ import { controlSchema, screenshotSchema, pairSchema } from "./tools/schemas.ts";
11
+
12
/** Logger offered by OpenClaw; every level is optional. */
type OpenClawLogger = {
  info?: (message: string) => void;
  warn?: (message: string) => void;
  error?: (message: string) => void;
};

/** Runtime services offered by OpenClaw. */
type OpenClawRuntime = {
  state: { resolveStateDir: () => string };
  stt?: { transcribeAudioFile: (input: { path: string }) => Promise<{ text?: string } | string> };
};

/** Shape of a tool handed to `registerTool`. */
type OpenClawToolRegistration = {
  name: string;
  label: string;
  description: string;
  /** JSON-Schema-like description of the tool's parameters. */
  parameters: Record<string, unknown>;
  execute: (id: string, params: Record<string, unknown>) => Promise<Record<string, unknown>>;
};

/** Subset of the OpenClaw plugin API that this adapter is typed against. */
type OpenClawPluginApi = {
  logger: OpenClawLogger;
  runtime: OpenClawRuntime;
  pluginConfig?: Record<string, unknown>;
  registerService: (service: { id: string; start: () => Promise<void>; stop: () => Promise<void> }) => void;
  registerCommand: (command: {
    name: string;
    description: string;
    acceptsArgs?: boolean;
    handler: (ctx: { args?: string }) => Promise<{ text: string }>;
  }) => void;
  registerTool: (tool: OpenClawToolRegistration, options?: { optional?: boolean }) => void;
};
44
+
45
/**
 * Converts a Zod raw shape into the JSON-Schema-like parameter object that
 * OpenClaw expects.
 *
 * This is a deliberately small conversion. It reads Zod's `_def.typeName` to
 * report numbers, booleans and enums with their real type instead of
 * advertising every parameter as a string, and looks through
 * optional/nullable/default wrappers to find the underlying type. Anything it
 * does not recognise is still reported as "string". The actual Zod schemas
 * remain the source of truth for validation.
 *
 * @param zodShape - Map of parameter name to Zod schema.
 * @returns `{ type: "object", properties }` with one entry per parameter,
 *   each carrying a `type`, a `description` (the parameter name when the
 *   schema has none), and `enum` for enum schemas.
 */
function zodSchemaToJsonSchema(zodShape: Record<string, unknown>): Record<string, unknown> {
  type ZodLike = {
    description?: string;
    _def?: { typeName?: string; innerType?: unknown; values?: unknown };
  };
  const wrapperTypes = new Set(["ZodOptional", "ZodNullable", "ZodDefault"]);

  const properties: Record<string, unknown> = {};
  for (const [key, value] of Object.entries(zodShape)) {
    let node = (value ?? {}) as ZodLike;
    let description = node.description;

    // Peel wrappers such as .optional() to reach the schema that carries the
    // real type. The depth bound guards against malformed, cyclic input.
    for (let depth = 0; depth < 8; depth++) {
      const typeName = node._def?.typeName;
      const inner = node._def?.innerType;
      if (!typeName || !wrapperTypes.has(typeName) || !inner) break;
      node = inner as ZodLike;
      if (description === undefined) description = node.description;
    }

    const property: Record<string, unknown> = {
      type: "string",
      description: description ?? key,
    };
    switch (node._def?.typeName) {
      case "ZodNumber":
        property.type = "number";
        break;
      case "ZodBoolean":
        property.type = "boolean";
        break;
      case "ZodEnum":
        if (Array.isArray(node._def.values)) {
          property.enum = [...node._def.values];
        }
        break;
      default:
        // Unknown or string schema: keep the "string" fallback.
        break;
    }
    properties[key] = property;
  }
  return { type: "object", properties };
}
58
+
59
/**
 * Registers the ZhiHand tools and the detect command on an OpenClaw plugin API.
 *
 * Registration order is: `zhihand_control`, `zhihand_screenshot`,
 * `zhihand_pair` (marked optional), then the `zhihand-detect` command.
 * The device configuration is resolved on every tool call.
 *
 * @param api - OpenClaw plugin API to register on.
 * @param deviceName - Paired device to control; the stored default device
 *   when omitted.
 */
export function registerOpenClawTools(api: OpenClawPluginApi, deviceName?: string): void {
  // Tool results are handed back to OpenClaw as opaque records.
  const asRecord = (value: unknown): Record<string, unknown> => value as Record<string, unknown>;

  // Builds one registration and submits it, so each schema is converted
  // immediately before its tool is registered.
  const register = (
    name: string,
    label: string,
    description: string,
    schema: Record<string, unknown>,
    execute: OpenClawToolRegistration["execute"],
    options?: { optional?: boolean }
  ): void => {
    const tool: OpenClawToolRegistration = {
      name,
      label,
      description,
      parameters: zodSchemaToJsonSchema(schema),
      execute,
    };
    if (options) {
      api.registerTool(tool, options);
    } else {
      api.registerTool(tool);
    }
  };

  // zhihand_control
  register(
    "zhihand_control",
    "ZhiHand Control",
    "Control a paired phone: tap, swipe, type, scroll, screenshot, and more.",
    controlSchema,
    async (_id, params) => {
      const config = resolveConfig(deviceName);
      const outcome = await executeControl(config, params as Parameters<typeof executeControl>[1]);
      return asRecord(outcome);
    }
  );

  // zhihand_screenshot
  register(
    "zhihand_screenshot",
    "ZhiHand Screenshot",
    "Capture current phone screen without performing any action.",
    screenshotSchema,
    async (_id, _params) => {
      const config = resolveConfig(deviceName);
      const outcome = await handleScreenshot(config);
      return asRecord(outcome);
    }
  );

  // zhihand_pair
  register(
    "zhihand_pair",
    "ZhiHand Pair",
    "Pair with a phone. Returns QR code and pairing URL.",
    pairSchema,
    async (_id, params) => {
      const outcome = await handlePair(params as { forceNew?: boolean });
      return asRecord(outcome);
    },
    { optional: true }
  );

  // detect command
  api.registerCommand({
    name: "zhihand-detect",
    description: "Detect available CLI tools (Claude Code, Codex, Gemini, OpenClaw)",
    handler: async () => {
      const detected = await detectCLITools();
      return { text: formatDetectedTools(detected) };
    },
  });

  api.logger.info?.("[zhihand] OpenClaw tools registered via MCP core adapter");
}

export default registerOpenClawTools;
@@ -0,0 +1,66 @@
1
+ import type { ZhiHandConfig } from "../core/config.ts";
2
+ import { createControlCommand, enqueueCommand, formatAckSummary } from "../core/command.ts";
3
+ import type { ControlParams } from "../core/command.ts";
4
+ import { fetchScreenshotBinary } from "../core/screenshot.ts";
5
+ import { waitForCommandAck } from "../core/sse.ts";
6
+
7
/** Return a promise that resolves (with no value) once `ms` milliseconds have elapsed. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, ms);
  });
}
10
+
11
/**
 * Execute one control action against the paired device.
 *
 * - `wait` is handled locally (no command is enqueued): sleep, then fetch the
 *   current screenshot.
 * - `screenshot` is delegated to `executeScreenshot`.
 * - Every other action is turned into a command, enqueued, and awaited for an
 *   ACK (15 s timeout). When the ACK arrives, a screenshot is fetched on a
 *   best-effort basis.
 *
 * Errors from enqueueing, waiting for the ACK, or the screenshot fetch in the
 * `wait` branch are not caught here and propagate to the caller.
 *
 * @param config Device configuration passed through to the core helpers.
 * @param params Action and its arguments.
 * @returns A content list: a text summary first, then, when available, the
 *          screenshot as base64 labelled `image/jpeg`.
 */
export async function executeControl(
  config: ZhiHandConfig,
  params: ControlParams
): Promise<{ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> }> {
  // wait: plugin-local implementation, no server round-trip
  if (params.action === "wait") {
    const DEFAULT_WAIT_MS = 1000;
    const MAX_WAIT_MS = 10_000;
    // Params may arrive without schema validation, so normalize the duration
    // to the documented 0..10000 ms range. setTimeout would otherwise coerce
    // negative, NaN or oversized delays and the reported duration would be wrong.
    const requested = params.durationMs ?? DEFAULT_WAIT_MS;
    const waitMs = Number.isFinite(requested)
      ? Math.min(Math.max(Math.trunc(requested), 0), MAX_WAIT_MS)
      : DEFAULT_WAIT_MS;

    await sleep(waitMs);
    const screenshot = await fetchScreenshotBinary(config);
    return {
      content: [
        { type: "text", text: `Waited ${waitMs}ms` },
        { type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" },
      ],
    };
  }

  // screenshot: send receive_screenshot, App captures immediately (no 2s delay)
  if (params.action === "screenshot") {
    return await executeScreenshot(config);
  }

  // HID operations: enqueue → ACK → GET screenshot
  const command = createControlCommand(params);
  const queued = await enqueueCommand(config, command);
  const ack = await waitForCommandAck(config, { commandId: queued.id, timeoutMs: 15_000 });

  const content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> = [
    { type: "text", text: formatAckSummary(params.action, ack) },
  ];

  if (ack.acked) {
    try {
      const screenshot = await fetchScreenshotBinary(config);
      content.push({ type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" });
    } catch (err: unknown) {
      // Screenshot stays best-effort after ACK, but tell the caller why the
      // image is missing instead of dropping the failure silently.
      const reason = err instanceof Error ? err.message : String(err);
      content.push({ type: "text", text: `Screenshot unavailable: ${reason}` });
    }
  }

  return { content };
}
52
+
53
/**
 * Request a fresh capture from the device and return it.
 *
 * A `screenshot` command is enqueued and its ACK awaited for up to 5 s. The
 * screenshot is then fetched whether or not the ACK arrived; the ACK state is
 * only reported in the text part of the result.
 *
 * @param config Device configuration passed through to the core helpers.
 * @returns A text part followed by the screenshot as base64 labelled `image/jpeg`.
 */
export async function executeScreenshot(
  config: ZhiHandConfig
): Promise<{ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> }> {
  const captureCommand = createControlCommand({ action: "screenshot" });
  const { id: commandId } = await enqueueCommand(config, captureCommand);
  const { acked } = await waitForCommandAck(config, { commandId, timeoutMs: 5_000 });

  const image = await fetchScreenshotBinary(config);
  const encoded = image.toString("base64");

  return {
    content: [
      { type: "text", text: `Screenshot captured (acked: ${acked})` },
      { type: "image", data: encoded, mimeType: "image/jpeg" },
    ],
  };
}
@@ -0,0 +1,58 @@
1
+ import { loadDefaultCredential } from "../core/config.ts";
2
+ import {
3
+ createPairingSession,
4
+ renderPairingQRCode,
5
+ formatPairingStatus,
6
+ } from "../core/pair.ts";
7
+
8
/** Endpoint used for pairing when the caller does not pass one. */
const DEFAULT_ENDPOINT = "https://api.zhihand.com";

/** Prefix of locally generated edge IDs. */
const DEFAULT_EDGE_ID_PREFIX = "mcp-";

/** Build an edge ID: the prefix followed by the current epoch milliseconds in base 36. */
function generateEdgeId(): string {
  const stamp = Date.now().toString(36);
  return DEFAULT_EDGE_ID_PREFIX + stamp;
}
14
+
15
/**
 * Report the current pairing or start a new pairing session.
 *
 * Unless `params.forceNew` is set, an existing default credential
 * short-circuits the call and its formatted status is returned. Otherwise a
 * pairing session is created and the result contains the rendered QR code,
 * the pairing URL and the expiry. The function returns right after rendering;
 * it does not itself wait for the phone to scan.
 *
 * @param params   `forceNew` skips the existing-credential check.
 * @param endpoint Pairing endpoint; falls back to `DEFAULT_ENDPOINT`.
 * @returns A single text part.
 */
export async function handlePair(
  params: { forceNew?: boolean },
  endpoint?: string
): Promise<{ content: Array<{ type: string; text?: string }> }> {
  const resolvedEndpoint = endpoint ?? DEFAULT_ENDPOINT;

  // Reuse the stored credential unless a fresh pairing was explicitly requested.
  const credential = params.forceNew ? undefined : loadDefaultCredential();
  if (credential) {
    return { content: [{ type: "text", text: formatPairingStatus(credential) }] };
  }

  // No usable credential (or forceNew): open a new pairing session.
  const edgeId = generateEdgeId();
  const session = await createPairingSession(resolvedEndpoint, { edgeId });
  const qr = await renderPairingQRCode(session.pair_url);

  const lines = [
    "Scan QR code or open URL on your phone to pair:",
    "",
    qr,
    "",
    `URL: ${session.pair_url}`,
    `Expires at: ${session.expires_at}`,
    "",
    "Waiting for phone to scan...",
    "(Call zhihand_pair again after scanning to check status)",
  ];

  return { content: [{ type: "text", text: lines.join("\n") }] };
}
@@ -0,0 +1,28 @@
1
+ import { z } from "zod";
2
+
3
/** Optional number constrained to the closed interval [0, 1], with a description. */
const ratioField = (description: string) =>
  z.number().min(0).max(1).optional().describe(description);

/**
 * Parameter shape for the control tool: the action plus its per-action
 * arguments, all of which are optional.
 *
 * NOTE(review): `amount` and `durationMs` chain `.default(...)` before
 * `.optional()`. Depending on the zod major version the outer optional may
 * short-circuit `undefined` so the default is not applied — confirm, and note
 * that the `wait` handler already falls back to 1000 ms itself.
 */
export const controlSchema = {
  action: z.enum([
    "click",
    "doubleclick",
    "rightclick",
    "middleclick",
    "type",
    "swipe",
    "scroll",
    "keycombo",
    "clipboard",
    "wait",
    "screenshot",
  ]),
  xRatio: ratioField("Normalized horizontal position [0,1]"),
  yRatio: ratioField("Normalized vertical position [0,1]"),
  text: z.string().optional().describe("Text for type or clipboard set"),
  direction: z
    .enum(["up", "down", "left", "right"])
    .optional()
    .describe("Scroll direction"),
  amount: z
    .number()
    .int()
    .positive()
    .default(3)
    .optional()
    .describe("Scroll steps (default 3)"),
  keys: z.string().optional().describe("Key combo string, e.g. 'ctrl+c', 'alt+tab'"),
  clipboardAction: z.enum(["get", "set"]).optional().describe("Clipboard action"),
  durationMs: z
    .number()
    .int()
    .positive()
    .max(10000)
    .default(1000)
    .optional()
    .describe("Duration in ms for wait (default 1000, max 10000)"),
  startXRatio: ratioField("Swipe start X [0,1]"),
  startYRatio: ratioField("Swipe start Y [0,1]"),
  endXRatio: ratioField("Swipe end X [0,1]"),
  endYRatio: ratioField("Swipe end Y [0,1]"),
};
23
+
24
/** Parameter shape for the screenshot tool: it takes no parameters. */
export const screenshotSchema = {};

/** Parameter shape for the pair tool. */
export const pairSchema = {
  forceNew: z
    .boolean()
    .default(false)
    .optional()
    .describe("Force new pairing even if already paired"),
};
@@ -0,0 +1,8 @@
1
+ import type { ZhiHandConfig } from "../core/config.ts";
2
+ import { executeScreenshot } from "./control.ts";
3
+
4
/**
 * Tool-level entry point for capturing the screen; forwards to
 * `executeScreenshot` and returns its result unchanged.
 *
 * @param config Device configuration to capture from.
 */
export async function handleScreenshot(
  config: ZhiHandConfig
): Promise<{ content: Array<{ type: string; text?: string; data?: string; mimeType?: string }> }> {
  const captured = await executeScreenshot(config);
  return captured;
}