@zhihand/mcp 0.17.1 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/command.d.ts +4 -0
- package/dist/core/command.js +28 -7
- package/dist/daemon/dispatcher.js +76 -63
- package/dist/index.js +1 -1
- package/dist/tools/schemas.d.ts +5 -1
- package/dist/tools/schemas.js +8 -3
- package/package.json +1 -1
package/dist/core/command.d.ts
CHANGED
|
@@ -15,6 +15,10 @@ export interface ControlParams {
|
|
|
15
15
|
startYRatio?: number;
|
|
16
16
|
endXRatio?: number;
|
|
17
17
|
endYRatio?: number;
|
|
18
|
+
appPackage?: string;
|
|
19
|
+
bundleId?: string;
|
|
20
|
+
urlScheme?: string;
|
|
21
|
+
appName?: string;
|
|
18
22
|
}
|
|
19
23
|
export interface QueuedControlCommand {
|
|
20
24
|
type: string;
|
package/dist/core/command.js
CHANGED
|
@@ -9,20 +9,23 @@ export function createControlCommand(params) {
|
|
|
9
9
|
return { type: "receive_click", payload: { x: params.xRatio, y: params.yRatio } };
|
|
10
10
|
case "doubleclick":
|
|
11
11
|
return { type: "receive_doubleclick", payload: { x: params.xRatio, y: params.yRatio } };
|
|
12
|
+
case "longclick":
|
|
13
|
+
return { type: "receive_longclick", payload: { x: params.xRatio, y: params.yRatio, time: params.durationMs ?? 800 } };
|
|
12
14
|
case "rightclick":
|
|
13
15
|
return { type: "receive_rightclick", payload: { x: params.xRatio, y: params.yRatio } };
|
|
14
16
|
case "middleclick":
|
|
15
17
|
return { type: "receive_middleclick", payload: { x: params.xRatio, y: params.yRatio } };
|
|
16
18
|
case "type":
|
|
17
|
-
return { type: "
|
|
19
|
+
return { type: "receive_input", payload: { input: params.text, mode: "auto", submit: false } };
|
|
18
20
|
case "swipe":
|
|
19
21
|
return {
|
|
20
|
-
type: "
|
|
22
|
+
type: "receive_slide",
|
|
21
23
|
payload: {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
24
|
+
x1: params.startXRatio,
|
|
25
|
+
y1: params.startYRatio,
|
|
26
|
+
x2: params.endXRatio,
|
|
27
|
+
y2: params.endYRatio,
|
|
28
|
+
time: params.durationMs ?? 300,
|
|
26
29
|
},
|
|
27
30
|
};
|
|
28
31
|
case "scroll":
|
|
@@ -36,12 +39,30 @@ export function createControlCommand(params) {
|
|
|
36
39
|
},
|
|
37
40
|
};
|
|
38
41
|
case "keycombo":
|
|
39
|
-
return { type: "
|
|
42
|
+
return { type: "receive_key_combo", payload: { keys: params.keys } };
|
|
43
|
+
case "back":
|
|
44
|
+
return { type: "receive_back", payload: {} };
|
|
45
|
+
case "home":
|
|
46
|
+
return { type: "receive_home", payload: {} };
|
|
47
|
+
case "enter":
|
|
48
|
+
return { type: "receive_enter", payload: {} };
|
|
40
49
|
case "clipboard":
|
|
41
50
|
return {
|
|
42
51
|
type: "receive_clipboard",
|
|
43
52
|
payload: { action: params.clipboardAction, text: params.text },
|
|
44
53
|
};
|
|
54
|
+
case "open_app": {
|
|
55
|
+
const appPayload = {};
|
|
56
|
+
if (params.appPackage)
|
|
57
|
+
appPayload.app_package = params.appPackage;
|
|
58
|
+
if (params.bundleId)
|
|
59
|
+
appPayload.bundle_id = params.bundleId;
|
|
60
|
+
if (params.urlScheme)
|
|
61
|
+
appPayload.url_scheme = params.urlScheme;
|
|
62
|
+
if (params.appName)
|
|
63
|
+
appPayload.app_name = params.appName;
|
|
64
|
+
return { type: "receive_app", payload: appPayload };
|
|
65
|
+
}
|
|
45
66
|
case "screenshot":
|
|
46
67
|
return { type: "receive_screenshot", payload: {} };
|
|
47
68
|
default:
|
package/dist/daemon/dispatcher.js
CHANGED
|
@@ -329,13 +329,35 @@ export function killActiveChild() {
|
|
|
329
329
|
function wrapPrompt(userPrompt) {
|
|
330
330
|
return `You are ZhiHand, an AI assistant connected to the user's mobile phone via MCP tools.
|
|
331
331
|
|
|
332
|
-
|
|
333
|
-
- zhihand_screenshot: Take a screenshot of the phone screen. Use this when the user asks to see, check, or look at their screen.
|
|
334
|
-
- zhihand_control: Control the phone — click, type, swipe, scroll, key combos, clipboard, wait. Requires "action" parameter. For clicks, provide xRatio/yRatio (0-1 normalized coordinates).
|
|
335
|
-
- zhihand_pair: Pair a new device (rarely needed).
|
|
332
|
+
## Available MCP Tools
|
|
336
333
|
|
|
337
|
-
|
|
338
|
-
|
|
334
|
+
### zhihand_screenshot
|
|
335
|
+
Take a screenshot of the phone screen. Use this when the user asks to see, check, or look at their screen.
|
|
336
|
+
|
|
337
|
+
### zhihand_control
|
|
338
|
+
Control the phone. Requires "action" parameter. All coordinates use normalized ratios [0,1].
|
|
339
|
+
|
|
340
|
+
**Supported actions:**
|
|
341
|
+
- click: Tap at position. Params: xRatio, yRatio
|
|
342
|
+
- doubleclick: Double tap. Params: xRatio, yRatio
|
|
343
|
+
- longclick: Long press. Params: xRatio, yRatio, durationMs (default 800)
|
|
344
|
+
- type: Type text into focused field. Params: text
|
|
345
|
+
- swipe: Swipe gesture. Params: startXRatio, startYRatio, endXRatio, endYRatio, durationMs (default 300)
|
|
346
|
+
- scroll: Scroll at position. Params: xRatio, yRatio, direction (up/down/left/right), amount (default 3)
|
|
347
|
+
- keycombo: Keyboard shortcut. Params: keys (e.g. "ctrl+c", "alt+tab")
|
|
348
|
+
- back: Press Back button (no params)
|
|
349
|
+
- home: Press Home button (no params)
|
|
350
|
+
- enter: Press Enter key (no params)
|
|
351
|
+
- open_app: Open an app. Params: appPackage (Android, e.g. "com.tencent.mm"), bundleId (iOS), urlScheme (e.g. "weixin://")
|
|
352
|
+
- clipboard: Read/write clipboard. Params: clipboardAction ("get"/"set"), text
|
|
353
|
+
- screenshot: Capture screen via control (same as zhihand_screenshot)
|
|
354
|
+
- wait: Wait before next action. Params: durationMs (default 1000)
|
|
355
|
+
|
|
356
|
+
## Rules
|
|
357
|
+
- When the user asks to see their screen, ALWAYS call zhihand_screenshot first.
|
|
358
|
+
- When the user asks to open an app (e.g. WeChat, Settings), use open_app action.
|
|
359
|
+
- When the user asks to go back/home, use back/home actions.
|
|
360
|
+
- For all tap/click operations, use xRatio and yRatio (0-1 normalized coordinates based on the screenshot).
|
|
339
361
|
|
|
340
362
|
User message:
|
|
341
363
|
${userPrompt}`;
|
|
@@ -413,48 +435,16 @@ function dispatchClaude(prompt, startTime, model) {
|
|
|
413
435
|
activeChild = child;
|
|
414
436
|
return collectChildOutput(child, startTime);
|
|
415
437
|
}
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
let hasError = false;
|
|
422
|
-
for (const line of lines) {
|
|
423
|
-
try {
|
|
424
|
-
const event = JSON.parse(line);
|
|
425
|
-
const type = String(event.type ?? "");
|
|
426
|
-
// Extract text from completed agent messages
|
|
427
|
-
if (type === "item.completed") {
|
|
428
|
-
const item = event.item;
|
|
429
|
-
if (item && typeof item.text === "string" && item.text.trim()) {
|
|
430
|
-
texts.push(item.text.trim());
|
|
431
|
-
}
|
|
432
|
-
}
|
|
433
|
-
// Capture errors
|
|
434
|
-
if (type === "error") {
|
|
435
|
-
const msg = String(event.message ?? "");
|
|
436
|
-
if (msg)
|
|
437
|
-
texts.push(`Error: ${msg}`);
|
|
438
|
-
hasError = true;
|
|
439
|
-
}
|
|
440
|
-
if (type === "turn.failed") {
|
|
441
|
-
hasError = true;
|
|
442
|
-
}
|
|
443
|
-
}
|
|
444
|
-
catch {
|
|
445
|
-
// Not valid JSON — skip (truncated line or stderr mixed in)
|
|
446
|
-
}
|
|
447
|
-
}
|
|
448
|
-
if (texts.length > 0) {
|
|
449
|
-
return { text: texts.join("\n\n"), success: !hasError };
|
|
450
|
-
}
|
|
451
|
-
return { text: raw.trim(), success: false };
|
|
452
|
-
}
|
|
438
|
+
/**
|
|
439
|
+
* Collect codex JSONL output with streaming line parsing.
|
|
440
|
+
* Processes each JSONL line as it arrives so we extract agent text
|
|
441
|
+
* without buffering large binary payloads (e.g. base64 screenshots).
|
|
442
|
+
*/
|
|
453
443
|
function collectCodexOutput(child, startTime) {
|
|
454
444
|
return new Promise((resolve) => {
|
|
455
|
-
const
|
|
456
|
-
let
|
|
457
|
-
let
|
|
445
|
+
const texts = [];
|
|
446
|
+
let hasError = false;
|
|
447
|
+
let lineBuffer = "";
|
|
458
448
|
let settled = false;
|
|
459
449
|
function settle(result) {
|
|
460
450
|
if (settled)
|
|
@@ -462,36 +452,59 @@ function collectCodexOutput(child, startTime) {
|
|
|
462
452
|
settled = true;
|
|
463
453
|
resolve(result);
|
|
464
454
|
}
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
if (truncated)
|
|
455
|
+
function processLine(line) {
|
|
456
|
+
if (!line.trim())
|
|
468
457
|
return;
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
458
|
+
try {
|
|
459
|
+
const event = JSON.parse(line);
|
|
460
|
+
const type = String(event.type ?? "");
|
|
461
|
+
if (type === "item.completed") {
|
|
462
|
+
const item = event.item;
|
|
463
|
+
if (item && typeof item.text === "string" && item.text.trim()) {
|
|
464
|
+
texts.push(item.text.trim());
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
if (type === "error") {
|
|
468
|
+
const msg = String(event.message ?? "");
|
|
469
|
+
if (msg)
|
|
470
|
+
texts.push(`Error: ${msg}`);
|
|
471
|
+
hasError = true;
|
|
472
|
+
}
|
|
473
|
+
if (type === "turn.failed") {
|
|
474
|
+
hasError = true;
|
|
475
|
+
}
|
|
473
476
|
}
|
|
474
|
-
|
|
475
|
-
|
|
477
|
+
catch {
|
|
478
|
+
// Not valid JSON — skip
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
const timer = setTimeout(() => { closeChild(child); }, CLI_TIMEOUT);
|
|
482
|
+
const onData = (data) => {
|
|
483
|
+
lineBuffer += data.toString("utf8");
|
|
484
|
+
const lines = lineBuffer.split("\n");
|
|
485
|
+
// Keep the last (possibly incomplete) line in the buffer
|
|
486
|
+
lineBuffer = lines.pop() ?? "";
|
|
487
|
+
for (const line of lines) {
|
|
488
|
+
processLine(line);
|
|
476
489
|
}
|
|
477
490
|
};
|
|
478
|
-
child.stdout?.on("data",
|
|
479
|
-
|
|
491
|
+
child.stdout?.on("data", onData);
|
|
492
|
+
// stderr is not JSONL, just discard
|
|
493
|
+
child.stderr?.resume();
|
|
480
494
|
child.on("close", (code) => {
|
|
481
495
|
clearTimeout(timer);
|
|
482
496
|
activeChild = null;
|
|
497
|
+
// Process any remaining data in the buffer
|
|
498
|
+
if (lineBuffer.trim())
|
|
499
|
+
processLine(lineBuffer);
|
|
483
500
|
const durationMs = Date.now() - startTime;
|
|
484
|
-
|
|
485
|
-
const parsed = parseCodexJsonl(raw);
|
|
486
|
-
let text = parsed.text;
|
|
487
|
-
if (truncated)
|
|
488
|
-
text += "\n\n[Output truncated at 100KB]";
|
|
501
|
+
let text = texts.join("\n\n");
|
|
489
502
|
if (!text) {
|
|
490
503
|
text = code === 0
|
|
491
504
|
? "Task completed (no output)."
|
|
492
505
|
: `CLI process exited with code ${code}.`;
|
|
493
506
|
}
|
|
494
|
-
settle({ text, success:
|
|
507
|
+
settle({ text, success: !hasError && code === 0, durationMs });
|
|
495
508
|
});
|
|
496
509
|
child.on("error", (err) => {
|
|
497
510
|
clearTimeout(timer);
|
package/dist/index.js
CHANGED
|
@@ -5,7 +5,7 @@ import { controlSchema, screenshotSchema, pairSchema } from "./tools/schemas.js"
|
|
|
5
5
|
import { executeControl } from "./tools/control.js";
|
|
6
6
|
import { handleScreenshot } from "./tools/screenshot.js";
|
|
7
7
|
import { handlePair } from "./tools/pair.js";
|
|
8
|
-
const PACKAGE_VERSION = "0.
|
|
8
|
+
const PACKAGE_VERSION = "0.18.0";
|
|
9
9
|
export function createServer(deviceName) {
|
|
10
10
|
const server = new McpServer({
|
|
11
11
|
name: "zhihand",
|
package/dist/tools/schemas.d.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
export declare const controlSchema: {
|
|
3
|
-
action: z.ZodEnum<["click", "doubleclick", "rightclick", "middleclick", "type", "swipe", "scroll", "keycombo", "clipboard", "wait", "screenshot"]>;
|
|
3
|
+
action: z.ZodEnum<["click", "doubleclick", "longclick", "rightclick", "middleclick", "type", "swipe", "scroll", "keycombo", "back", "home", "enter", "clipboard", "open_app", "wait", "screenshot"]>;
|
|
4
4
|
xRatio: z.ZodOptional<z.ZodNumber>;
|
|
5
5
|
yRatio: z.ZodOptional<z.ZodNumber>;
|
|
6
6
|
text: z.ZodOptional<z.ZodString>;
|
|
@@ -13,6 +13,10 @@ export declare const controlSchema: {
|
|
|
13
13
|
startYRatio: z.ZodOptional<z.ZodNumber>;
|
|
14
14
|
endXRatio: z.ZodOptional<z.ZodNumber>;
|
|
15
15
|
endYRatio: z.ZodOptional<z.ZodNumber>;
|
|
16
|
+
appPackage: z.ZodOptional<z.ZodString>;
|
|
17
|
+
bundleId: z.ZodOptional<z.ZodString>;
|
|
18
|
+
urlScheme: z.ZodOptional<z.ZodString>;
|
|
19
|
+
appName: z.ZodOptional<z.ZodString>;
|
|
16
20
|
};
|
|
17
21
|
export declare const screenshotSchema: {};
|
|
18
22
|
export declare const pairSchema: {
|
package/dist/tools/schemas.js
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { z } from "zod";
|
|
2
2
|
export const controlSchema = {
|
|
3
3
|
action: z.enum([
|
|
4
|
-
"click", "doubleclick", "rightclick", "middleclick",
|
|
4
|
+
"click", "doubleclick", "longclick", "rightclick", "middleclick",
|
|
5
5
|
"type", "swipe", "scroll", "keycombo",
|
|
6
|
-
"clipboard",
|
|
6
|
+
"back", "home", "enter",
|
|
7
|
+
"clipboard", "open_app",
|
|
7
8
|
"wait", "screenshot",
|
|
8
9
|
]),
|
|
9
10
|
xRatio: z.number().min(0).max(1).optional().describe("Normalized horizontal position [0,1]"),
|
|
@@ -13,11 +14,15 @@ export const controlSchema = {
|
|
|
13
14
|
amount: z.number().int().positive().default(3).optional().describe("Scroll steps (default 3)"),
|
|
14
15
|
keys: z.string().optional().describe("Key combo string, e.g. 'ctrl+c', 'alt+tab'"),
|
|
15
16
|
clipboardAction: z.enum(["get", "set"]).optional().describe("Clipboard action"),
|
|
16
|
-
durationMs: z.number().int().positive().max(10000).default(1000).optional().describe("Duration in ms for wait (default 1000, max 10000)"),
|
|
17
|
+
durationMs: z.number().int().positive().max(10000).default(1000).optional().describe("Duration in ms for wait, longclick, or swipe (default 1000, max 10000)"),
|
|
17
18
|
startXRatio: z.number().min(0).max(1).optional().describe("Swipe start X [0,1]"),
|
|
18
19
|
startYRatio: z.number().min(0).max(1).optional().describe("Swipe start Y [0,1]"),
|
|
19
20
|
endXRatio: z.number().min(0).max(1).optional().describe("Swipe end X [0,1]"),
|
|
20
21
|
endYRatio: z.number().min(0).max(1).optional().describe("Swipe end Y [0,1]"),
|
|
22
|
+
appPackage: z.string().optional().describe("Android package name, e.g. 'com.tencent.mm'"),
|
|
23
|
+
bundleId: z.string().optional().describe("iOS bundle ID, e.g. 'com.tencent.xin'"),
|
|
24
|
+
urlScheme: z.string().optional().describe("URL scheme, e.g. 'weixin://'"),
|
|
25
|
+
appName: z.string().optional().describe("Human-readable app name (for logging)"),
|
|
21
26
|
};
|
|
22
27
|
export const screenshotSchema = {};
|
|
23
28
|
export const pairSchema = {
|