@zhihand/mcp 0.26.4 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/zhihand CHANGED
@@ -27,6 +27,7 @@ const { positionals, values } = parseArgs({
27
27
  help: { type: "boolean", short: "h", default: false },
28
28
  detach: { type: "boolean", short: "d", default: false },
29
29
  debug: { type: "boolean", default: false },
30
+ force: { type: "boolean", default: false },
30
31
  port: { type: "string" },
31
32
  },
32
33
  });
@@ -53,7 +54,11 @@ Usage:
53
54
  zhihand pair Pair with a phone device
54
55
  zhihand detect Detect available CLI tools
55
56
 
56
- zhihand test Test device connectivity (sends click + type commands)
57
+ zhihand list List all available tests with IDs
58
+ zhihand test Run all safe device tests (skips capability-gated)
59
+ zhihand test <ids> Run specific test(s), e.g. 'zhihand test 4' or '4,9,20'
60
+ zhihand test all Run ALL tests (including unsafe, e.g. power button)
61
+ zhihand test --force Bypass capability gates (run anyway even if NOT ready)
57
62
  zhihand serve Start MCP Server (stdio mode, backward compat)
58
63
 
59
64
  Options:
@@ -62,6 +67,7 @@ Options:
62
67
  --port <port> Override daemon port (default: 18686)
63
68
  -d, --detach Run daemon in background
64
69
  --debug Enable verbose debug logging
70
+ --force (test only) Run tests even if capability not ready
65
71
  -h, --help Show this help
66
72
  `);
67
73
  process.exit(0);
@@ -286,13 +292,115 @@ switch (command) {
286
292
  break;
287
293
  }
288
294
 
295
+ case "list":
289
296
  case "test": {
290
297
  const { resolveConfig: resolveTestConfig } = await import("../dist/core/config.js");
291
- const { createControlCommand, enqueueCommand } = await import("../dist/core/command.js");
298
+ const { createControlCommand, createSystemCommand, enqueueCommand } = await import("../dist/core/command.js");
292
299
  const { waitForCommandAck } = await import("../dist/core/sse.js");
293
- const { fetchScreenshotBinary } = await import("../dist/core/screenshot.js");
294
- const { fetchDeviceProfile, getStaticContext, isDeviceProfileLoaded, formatDeviceStatus } = await import("../dist/core/device.js");
300
+ const { fetchScreenshot, getSnapshotStaleThresholdMs } = await import("../dist/core/screenshot.js");
301
+ const { fetchDeviceProfile, getStaticContext, isDeviceProfileLoaded, formatDeviceStatus, getCapabilities } = await import("../dist/core/device.js");
302
+
303
+ // ── Test Registry ────────────────────────────────────────
304
+ // Kind: "profile" | "status" | "screenshot" | "hid" | "system"
305
+ // - Each kind maps to a required capability (see KIND_CAPABILITY).
306
+ // Platform: undefined | "android" | "ios" (skipped on non-matching)
307
+ // Unsafe: won't run in full-suite unless explicitly requested
308
+
309
+ // Required capability per test kind. Tests whose required capability
310
+ // is not ready are SKIPPED (not failed), unless --force is passed.
311
+ //
312
+ // NOTE: `system` commands (volume, brightness, notification, media,
313
+ // etc.) are executed by the phone app via native OS APIs
314
+ // (AccessibilityService on Android, Shortcuts/system hooks on iOS)
315
+ // and do NOT depend on the BLE HID channel. Only the `hid` kind
316
+ // (click, swipe, type, keycombo — which inject into the paired
317
+ // target via the ZhiHand peripheral) needs the HID capability.
318
+ const KIND_CAPABILITY = {
319
+ profile: "none",
320
+ status: "none",
321
+ screenshot: "screen",
322
+ hid: "hid",
323
+ system: "none",
324
+ };
325
+ const REGISTRY = [
326
+ // Phase A — Device Info API
327
+ { id: 1, phase: "Device Info", label: "Fetch device profile", kind: "profile" },
328
+ { id: 2, phase: "Device Info", label: "Device status fields", kind: "status" },
329
+ // Phase B — Screenshot
330
+ { id: 3, phase: "Screenshot", label: "Screenshot", kind: "screenshot" },
331
+ // Phase C — Tap / Touch
332
+ { id: 4, phase: "Tap/Touch", label: "Click center", kind: "hid", params: { action: "click", xRatio: 0.5, yRatio: 0.5 } },
333
+ { id: 5, phase: "Tap/Touch", label: "Double click", kind: "hid", params: { action: "doubleclick", xRatio: 0.5, yRatio: 0.5 } },
334
+ { id: 6, phase: "Tap/Touch", label: "Long click (800ms)", kind: "hid", params: { action: "longclick", xRatio: 0.5, yRatio: 0.5, durationMs: 800 } },
335
+ { id: 7, phase: "Tap/Touch", label: "Right click", kind: "hid", params: { action: "rightclick", xRatio: 0.5, yRatio: 0.5 } },
336
+ { id: 8, phase: "Tap/Touch", label: "Middle click", kind: "hid", params: { action: "middleclick", xRatio: 0.5, yRatio: 0.5 } },
337
+ // Phase D — Swipe / Scroll
338
+ { id: 9, phase: "Swipe/Scroll", label: "Swipe up", kind: "hid", params: { action: "swipe", startXRatio: 0.5, startYRatio: 0.7, endXRatio: 0.5, endYRatio: 0.3, durationMs: 300 } },
339
+ { id: 10, phase: "Swipe/Scroll", label: "Swipe down", kind: "hid", params: { action: "swipe", startXRatio: 0.5, startYRatio: 0.3, endXRatio: 0.5, endYRatio: 0.7, durationMs: 300 } },
340
+ { id: 11, phase: "Swipe/Scroll", label: "Swipe left", kind: "hid", params: { action: "swipe", startXRatio: 0.7, startYRatio: 0.5, endXRatio: 0.3, endYRatio: 0.5, durationMs: 300 } },
341
+ { id: 12, phase: "Swipe/Scroll", label: "Swipe right", kind: "hid", params: { action: "swipe", startXRatio: 0.3, startYRatio: 0.5, endXRatio: 0.7, endYRatio: 0.5, durationMs: 300 } },
342
+ { id: 13, phase: "Swipe/Scroll", label: "Scroll down", kind: "hid", params: { action: "scroll", xRatio: 0.5, yRatio: 0.5, direction: "down", amount: 3 } },
343
+ { id: 14, phase: "Swipe/Scroll", label: "Scroll up", kind: "hid", params: { action: "scroll", xRatio: 0.5, yRatio: 0.5, direction: "up", amount: 3 } },
344
+ // Phase E — Text + Keys
345
+ { id: 15, phase: "Text+Keys", label: "Type text", kind: "hid", params: { action: "type", text: "zhihand" } },
346
+ { id: 16, phase: "Text+Keys", label: "Enter key", kind: "hid", params: { action: "enter" } },
347
+ { id: 17, phase: "Text+Keys", label: "Key combo (select all)", kind: "hid", platformAware: "select_all" },
348
+ // Phase F — App Navigation
349
+ { id: 18, phase: "Navigation", label: "Press Home", kind: "hid", params: { action: "home" } },
350
+ { id: 19, phase: "Navigation", label: "Press Back", kind: "hid", params: { action: "back" } },
351
+ { id: 20, phase: "Navigation", label: "Open WeChat", kind: "hid", platformAware: "open_wechat" },
352
+ // Phase G — Clipboard
353
+ { id: 21, phase: "Clipboard", label: "Clipboard set", kind: "hid", platformAware: "clipboard_set" },
354
+ // Phase H — System Navigation
355
+ { id: 22, phase: "System Nav", label: "Notification shade", kind: "system", params: { action: "notification" } },
356
+ { id: 23, phase: "System Nav", label: "Recent apps", kind: "system", params: { action: "recent" } },
357
+ { id: 24, phase: "System Nav", label: "Search (query='zhihand')", kind: "system", params: { action: "search", text: "zhihand" } },
358
+ { id: 25, phase: "System Nav", label: "Switch input", kind: "system", params: { action: "switch_input" } },
359
+ { id: 26, phase: "System Nav", label: "Siri", kind: "system", params: { action: "siri" }, platform: "ios" },
360
+ { id: 27, phase: "System Nav", label: "Control Center", kind: "system", params: { action: "control_center" }, platform: "ios" },
361
+ { id: 28, phase: "System Nav", label: "Open browser", kind: "system", params: { action: "open_browser" }, platform: "android" },
362
+ { id: 29, phase: "System Nav", label: "Shortcut help", kind: "system", params: { action: "shortcut_help" }, platform: "android" },
363
+ // Phase I — Media
364
+ { id: 30, phase: "Media", label: "Volume up", kind: "system", params: { action: "volume_up" } },
365
+ { id: 31, phase: "Media", label: "Volume down", kind: "system", params: { action: "volume_down" } },
366
+ { id: 32, phase: "Media", label: "Mute toggle", kind: "system", params: { action: "mute" } },
367
+ { id: 33, phase: "Media", label: "Play/Pause", kind: "system", params: { action: "play_pause" } },
368
+ { id: 34, phase: "Media", label: "Next track", kind: "system", params: { action: "next_track" } },
369
+ { id: 35, phase: "Media", label: "Prev track", kind: "system", params: { action: "prev_track" } },
370
+ { id: 36, phase: "Media", label: "Fast forward", kind: "system", params: { action: "fast_forward" } },
371
+ { id: 37, phase: "Media", label: "Rewind", kind: "system", params: { action: "rewind" } },
372
+ { id: 38, phase: "Media", label: "Stop", kind: "system", params: { action: "stop" } },
373
+ // Phase J — Hardware
374
+ { id: 39, phase: "Hardware", label: "Brightness up", kind: "system", params: { action: "brightness_up" } },
375
+ { id: 40, phase: "Hardware", label: "Brightness down", kind: "system", params: { action: "brightness_down" } },
376
+ { id: 41, phase: "Hardware", label: "Power button (⚠️ may lock screen)", kind: "system", params: { action: "power" }, unsafe: true },
377
+ ];
378
+
379
+ // ── "list" sub-command ───────────────────────────────────
380
+ if (command === "list") {
381
+ console.log("📋 ZhiHand Test Registry\n");
382
+ let currentPhase = "";
383
+ for (const t of REGISTRY) {
384
+ if (t.phase !== currentPhase) {
385
+ console.log(`\n ── ${t.phase} ──`);
386
+ currentPhase = t.phase;
387
+ }
388
+ const tags = [];
389
+ if (t.platform) tags.push(`${t.platform}-only`);
390
+ if (t.unsafe) tags.push("unsafe");
391
+ const tagStr = tags.length ? ` [${tags.join(", ")}]` : "";
392
+ console.log(` ${String(t.id).padStart(2)}. ${t.label}${tagStr}`);
393
+ }
394
+ console.log(`\n Total: ${REGISTRY.length} tests`);
395
+ console.log("\nUsage:");
396
+ console.log(" zhihand test # run all safe tests");
397
+ console.log(" zhihand test 4 # run test #4 only");
398
+ console.log(" zhihand test 4,9,20 # run tests #4, #9, #20");
399
+ console.log(" zhihand test all # run ALL tests (including unsafe)");
400
+ process.exit(0);
401
+ }
295
402
 
403
+ // ── "test" sub-command ───────────────────────────────────
296
404
  let testConfig;
297
405
  try {
298
406
  testConfig = resolveTestConfig(values.device ?? process.env.ZHIHAND_DEVICE);
@@ -302,152 +410,270 @@ switch (command) {
302
410
  process.exit(1);
303
411
  }
304
412
 
413
+ // Parse which tests to run from positional args
414
+ const filterArg = positionals[1]; // e.g. "4" or "4,9,20" or "all"
415
+ const forceRun = values.force === true;
416
+ let selectedIds = null; // null = default (all safe)
417
+ let includeUnsafe = false;
418
+ if (filterArg) {
419
+ if (filterArg === "all") {
420
+ includeUnsafe = true;
421
+ } else {
422
+ selectedIds = new Set(
423
+ filterArg.split(",").map((s) => {
424
+ const trimmed = s.trim();
425
+ const n = Number(trimmed);
426
+ // Strict: reject ranges like "4-10" (Number returns NaN), floats, and empty entries (Number("") is 0, which fails n > 0)
427
+ return Number.isInteger(n) && n > 0 ? n : NaN;
428
+ }).filter((n) => !isNaN(n))
429
+ );
430
+ if (selectedIds.size === 0) {
431
+ console.error(`Invalid test IDs: ${filterArg}`);
432
+ console.error("Run 'zhihand list' to see available tests.");
433
+ process.exit(1);
434
+ }
435
+ // Explicit selection implies user knows what they're doing
436
+ includeUnsafe = true;
437
+ }
438
+ }
439
+
305
440
  console.log("🧪 ZhiHand Device Test");
306
441
  console.log(` Device: ${testConfig.credentialId}`);
307
442
  console.log(` Endpoint: ${testConfig.controlPlaneEndpoint}\n`);
308
443
 
444
+ // Pre-fetch device profile so platform-aware tests work.
445
+ // Note: platform is read dynamically via getDevicePlatform() so Test 1
446
+ // (Fetch device profile) can populate it before later tests consume it.
447
+ try {
448
+ await fetchDeviceProfile(testConfig);
449
+ } catch { /* non-fatal — Test 1 will retry and platform will update */ }
450
+ const getDevicePlatform = () => isDeviceProfileLoaded() ? getStaticContext().platform : "unknown";
451
+
452
+ // ── Capability readiness pre-flight ──
453
+ console.log(" ── Capability readiness ──");
454
+ const preCaps = isDeviceProfileLoaded() ? getCapabilities() : null;
455
+ if (!preCaps) {
456
+ console.log(" ⚠️ Device profile not loaded — all capability gates will allow tests through.");
457
+ } else {
458
+ const fmt = (name, cap) => ` ${cap.ready ? "✅" : "⚠️"} ${name.padEnd(14)} ${cap.ready ? "ready" : "NOT ready"} — ${cap.reason}`;
459
+ console.log(fmt("screen_sharing", preCaps.screen_sharing));
460
+ console.log(fmt("hid", preCaps.hid));
461
+ console.log(fmt("live_session", preCaps.live_session));
462
+ const ageStr = preCaps.profile.age_ms >= 0 ? `${(preCaps.profile.age_ms / 1000).toFixed(1)}s` : "unknown";
463
+ console.log(` ${preCaps.profile.stale ? "⚠️" : "✅"} profile age=${ageStr}${preCaps.profile.stale ? " (STALE)" : ""}`);
464
+ if (forceRun) {
465
+ console.log(" --force passed: capability gates disabled.");
466
+ }
467
+ }
468
+ console.log("");
469
+
309
470
  let passed = 0;
310
471
  let failed = 0;
472
+ let skipped = 0;
311
473
  let totalSteps = 0;
312
474
 
313
- // ── Helper: run a single HID step ──
314
- async function runHidStep(label, params) {
475
+ // ── Resolve platform-aware params (evaluated at test time) ──
476
+ function resolvePlatformAwareParams(variant) {
477
+ const platform = getDevicePlatform();
478
+ if (variant === "open_wechat") {
479
+ return platform === "ios"
480
+ ? { action: "open_app", bundleId: "com.tencent.xin" }
481
+ : { action: "open_app", appPackage: "com.tencent.mm" };
482
+ }
483
+ if (variant === "clipboard_set") {
484
+ return { action: "clipboard", text: `zhihand_test_${Date.now()}` };
485
+ }
486
+ if (variant === "select_all") {
487
+ const keys = platform === "ios" ? "cmd+a" : "ctrl+a";
488
+ return { action: "keycombo", keys };
489
+ }
490
+ return null;
491
+ }
492
+
493
+ // ── Test runners (shared ACK logic) ──
494
+ async function runCommandTest(t, command) {
315
495
  totalSteps++;
316
- process.stdout.write(` ${label}... `);
496
+ process.stdout.write(` ${String(t.id).padStart(2)}. ${t.label}... `);
317
497
  const t0 = Date.now();
318
498
  try {
319
- const cmd = createControlCommand(params);
320
- const queued = await enqueueCommand(testConfig, cmd);
499
+ const queued = await enqueueCommand(testConfig, command);
321
500
  const ack = await waitForCommandAck(testConfig, { commandId: queued.id, timeoutMs: 10_000 });
322
501
  const ms = Date.now() - t0;
323
502
  if (ack.acked) {
324
503
  const ackStatus = ack.command?.ack_status ?? "ok";
325
- const detail = ackStatus !== "ok" ? ` [${ackStatus}]` : "";
326
504
  const resultInfo = ack.command?.ack_result ? ` ${JSON.stringify(ack.command.ack_result)}` : "";
327
- console.log(`✅ (${ms}ms)${detail}${resultInfo}`);
328
- passed++;
329
- return ack;
505
+ if (ackStatus === "ok") {
506
+ console.log(`✅ (${ms}ms)${resultInfo}`);
507
+ passed++;
508
+ } else {
509
+ console.log(`❌ [${ackStatus}] (${ms}ms)${resultInfo}`);
510
+ failed++;
511
+ }
330
512
  } else {
331
513
  console.log(`⏱️ Timeout (${ms}ms)`);
332
514
  failed++;
333
- return null;
334
515
  }
335
516
  } catch (err) {
336
517
  console.log(`❌ ${err.message} (${Date.now() - t0}ms)`);
337
518
  failed++;
338
- return null;
339
519
  }
340
520
  }
341
521
 
342
- // ── Helper: run a screenshot step ──
343
- async function runScreenshotStep(label) {
344
- totalSteps++;
345
- process.stdout.write(` ${label}... `);
346
- const t0 = Date.now();
347
- try {
348
- const cmd = createControlCommand({ action: "screenshot" });
349
- const queued = await enqueueCommand(testConfig, cmd);
350
- const ack = await waitForCommandAck(testConfig, { commandId: queued.id, timeoutMs: 10_000 });
351
- if (ack.acked) {
352
- const buf = await fetchScreenshotBinary(testConfig);
353
- const ms = Date.now() - t0;
354
- console.log(`✅ ${(buf.length / 1024).toFixed(0)}KB (${ms}ms)`);
355
- passed++;
356
- } else {
357
- console.log(`⏱️ Timeout (${Date.now() - t0}ms)`);
358
- failed++;
522
+ async function runSingleTest(t) {
523
+ // Platform skip (evaluated at test time — Test 1 may have just populated the profile)
524
+ const currentPlatform = getDevicePlatform();
525
+ if (t.platform && t.platform !== currentPlatform) {
526
+ totalSteps++;
527
+ skipped++;
528
+ console.log(` ${String(t.id).padStart(2)}. ${t.label}... ⏭️ Skipped (${t.platform}-only, device is ${currentPlatform})`);
529
+ return;
530
+ }
531
+
532
+ // Capability gate (unless --force) — evaluated per-test so later
533
+ // tests see fresh readiness flags pushed after Test 1's profile fetch.
534
+ if (!forceRun && isDeviceProfileLoaded()) {
535
+ const requiredCap = KIND_CAPABILITY[t.kind] ?? "none";
536
+ if (requiredCap !== "none") {
537
+ const caps = getCapabilities();
538
+ const gate = requiredCap === "screen" ? caps.screen_sharing : caps.hid;
539
+ if (!gate.ready) {
540
+ totalSteps++;
541
+ skipped++;
542
+ console.log(` ${String(t.id).padStart(2)}. ${t.label}... ⏭️ Skipped (${requiredCap} not ready: ${gate.reason})`);
543
+ return;
544
+ }
545
+ }
546
+ }
547
+
548
+ switch (t.kind) {
549
+ case "profile": {
550
+ totalSteps++;
551
+ process.stdout.write(` ${String(t.id).padStart(2)}. ${t.label}... `);
552
+ const t0 = Date.now();
553
+ try {
554
+ await fetchDeviceProfile(testConfig);
555
+ const ms = Date.now() - t0;
556
+ if (isDeviceProfileLoaded()) {
557
+ const s = getStaticContext();
558
+ console.log(`✅ ${s.platform} ${s.model}, ${s.osVersion}, ${s.screenWidthPx}x${s.screenHeightPx} (${ms}ms)`);
559
+ passed++;
560
+ } else {
561
+ console.log(`⚠️ Loaded but empty (${ms}ms)`);
562
+ failed++;
563
+ }
564
+ } catch (err) {
565
+ console.log(`❌ ${err.message} (${Date.now() - t0}ms)`);
566
+ failed++;
567
+ }
568
+ break;
569
+ }
570
+ case "status": {
571
+ totalSteps++;
572
+ process.stdout.write(` ${String(t.id).padStart(2)}. ${t.label}... `);
573
+ try {
574
+ const status = formatDeviceStatus();
575
+ // Count curated top-level fields (excluding the nested 'raw'
576
+ // and 'capabilities' containers) plus every allowlisted raw
577
+ // attribute — this is what the LLM actually sees via
578
+ // zhihand_status.
579
+ const topLevel = Object.keys(status).filter((k) => k !== "raw" && k !== "capabilities");
580
+ const rawKeys = Object.keys(status.raw ?? {});
581
+ const caps = status.capabilities ?? {};
582
+ const capReadySummary = ["screen_sharing", "hid", "live_session"]
583
+ .map((k) => `${k}=${caps[k]?.ready ? "ready" : "not-ready"}`)
584
+ .join(", ");
585
+ console.log(`✅ ${topLevel.length} curated + ${rawKeys.length} raw attributes; ${capReadySummary}`);
586
+ console.log(` curated: ${topLevel.join(", ")}`);
587
+ console.log(` raw: ${rawKeys.join(", ")}`);
588
+ passed++;
589
+ } catch (err) {
590
+ console.log(`❌ ${err.message}`);
591
+ failed++;
592
+ }
593
+ break;
594
+ }
595
+ case "screenshot": {
596
+ totalSteps++;
597
+ process.stdout.write(` ${String(t.id).padStart(2)}. ${t.label}... `);
598
+ const t0 = Date.now();
599
+ try {
600
+ const cmd = createControlCommand({ action: "screenshot" });
601
+ const queued = await enqueueCommand(testConfig, cmd);
602
+ const ack = await waitForCommandAck(testConfig, { commandId: queued.id, timeoutMs: 10_000 });
603
+ if (!ack.acked) {
604
+ console.log(`⏱️ Timeout (${Date.now() - t0}ms)`);
605
+ failed++;
606
+ break;
607
+ }
608
+ const shot = await fetchScreenshot(testConfig);
609
+ const kb = (shot.buffer.length / 1024).toFixed(0);
610
+ const ms = Date.now() - t0;
611
+ // The screenshot endpoint returns the last cached frame even
612
+ // when the phone isn't actively sharing — the age header is
613
+ // the only way to tell. Treat stale as failure.
614
+ if (shot.stale) {
615
+ const threshold = getSnapshotStaleThresholdMs();
616
+ console.log(`❌ Stale (${kb}KB, age=${(shot.ageMs / 1000).toFixed(1)}s > ${(threshold / 1000).toFixed(1)}s) ${shot.width}x${shot.height} seq=${shot.sequence} — phone may not be screen-sharing (${ms}ms)`);
617
+ failed++;
618
+ } else {
619
+ console.log(`✅ ${kb}KB, ${shot.width}x${shot.height}, age=${shot.ageMs >= 0 ? `${shot.ageMs}ms` : "?"}, seq=${shot.sequence} (${ms}ms)`);
620
+ passed++;
621
+ }
622
+ } catch (err) {
623
+ console.log(`❌ ${err.message} (${Date.now() - t0}ms)`);
624
+ failed++;
625
+ }
626
+ break;
627
+ }
628
+ case "hid": {
629
+ const params = t.platformAware ? resolvePlatformAwareParams(t.platformAware) : t.params;
630
+ await runCommandTest(t, createControlCommand(params));
631
+ break;
632
+ }
633
+ case "system": {
634
+ await runCommandTest(t, createSystemCommand(t.params));
635
+ break;
359
636
  }
360
- } catch (err) {
361
- console.log(`❌ ${err.message} (${Date.now() - t0}ms)`);
362
- failed++;
363
637
  }
364
638
  }
365
639
 
366
640
  const pause = () => new Promise((r) => setTimeout(r, 1500));
367
641
 
368
- // ── Phase 1: Device Profile ──────────────────────────────
369
- console.log(" ── Phase 1: Device Info ──");
370
- totalSteps++;
371
- process.stdout.write(" 1. Fetch device profile... ");
372
- {
373
- const t0 = Date.now();
374
- try {
375
- await fetchDeviceProfile(testConfig);
376
- const ms = Date.now() - t0;
377
- if (isDeviceProfileLoaded()) {
378
- const s = getStaticContext();
379
- console.log(`✅ ${s.platform} ${s.model}, ${s.osVersion}, ${s.screenWidthPx}x${s.screenHeightPx} (${ms}ms)`);
380
- passed++;
381
- } else {
382
- console.log(`⚠️ Loaded but empty (${ms}ms)`);
383
- failed++;
384
- }
385
- } catch (err) {
386
- console.log(`❌ ${err.message} (${Date.now() - t0}ms)`);
387
- failed++;
388
- }
642
+ // Select tests to run
643
+ const toRun = REGISTRY.filter((t) => {
644
+ if (selectedIds) return selectedIds.has(t.id);
645
+ if (t.unsafe && !includeUnsafe) return false;
646
+ return true;
647
+ });
648
+
649
+ if (toRun.length === 0) {
650
+ console.error("No matching tests.");
651
+ console.error("Run 'zhihand list' to see available tests.");
652
+ process.exit(1);
389
653
  }
390
654
 
391
- totalSteps++;
392
- process.stdout.write(" 2. Device status fields... ");
393
- {
394
- try {
395
- const status = formatDeviceStatus();
396
- const ignoredDefaults = new Set(["unknown", "0x0", "-1% (unknown)", "0"]);
397
- const fields = Object.keys(status).filter((k) => {
398
- const v = status[k];
399
- if (v === null || v === undefined) return false;
400
- if (ignoredDefaults.has(String(v))) return false;
401
- return true;
402
- });
403
- console.log(`✅ ${fields.length} fields (${fields.join(", ")})`);
404
- passed++;
405
- } catch (err) {
406
- console.log(`❌ ${err.message}`);
407
- failed++;
655
+ // Warn about missing IDs
656
+ if (selectedIds) {
657
+ const foundIds = new Set(toRun.map((t) => t.id));
658
+ const missing = [...selectedIds].filter((id) => !foundIds.has(id));
659
+ if (missing.length) {
660
+ console.warn(`⚠️ Unknown test IDs: ${missing.join(", ")}`);
408
661
  }
409
662
  }
410
663
 
411
- await pause();
412
-
413
- // ── Phase 2: Screenshot + Basic HID ──────────────────────
414
- console.log(" ── Phase 2: Screenshot + HID ──");
415
- await runScreenshotStep("3. Screenshot");
416
- await pause();
417
- await runHidStep("4. Click center", { action: "click", xRatio: 0.5, yRatio: 0.5 });
418
- await pause();
419
- await runHidStep("5. Swipe up", { action: "swipe", startXRatio: 0.5, startYRatio: 0.7, endXRatio: 0.5, endYRatio: 0.3, durationMs: 300 });
420
- await pause();
421
- await runHidStep("6. Swipe down", { action: "swipe", startXRatio: 0.5, startYRatio: 0.3, endXRatio: 0.5, endYRatio: 0.7, durationMs: 300 });
422
- await pause();
423
- await runHidStep("7. Scroll down", { action: "scroll", xRatio: 0.5, yRatio: 0.5, direction: "down", amount: 3 });
424
- await pause();
425
- await runHidStep("8. Scroll up", { action: "scroll", xRatio: 0.5, yRatio: 0.5, direction: "up", amount: 3 });
426
- await pause();
427
-
428
- // ── Phase 3: App + Navigation ────────────────────────────
429
- console.log(" ── Phase 3: App + Navigation ──");
430
- await runHidStep("9. Press Home", { action: "home" });
431
- await pause();
432
- {
433
- const platform = isDeviceProfileLoaded() ? getStaticContext().platform : "android";
434
- const openParams = platform === "ios"
435
- ? { action: "open_app", bundleId: "com.tencent.xin" }
436
- : { action: "open_app", appPackage: "com.tencent.mm" };
437
- await runHidStep(`10. Open WeChat (${platform})`, openParams);
664
+ let currentPhase = "";
665
+ for (let i = 0; i < toRun.length; i++) {
666
+ const t = toRun[i];
667
+ if (t.phase !== currentPhase) {
668
+ console.log(` ── ${t.phase} ──`);
669
+ currentPhase = t.phase;
670
+ }
671
+ await runSingleTest(t);
672
+ if (i < toRun.length - 1) await pause();
438
673
  }
439
- await pause();
440
- await runHidStep("11. Press Back", { action: "back" });
441
- await pause();
442
-
443
- // ── Phase 4: Clipboard Set ─────────────────────────────
444
- // Note: App only supports clipboard set, not get
445
- console.log(" ── Phase 4: Clipboard ──");
446
- const clipboardTestText = `zhihand_test_${Date.now()}`;
447
- await runHidStep("12. Clipboard set", { action: "clipboard", text: clipboardTestText });
448
674
 
449
675
  // ── Summary ──────────────────────────────────────────────
450
- console.log(`\n Result: ${passed}/${totalSteps} passed`);
676
+ console.log(`\n Result: ${passed}/${totalSteps} passed, ${failed} failed, ${skipped} skipped`);
451
677
  if (failed === 0) {
452
678
  console.log(" ✅ All tests passed! Device is fully responsive.");
453
679
  } else {
@@ -37,6 +37,11 @@ export interface WaitForCommandAckResult {
37
37
  command?: QueuedCommandRecord;
38
38
  }
39
39
  export declare function createControlCommand(params: ControlParams): QueuedControlCommand;
40
+ export interface SystemParams {
41
+ action: string;
42
+ text?: string;
43
+ }
44
+ export declare function createSystemCommand(params: SystemParams): QueuedControlCommand;
40
45
  export declare function enqueueCommand(config: ZhiHandConfig, command: QueuedControlCommand): Promise<QueuedCommandRecord>;
41
46
  export declare function getCommand(config: ZhiHandConfig, commandId: string): Promise<QueuedCommandRecord>;
42
47
  export declare function formatAckSummary(action: string, result: WaitForCommandAckResult): string;
@@ -93,6 +93,65 @@ export function createControlCommand(params) {
93
93
  throw new Error(`Unsupported action: ${params.action}`);
94
94
  }
95
95
  }
96
+ const IOS_ONLY_ACTIONS = new Set(["siri", "control_center"]);
97
+ const ANDROID_ONLY_ACTIONS = new Set(["open_browser", "shortcut_help"]);
98
+ export function createSystemCommand(params) {
99
+ const platform = isDeviceProfileLoaded() ? getStaticContext().platform : "unknown";
100
+ // Platform validation — block mismatched platform-specific actions
101
+ if (platform === "android" && IOS_ONLY_ACTIONS.has(params.action)) {
102
+ throw new Error(`Action '${params.action}' is not supported on Android.`);
103
+ }
104
+ if (platform === "ios" && ANDROID_ONLY_ACTIONS.has(params.action)) {
105
+ throw new Error(`Action '${params.action}' is not supported on iOS.`);
106
+ }
107
+ switch (params.action) {
108
+ // System navigation
109
+ case "notification":
110
+ return { type: "receive_notification", payload: {} };
111
+ case "recent":
112
+ return { type: "receive_recent", payload: {} };
113
+ case "search":
114
+ return { type: "receive_search", payload: { query: params.text ?? "" } };
115
+ case "switch_input":
116
+ return { type: "receive_switch_input", payload: {} };
117
+ case "siri":
118
+ return { type: "receive_siri", payload: {} };
119
+ case "control_center":
120
+ return { type: "receive_control_center", payload: {} };
121
+ case "open_browser":
122
+ return { type: "receive_open_browser", payload: {} };
123
+ case "shortcut_help":
124
+ return { type: "receive_shortcut_help", payload: {} };
125
+ // Media controls
126
+ case "volume_up":
127
+ return { type: "receive_volume_up", payload: {} };
128
+ case "volume_down":
129
+ return { type: "receive_volume_down", payload: {} };
130
+ case "mute":
131
+ return { type: "receive_mute", payload: {} };
132
+ case "play_pause":
133
+ return { type: "receive_play_pause", payload: {} };
134
+ case "stop":
135
+ return { type: "receive_stop", payload: {} };
136
+ case "next_track":
137
+ return { type: "receive_next_track", payload: {} };
138
+ case "prev_track":
139
+ return { type: "receive_prev_track", payload: {} };
140
+ case "fast_forward":
141
+ return { type: "receive_fast_forward", payload: {} };
142
+ case "rewind":
143
+ return { type: "receive_rewind", payload: {} };
144
+ // Hardware
145
+ case "brightness_up":
146
+ return { type: "receive_brightness_up", payload: {} };
147
+ case "brightness_down":
148
+ return { type: "receive_brightness_down", payload: {} };
149
+ case "power":
150
+ return { type: "receive_power", payload: {} };
151
+ default:
152
+ throw new Error(`Unsupported system action: ${params.action}`);
153
+ }
154
+ }
96
155
  export async function enqueueCommand(config, command) {
97
156
  const url = `${config.controlPlaneEndpoint}/v1/credentials/${encodeURIComponent(config.credentialId)}/commands`;
98
157
  const body = { command: { ...command, message_id: command.messageId ?? nextMessageId() } };
@@ -37,11 +37,28 @@ export interface DynamicContext {
37
37
  }
38
38
  export declare function getStaticContext(): StaticContext;
39
39
  export declare function getDynamicContext(): DynamicContext;
40
+ export declare function getRawAttributes(): Record<string, unknown>;
41
+ export declare function getProfileAgeMs(): number;
40
42
  export declare function isDeviceProfileLoaded(): boolean;
43
+ export interface Capability {
44
+ ready: boolean;
45
+ reason: string;
46
+ }
47
+ export interface Capabilities {
48
+ screen_sharing: Capability;
49
+ hid: Capability;
50
+ live_session: Capability;
51
+ profile: {
52
+ age_ms: number;
53
+ stale: boolean;
54
+ };
55
+ }
56
+ export declare function getCapabilities(): Capabilities;
41
57
  export declare function extractStatic(profile: Record<string, unknown>): StaticContext;
42
58
  export declare function extractDynamic(profile: Record<string, unknown>): DynamicContext;
43
59
  export declare function updateDeviceProfile(raw: Record<string, unknown>): void;
44
60
  export declare function fetchDeviceProfile(config: ZhiHandConfig): Promise<void>;
45
61
  export declare function buildControlToolDescription(): string;
62
+ export declare function buildSystemToolDescription(): string;
46
63
  export declare function buildScreenshotToolDescription(): string;
47
64
  export declare function formatDeviceStatus(): Record<string, unknown>;
@@ -36,6 +36,11 @@ const DEFAULT_DYNAMIC = {
36
36
  // ── Module state ──────────────────────────────────────────
37
37
  let staticCtx = { ...DEFAULT_STATIC };
38
38
  let dynamicCtx = { ...DEFAULT_DYNAMIC };
39
+ let rawAttributes = {};
40
+ // Local wall-clock timestamp (Date.now(), not monotonic) captured when the profile was last
41
+ // updated. Used for age calculations — avoids distributed clock skew vs.
42
+ // reading server-side `updated_at`.
43
+ let profileReceivedAtMs = 0;
39
44
  let loaded = false;
40
45
  export function getStaticContext() {
41
46
  return staticCtx;
@@ -43,9 +48,66 @@ export function getStaticContext() {
43
48
  export function getDynamicContext() {
44
49
  return dynamicCtx;
45
50
  }
51
+ export function getRawAttributes() {
52
+ return rawAttributes;
53
+ }
54
/**
 * Milliseconds elapsed since the device profile was last stored locally.
 *
 * @returns {number} `Number.POSITIVE_INFINITY` while no profile has been
 *   received yet; otherwise a non-negative age in milliseconds.
 */
export function getProfileAgeMs() {
    if (!loaded || profileReceivedAtMs === 0)
        return Number.POSITIVE_INFINITY;
    // Date.now() is wall-clock time and may step backwards (NTP correction,
    // manual clock change). Clamp so callers never observe a negative age.
    return Math.max(0, Date.now() - profileReceivedAtMs);
}
46
59
  export function isDeviceProfileLoaded() {
47
60
  return loaded;
48
61
  }
62
+ // Max age (ms) before the device profile is considered stale. Set to
63
+ // 60s: profile updates are pushed ~every 10–30s by the phone app.
64
+ const PROFILE_STALE_THRESHOLD_MS = 60_000;
65
/**
 * Summarise which device capabilities are usable right now.
 *
 * Readiness flags are read from the stored raw attributes; a flag that is
 * missing or not a real boolean is treated as unknown (`undefined`). Each
 * capability carries a `ready` boolean and a `reason` string that echoes the
 * wire-format flag values it was derived from.
 *
 * @returns {{
 *   screen_sharing: {ready: boolean, reason: string},
 *   hid: {ready: boolean, reason: string},
 *   live_session: {ready: boolean, reason: string},
 *   profile: {age_ms: number, stale: boolean}
 * }} `profile.age_ms` is -1 when the age is not finite (no profile yet).
 */
export function getCapabilities() {
    const attrs = rawAttributes;
    const readFlag = (key) => {
        const value = attrs[key];
        return typeof value === "boolean" ? value : undefined;
    };
    const recording = readFlag("recording_active");
    const connected = readFlag("hid_connected");
    const bonded = readFlag("hid_bonded");
    const pairing = readFlag("hid_pairing");
    const sessionReady = readFlag("hid_session_ready");
    const liveActive = readFlag("live_session_active");
    const hostReady = readFlag("paired_host_ready");

    // Screen sharing: ready only while the phone reports an active recording.
    const screenReady = recording === true;
    let screenReason;
    if (screenReady) {
        screenReason = "recording_active=true";
    }
    else {
        screenReason = `recording_active=${recording ?? "unknown"} — phone is not screen-sharing; start sharing in the app to enable screenshots`;
    }

    // HID: needs a connected, bonded peripheral that is not mid-pairing.
    // `hid_session_ready` is advisory only and is reported but not required.
    const hidReady = connected === true && bonded === true && !(pairing === true);
    let hidReason;
    if (hidReady) {
        hidReason = `connected=true, bonded=true, session_ready=${sessionReady ?? "unknown"}`;
    }
    else {
        hidReason = `connected=${connected ?? "unknown"}, bonded=${bonded ?? "unknown"}, pairing=${pairing ?? "unknown"}, session_ready=${sessionReady ?? "unknown"} — connect the ZhiHand (BLE HID) to enable input`;
    }

    // Live session: strict AND of an active session and a paired host, so a
    // host left paired from a previous run cannot mask a dead session.
    const liveReady = liveActive === true && hostReady === true;
    let liveReason;
    if (liveReady) {
        liveReason = `live_session_active=${liveActive ?? "-"}, paired_host_ready=${hostReady ?? "-"}`;
    }
    else {
        liveReason = `live_session_active=${liveActive ?? "unknown"}, paired_host_ready=${hostReady ?? "unknown"}`;
    }

    // Profile freshness, measured against the local receive time.
    const profileAgeMs = getProfileAgeMs();
    const profileStale = profileAgeMs > PROFILE_STALE_THRESHOLD_MS;

    return {
        screen_sharing: { ready: screenReady, reason: screenReason },
        hid: { ready: hidReady, reason: hidReason },
        live_session: { ready: liveReady, reason: liveReason },
        profile: {
            age_ms: Number.isFinite(profileAgeMs) ? profileAgeMs : -1,
            stale: profileStale,
        },
    };
}
49
111
  // ── Extract helpers ───────────────────────────────────────
50
112
  function str(v, fallback) {
51
113
  return typeof v === "string" && v ? v : fallback;
@@ -121,6 +183,8 @@ export function updateDeviceProfile(raw) {
121
183
  }
122
184
  staticCtx = extractStatic(profile);
123
185
  dynamicCtx = extractDynamic(profile);
186
+ rawAttributes = profile;
187
+ profileReceivedAtMs = Date.now();
124
188
  loaded = true;
125
189
  dbg(`[device] Profile updated: platform=${staticCtx.platform}, model=${staticCtx.model}, screen=${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}`);
126
190
  }
@@ -176,6 +240,28 @@ export function buildControlToolDescription() {
176
240
  }
177
241
  return desc;
178
242
  }
243
/**
 * Build the description string for the `zhihand_system` tool.
 *
 * Before a device profile is loaded (or while the platform is "unknown") a
 * generic description listing every action is returned. Otherwise the text
 * names the platform and model and lists only the platform-specific actions
 * that apply (iOS or Android).
 *
 * @returns {string} Single-line description, sentences joined by one space.
 */
export function buildSystemToolDescription() {
    if (!loaded || staticCtx.platform === "unknown") {
        return "System navigation and media controls. Actions: notification, recent, search, switch_input, siri (iOS), control_center (iOS), open_browser (Android), shortcut_help (Android), volume_up/down, mute, play_pause, stop, next/prev_track, fast_forward, rewind, brightness_up/down, power.";
    }
    const { platform, model } = staticCtx;
    // Platform-only actions; other platforms contribute no extra sentence.
    let platformSentence = null;
    switch (platform) {
        case "ios":
            platformSentence = "iOS: siri, control_center.";
            break;
        case "android":
            platformSentence = "Android: open_browser, shortcut_help.";
            break;
    }
    const sentences = [
        `System navigation and media controls for ${platform} device (${model}).`,
        "Navigation: notification, recent, search (optional text query), switch_input.",
        ...(platformSentence === null ? [] : [platformSentence]),
        "Media: volume_up, volume_down, mute, play_pause, stop, next_track, prev_track, fast_forward, rewind.",
        "Hardware: brightness_up, brightness_down, power.",
    ];
    return sentences.join(" ");
}
179
265
  export function buildScreenshotToolDescription() {
180
266
  if (!loaded || staticCtx.platform === "unknown") {
181
267
  return "Take a screenshot of the phone screen.";
@@ -183,8 +269,45 @@ export function buildScreenshotToolDescription() {
183
269
  return `Take a screenshot of the ${staticCtx.platform} device (${staticCtx.model}, ${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}).`;
184
270
  }
185
271
  // ── Format status for zhihand_status tool ─────────────────
272
+ // Allowlist of raw attribute keys exposed via zhihand_status.
273
+ // Keeps context window manageable and blocks sensitive/internal fields
274
+ // (e.g. credential_status, full_access_*). Wire-format names are kept
275
+ // verbatim so the LLM can cite them consistently with the server logs.
276
+ const RAW_ATTRIBUTE_ALLOWLIST = [
277
+ // Device identity
278
+ "brand", "manufacturer", "model", "rom_family", "rom_version",
279
+ "system_release", "api_level", "app_version", "app_build",
280
+ // Display / form factor
281
+ "display_width_px", "display_height_px", "density", "density_dpi",
282
+ "screen_width_dp", "screen_height_dp", "smallest_width_dp",
283
+ "form_factor", "orientation", "touchscreen", "navigation_mode",
284
+ // Locale / UI
285
+ "locale", "language", "timezone", "rtl", "dark_mode", "font_scale",
286
+ // Power / thermal / storage
287
+ "battery_level", "battery_state", "available_storage_mb",
288
+ "thermal_state", "low_ram_device",
289
+ // Network
290
+ "network_type",
291
+ // Capability / readiness signals (most important for LLM diagnosis)
292
+ "hid_connected", "hid_bonded", "hid_pairing", "hid_session_ready",
293
+ "live_session_active", "paired_host_ready", "recording_active",
294
+ "recording_archive_enabled", "app_in_foreground", "task_running",
295
+ "emergency_stop_armed", "firmware_update_in_progress",
296
+ "hardware_keyboard_present", "hard_keyboard_hidden",
297
+ "supports_keyboard_prompt_navigation",
298
+ ];
299
/**
 * Copy the allowlisted keys out of the stored raw device attributes.
 *
 * Keys that are absent, or present with an `undefined` value, are omitted.
 * Output keys follow the order of the allowlist.
 *
 * @returns {Record<string, unknown>} A new object with wire-format key names.
 */
function pickAllowlistedRawAttributes() {
    const presentPairs = RAW_ATTRIBUTE_ALLOWLIST
        .filter((key) => key in rawAttributes && rawAttributes[key] !== undefined)
        .map((key) => [key, rawAttributes[key]]);
    return Object.fromEntries(presentPairs);
}
186
308
  export function formatDeviceStatus() {
187
309
  return {
310
+ // Curated summary (human-readable, stable schema)
188
311
  platform: staticCtx.platform,
189
312
  model: staticCtx.model,
190
313
  os_version: staticCtx.osVersion,
@@ -203,5 +326,9 @@ export function formatDeviceStatus() {
203
326
  storage_available_mb: dynamicCtx.availableStorageMb,
204
327
  thermal: dynamicCtx.thermalState ?? "normal",
205
328
  font_scale: dynamicCtx.fontScale,
329
+ // Readiness — always present so LLM knows what works right now
330
+ capabilities: getCapabilities(),
331
+ // Full (allowlisted) attributes from the device — wire-format names
332
+ raw: pickAllowlistedRawAttributes(),
206
333
  };
207
334
  }
@@ -1,2 +1,13 @@
1
1
  import type { ZhiHandConfig } from "./config.ts";
2
+ export declare function getSnapshotStaleThresholdMs(): number;
3
+ export interface ScreenshotResult {
4
+ buffer: Buffer;
5
+ ageMs: number;
6
+ width: number;
7
+ height: number;
8
+ capturedAt: string | null;
9
+ sequence: number;
10
+ stale: boolean;
11
+ }
12
+ export declare function fetchScreenshot(config: ZhiHandConfig): Promise<ScreenshotResult>;
2
13
  export declare function fetchScreenshotBinary(config: ZhiHandConfig): Promise<Buffer>;
@@ -1,5 +1,24 @@
1
1
  import { dbg } from "../daemon/logger.js";
2
- export async function fetchScreenshotBinary(config) {
2
/**
 * Maximum snapshot age (ms) before a screenshot is flagged as stale.
 *
 * Overridable through the `ZHIHAND_SNAPSHOT_MAX_AGE_MS` environment variable;
 * the override is used only when it parses to a finite number greater than 0.
 * The 5s default reflects that a typical HID command + capture + upload is
 * well under 2s, so anything beyond 5s suggests the phone is no longer
 * actively sharing.
 *
 * @returns {number} Threshold in milliseconds.
 */
export function getSnapshotStaleThresholdMs() {
    const fallbackMs = 5000;
    const configured = process.env.ZHIHAND_SNAPSHOT_MAX_AGE_MS;
    if (!configured)
        return fallbackMs;
    const parsed = Number(configured);
    const usable = Number.isFinite(parsed) && parsed > 0;
    return usable ? parsed : fallbackMs;
}
15
/**
 * Parse a numeric response header into a non-negative integer.
 *
 * @param {string | null | undefined} h Raw header value.
 * @returns {number} The value truncated to an integer, or -1 when the header
 *   is missing, blank, not a finite number, or negative.
 */
function parseIntHeader(h) {
    // Number("   ") is 0, so a blank header must be rejected explicitly or it
    // would be reported as a valid zero (e.g. a snapshot age of 0ms).
    if (!h || h.trim() === "")
        return -1;
    const n = Number(h);
    // Callers treat -1 as "unknown"; fold every invalid or negative value
    // into that single sentinel.
    if (!Number.isFinite(n) || n < 0)
        return -1;
    const whole = Math.trunc(n);
    // Normalise -0 to 0.
    return whole === 0 ? 0 : whole;
}
21
+ export async function fetchScreenshot(config) {
3
22
  const controller = new AbortController();
4
23
  const timeoutMs = config.timeoutMs ?? 10_000;
5
24
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
@@ -20,10 +39,31 @@ export async function fetchScreenshotBinary(config) {
20
39
  throw new Error(`Screenshot fetch failed: ${response.status}`);
21
40
  }
22
41
  const buf = Buffer.from(await response.arrayBuffer());
23
- dbg(`[screenshot] OK: ${(buf.length / 1024).toFixed(0)}KB in ${Date.now() - t0}ms`);
24
- return buf;
42
+ const ageMs = parseIntHeader(response.headers.get("x-snapshot-age"));
43
+ const width = parseIntHeader(response.headers.get("x-snapshot-width"));
44
+ const height = parseIntHeader(response.headers.get("x-snapshot-height"));
45
+ const sequence = parseIntHeader(response.headers.get("x-snapshot-sequence"));
46
+ const capturedAt = response.headers.get("x-snapshot-captured-at");
47
+ const threshold = getSnapshotStaleThresholdMs();
48
+ const stale = ageMs >= 0 && ageMs > threshold;
49
+ dbg(`[screenshot] OK: ${(buf.length / 1024).toFixed(0)}KB in ${Date.now() - t0}ms, age=${ageMs}ms, stale=${stale}`);
50
+ return {
51
+ buffer: buf,
52
+ ageMs,
53
+ width: Math.max(width, 0),
54
+ height: Math.max(height, 0),
55
+ capturedAt,
56
+ sequence,
57
+ stale,
58
+ };
25
59
  }
26
60
  finally {
27
61
  clearTimeout(timeout);
28
62
  }
29
63
  }
64
/**
 * Backward-compatible wrapper that resolves to only the image bytes.
 * New code should prefer fetchScreenshot() for staleness info.
 *
 * @param config Configuration forwarded unchanged to fetchScreenshot().
 * @returns {Promise<Buffer>} The `buffer` field of the fetched screenshot.
 */
export async function fetchScreenshotBinary(config) {
    const { buffer } = await fetchScreenshot(config);
    return buffer;
}
@@ -372,6 +372,17 @@ function buildSystemContext() {
372
372
  else {
373
373
  openAppDoc = "- open_app: Open an app. Params: appPackage (Android, e.g. 'com.tencent.mm'), bundleId (iOS), urlScheme (e.g. 'weixin://')";
374
374
  }
375
+ // Platform-specific system actions
376
+ let platformSystemDoc;
377
+ if (static_?.platform === "ios") {
378
+ platformSystemDoc = "- siri: Activate Siri\n- control_center: Open Control Center";
379
+ }
380
+ else if (static_?.platform === "android") {
381
+ platformSystemDoc = "- open_browser: Launch default browser\n- shortcut_help: Show keyboard shortcuts overlay";
382
+ }
383
+ else {
384
+ platformSystemDoc = "- siri: Activate Siri (iOS only)\n- control_center: Open Control Center (iOS only)\n- open_browser: Launch default browser (Android only)\n- shortcut_help: Show keyboard shortcuts overlay (Android only)";
385
+ }
375
386
  return `You are ZhiHand, an AI assistant connected to the user's mobile phone via MCP tools.
376
387
 
377
388
  ## Device
@@ -401,13 +412,35 @@ ${openAppDoc}
401
412
  - screenshot: Capture screen via control (same as zhihand_screenshot)
402
413
  - wait: Wait before next action. Params: durationMs (default 1000)
403
414
 
415
+ ### zhihand_system
416
+ System navigation and media controls. Requires "action" parameter.
417
+
418
+ **System navigation:**
419
+ - notification: Open notification shade/center
420
+ - recent: Show app switcher / recent apps
421
+ - search: Open system search. Optional "text" param to type query after opening
422
+ - switch_input: Switch input method (only works in text input fields)
423
+ ${platformSystemDoc}
424
+
425
+ **Media controls:**
426
+ - volume_up / volume_down: Adjust volume
427
+ - mute: Toggle mute
428
+ - play_pause / stop: Playback control
429
+ - next_track / prev_track: Skip track
430
+ - fast_forward / rewind: Seek
431
+
432
+ **Hardware:**
433
+ - brightness_up / brightness_down: Adjust brightness
434
+ - power: Press power button
435
+
404
436
  ### zhihand_status
405
437
  Get device status: platform, battery, network, BLE connection, dark mode, storage, etc.
406
438
 
407
439
  ## Rules
408
440
  - When the user asks to see their screen, ALWAYS call zhihand_screenshot first.
409
- - When the user asks to open an app (e.g. WeChat, Settings), use open_app action.
410
- - When the user asks to go back/home, use back/home actions.
441
+ - When the user asks to open an app (e.g. WeChat, Settings), use open_app action with zhihand_control.
442
+ - When the user asks to go back/home, use back/home actions with zhihand_control.
443
+ - For system functions (notifications, volume, brightness, media), use zhihand_system.
411
444
  - For all tap/click operations, use xRatio and yRatio (0-1 normalized coordinates based on the screenshot).`;
412
445
  }
413
446
  /**
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
- export declare const PACKAGE_VERSION = "0.26.4";
2
+ export declare const PACKAGE_VERSION = "0.29.0";
3
3
  export declare function createServer(deviceName?: string): McpServer;
4
4
  export declare function startStdioServer(deviceName?: string): Promise<void>;
package/dist/index.js CHANGED
@@ -1,12 +1,13 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
3
  import { resolveConfig } from "./core/config.js";
4
- import { controlSchema, screenshotSchema, pairSchema } from "./tools/schemas.js";
4
+ import { controlSchema, systemSchema, screenshotSchema, pairSchema } from "./tools/schemas.js";
5
5
  import { executeControl } from "./tools/control.js";
6
+ import { executeSystem } from "./tools/system.js";
6
7
  import { handleScreenshot } from "./tools/screenshot.js";
7
8
  import { handlePair } from "./tools/pair.js";
8
- import { getStaticContext, getDynamicContext, fetchDeviceProfile, buildControlToolDescription, buildScreenshotToolDescription, formatDeviceStatus, } from "./core/device.js";
9
- export const PACKAGE_VERSION = "0.26.4";
9
+ import { getStaticContext, getDynamicContext, fetchDeviceProfile, buildControlToolDescription, buildSystemToolDescription, buildScreenshotToolDescription, formatDeviceStatus, } from "./core/device.js";
10
+ export const PACKAGE_VERSION = "0.29.0";
10
11
  export function createServer(deviceName) {
11
12
  const server = new McpServer({
12
13
  name: "zhihand",
@@ -18,13 +19,18 @@ export function createServer(deviceName) {
18
19
  const config = resolveConfig(deviceName);
19
20
  return await executeControl(config, params);
20
21
  });
22
+ // zhihand_system — system navigation + media controls (separate tool per Gemini design review)
23
+ server.tool("zhihand_system", buildSystemToolDescription(), systemSchema, async (params) => {
24
+ const config = resolveConfig(deviceName);
25
+ return await executeSystem(config, params);
26
+ });
21
27
  // zhihand_screenshot — capture current screen without any action
22
28
  server.tool("zhihand_screenshot", buildScreenshotToolDescription(), screenshotSchema, async () => {
23
29
  const config = resolveConfig(deviceName);
24
30
  return await handleScreenshot(config);
25
31
  });
26
32
  // zhihand_status — return device context for LLM to query on demand
27
- server.tool("zhihand_status", "Get device status: platform, model, OS version, screen size, battery, network, BLE, dark mode, storage, and more.", {}, async () => {
33
+ server.tool("zhihand_status", "Get device status and capability readiness. Returns curated fields (platform, model, OS, screen, battery, network, BLE, ...), a `capabilities` object with `ready`/`reason` for screen_sharing, hid, live_session, profile.age, AND a `raw` map of allowlisted device attributes (wire-format names). Call this BEFORE issuing commands if you are unsure whether the phone is screen-sharing or the ZhiHand (BLE HID) is connected.", {}, async () => {
28
34
  return {
29
35
  content: [{
30
36
  type: "text",
@@ -1,20 +1,46 @@
1
1
  import { createControlCommand, enqueueCommand, formatAckSummary } from "../core/command.js";
2
- import { fetchScreenshotBinary } from "../core/screenshot.js";
2
+ import { fetchScreenshot } from "../core/screenshot.js";
3
3
  import { waitForCommandAck } from "../core/sse.js";
4
+ import { getCapabilities, isDeviceProfileLoaded } from "../core/device.js";
4
5
  function sleep(ms) {
5
6
  return new Promise((r) => setTimeout(r, ms));
6
7
  }
8
/**
 * Build a short human-readable warning for the LLM if the underlying
 * capability isn't ready, or if the last screenshot is stale. Returns an
 * empty string when everything is nominal.
 *
 * Capability and profile-age warnings need a loaded device profile and are
 * skipped without one. The stale-screenshot warning is derived from the
 * screenshot result alone, so it is emitted whether or not a profile is
 * loaded.
 *
 * @param {"hid" | "screen"} requiredCapability Capability the calling action
 *   depends on.
 * @param {{stale: boolean, ageMs: number} | null | undefined} screenshot
 *   Result of the most recent screenshot fetch, if any.
 * @returns {string} Warnings joined by newlines, or "" when there are none.
 */
function buildReadinessWarning(requiredCapability, screenshot) {
    const warnings = [];
    // Without a profile the readiness flags are unknown; do not warn on them.
    const caps = isDeviceProfileLoaded() ? getCapabilities() : null;
    if (caps && requiredCapability === "hid" && !caps.hid.ready) {
        warnings.push(`⚠️ HID not ready: ${caps.hid.reason}`);
    }
    if (caps && requiredCapability === "screen" && !caps.screen_sharing.ready) {
        warnings.push(`⚠️ Screen sharing not active: ${caps.screen_sharing.reason}`);
    }
    // Independent of the device profile: staleness comes from the snapshot.
    if (screenshot && screenshot.stale) {
        warnings.push(`⚠️ Stale screenshot: age=${(screenshot.ageMs / 1000).toFixed(1)}s (phone may not be actively sharing the screen).`);
    }
    if (caps && caps.profile.stale) {
        warnings.push(`⚠️ Stale device profile: ${(caps.profile.age_ms / 1000).toFixed(1)}s old — readiness flags may be out of date.`);
    }
    return warnings.join("\n");
}
7
32
  export async function executeControl(config, params) {
8
33
  // wait: Plugin-local implementation, no server round-trip
9
34
  if (params.action === "wait") {
10
35
  await sleep(params.durationMs ?? 1000);
11
- const screenshot = await fetchScreenshotBinary(config);
12
- return {
13
- content: [
14
- { type: "text", text: `Waited ${params.durationMs ?? 1000}ms` },
15
- { type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" },
16
- ],
17
- };
36
+ const shot = await fetchScreenshot(config);
37
+ const warning = buildReadinessWarning("screen", shot);
38
+ const content = [];
39
+ if (warning)
40
+ content.push({ type: "text", text: warning });
41
+ content.push({ type: "text", text: `Waited ${params.durationMs ?? 1000}ms` });
42
+ content.push({ type: "image", data: shot.buffer.toString("base64"), mimeType: "image/jpeg" });
43
+ return { content };
18
44
  }
19
45
  // screenshot: send receive_screenshot, App captures immediately (no 2s delay)
20
46
  if (params.action === "screenshot") {
@@ -24,29 +50,38 @@ export async function executeControl(config, params) {
24
50
  const command = createControlCommand(params);
25
51
  const queued = await enqueueCommand(config, command);
26
52
  const ack = await waitForCommandAck(config, { commandId: queued.id, timeoutMs: 15_000 });
27
- const content = [
28
- { type: "text", text: formatAckSummary(params.action, ack) },
29
- ];
53
+ const content = [];
54
+ let shot = null;
30
55
  if (ack.acked) {
31
56
  try {
32
- const screenshot = await fetchScreenshotBinary(config);
33
- content.push({ type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" });
57
+ shot = await fetchScreenshot(config);
34
58
  }
35
59
  catch {
36
60
  // Screenshot is best-effort after ACK
37
61
  }
38
62
  }
63
+ const warning = buildReadinessWarning("hid", shot);
64
+ if (warning)
65
+ content.push({ type: "text", text: warning });
66
+ content.push({ type: "text", text: formatAckSummary(params.action, ack) });
67
+ if (shot) {
68
+ content.push({ type: "image", data: shot.buffer.toString("base64"), mimeType: "image/jpeg" });
69
+ }
39
70
  return { content };
40
71
  }
41
72
  export async function executeScreenshot(config) {
42
73
  const command = createControlCommand({ action: "screenshot" });
43
74
  const queued = await enqueueCommand(config, command);
44
75
  const ack = await waitForCommandAck(config, { commandId: queued.id, timeoutMs: 5_000 });
45
- const screenshot = await fetchScreenshotBinary(config);
46
- return {
47
- content: [
48
- { type: "text", text: `Screenshot captured (acked: ${ack.acked})` },
49
- { type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" },
50
- ],
51
- };
76
+ const shot = await fetchScreenshot(config);
77
+ const warning = buildReadinessWarning("screen", shot);
78
+ const content = [];
79
+ if (warning)
80
+ content.push({ type: "text", text: warning });
81
+ content.push({
82
+ type: "text",
83
+ text: `Screenshot captured (acked: ${ack.acked}, age: ${shot.ageMs >= 0 ? `${shot.ageMs}ms` : "unknown"}, size: ${shot.width}x${shot.height}, seq: ${shot.sequence})`,
84
+ });
85
+ content.push({ type: "image", data: shot.buffer.toString("base64"), mimeType: "image/jpeg" });
86
+ return { content };
52
87
  }
@@ -16,6 +16,10 @@ export declare const controlSchema: {
16
16
  bundleId: z.ZodOptional<z.ZodString>;
17
17
  urlScheme: z.ZodOptional<z.ZodString>;
18
18
  };
19
+ export declare const systemSchema: {
20
+ action: z.ZodEnum<["notification", "recent", "search", "switch_input", "siri", "control_center", "open_browser", "shortcut_help", "volume_up", "volume_down", "mute", "play_pause", "stop", "next_track", "prev_track", "fast_forward", "rewind", "brightness_up", "brightness_down", "power"]>;
21
+ text: z.ZodOptional<z.ZodString>;
22
+ };
19
23
  export declare const screenshotSchema: {};
20
24
  export declare const pairSchema: {
21
25
  forceNew: z.ZodOptional<z.ZodDefault<z.ZodBoolean>>;
@@ -23,6 +23,24 @@ export const controlSchema = {
23
23
  bundleId: z.string().optional().describe("iOS bundle ID, e.g. 'com.tencent.xin'"),
24
24
  urlScheme: z.string().optional().describe("URL scheme, e.g. 'weixin://'"),
25
25
  };
26
+ // zhihand_system — system navigation + media controls (separate from UI control)
27
+ export const systemSchema = {
28
+ action: z.enum([
29
+ // System navigation — cross-platform
30
+ "notification", "recent", "search", "switch_input",
31
+ // System navigation — iOS only
32
+ "siri", "control_center",
33
+ // System navigation — Android only
34
+ "open_browser", "shortcut_help",
35
+ // Media controls — cross-platform
36
+ "volume_up", "volume_down", "mute",
37
+ "play_pause", "stop", "next_track", "prev_track",
38
+ "fast_forward", "rewind",
39
+ // Hardware — cross-platform
40
+ "brightness_up", "brightness_down", "power",
41
+ ]).describe("System or media action to perform"),
42
+ text: z.string().optional().describe("Optional text, e.g. search query for 'search' action"),
43
+ };
26
44
  export const screenshotSchema = {};
27
45
  export const pairSchema = {
28
46
  forceNew: z.boolean().default(false).optional().describe("Force new pairing even if already paired"),
@@ -0,0 +1,17 @@
1
+ /**
2
+ * zhihand_system tool handler — system navigation + media controls.
3
+ *
4
+ * Separated from zhihand_control to keep UI-control schema focused and
5
+ * reduce LLM parameter hallucination (Gemini design review recommendation).
6
+ */
7
+ import type { ZhiHandConfig } from "../core/config.ts";
8
+ import type { SystemParams } from "../core/command.ts";
9
+ type TextContent = {
10
+ type: "text";
11
+ text: string;
12
+ };
13
+ type ToolResult = {
14
+ content: TextContent[];
15
+ };
16
+ export declare function executeSystem(config: ZhiHandConfig, params: SystemParams): Promise<ToolResult>;
17
+ export {};
@@ -0,0 +1,11 @@
1
+ import { createSystemCommand, enqueueCommand, formatAckSummary } from "../core/command.js";
2
+ import { waitForCommandAck } from "../core/sse.js";
3
/**
 * Handle a `zhihand_system` tool call: build the system command, enqueue it,
 * wait up to 15s for the acknowledgement, and report the outcome as text.
 *
 * @param config Configuration passed to the enqueue and ACK-wait helpers.
 * @param params Tool parameters; `params.action` labels the ACK summary.
 * @returns Tool result containing a single text item with the ACK summary.
 */
export async function executeSystem(config, params) {
    const { id: commandId } = await enqueueCommand(config, createSystemCommand(params));
    const ack = await waitForCommandAck(config, { commandId, timeoutMs: 15_000 });
    const text = formatAckSummary(params.action, ack);
    return { content: [{ type: "text", text }] };
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zhihand/mcp",
3
- "version": "0.26.4",
3
+ "version": "0.29.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "ZhiHand MCP Server — phone control tools for Claude Code, Codex, Gemini CLI, and OpenClaw",