@zhihand/mcp 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/zhihand CHANGED
@@ -27,6 +27,7 @@ const { positionals, values } = parseArgs({
27
27
  help: { type: "boolean", short: "h", default: false },
28
28
  detach: { type: "boolean", short: "d", default: false },
29
29
  debug: { type: "boolean", default: false },
30
+ force: { type: "boolean", default: false },
30
31
  port: { type: "string" },
31
32
  },
32
33
  });
@@ -54,9 +55,10 @@ Usage:
54
55
  zhihand detect Detect available CLI tools
55
56
 
56
57
  zhihand list List all available tests with IDs
57
- zhihand test Run all safe device tests
58
+ zhihand test Run all safe device tests (skips capability-gated)
58
59
  zhihand test <ids> Run specific test(s), e.g. 'zhihand test 4' or '4,9,20'
59
60
  zhihand test all Run ALL tests (including unsafe, e.g. power button)
61
+ zhihand test --force Bypass capability gates (run anyway even if NOT ready)
60
62
  zhihand serve Start MCP Server (stdio mode, backward compat)
61
63
 
62
64
  Options:
@@ -65,6 +67,7 @@ Options:
65
67
  --port <port> Override daemon port (default: 18686)
66
68
  -d, --detach Run daemon in background
67
69
  --debug Enable verbose debug logging
70
+ --force (test only) Run tests even if capability not ready
68
71
  -h, --help Show this help
69
72
  `);
70
73
  process.exit(0);
@@ -294,13 +297,31 @@ switch (command) {
294
297
  const { resolveConfig: resolveTestConfig } = await import("../dist/core/config.js");
295
298
  const { createControlCommand, createSystemCommand, enqueueCommand } = await import("../dist/core/command.js");
296
299
  const { waitForCommandAck } = await import("../dist/core/sse.js");
297
- const { fetchScreenshotBinary } = await import("../dist/core/screenshot.js");
298
- const { fetchDeviceProfile, getStaticContext, isDeviceProfileLoaded, formatDeviceStatus } = await import("../dist/core/device.js");
300
+ const { fetchScreenshot, getSnapshotStaleThresholdMs } = await import("../dist/core/screenshot.js");
301
+ const { fetchDeviceProfile, getStaticContext, isDeviceProfileLoaded, formatDeviceStatus, getCapabilities } = await import("../dist/core/device.js");
299
302
 
300
303
  // ── Test Registry ────────────────────────────────────────
301
304
  // Kind: "profile" | "status" | "screenshot" | "hid" | "system"
305
+ // - Each kind maps to a required capability (see KIND_CAPABILITY).
302
306
  // Platform: undefined | "android" | "ios" (skipped on non-matching)
303
307
  // Unsafe: won't run in full-suite unless explicitly requested
308
+
309
+ // Required capability per test kind. Tests whose required capability
310
+ // is not ready are SKIPPED (not failed), unless --force is passed.
311
+ //
312
+ // NOTE: `system` commands (volume, brightness, notification, media,
313
+ // etc.) are executed by the phone app via native OS APIs
314
+ // (AccessibilityService on Android, Shortcuts/system hooks on iOS)
315
+ // and do NOT depend on the BLE HID channel. Only the `hid` kind
316
+ // (click, swipe, type, keycombo — which inject into the paired
317
+ // target via the ZhiHand peripheral) needs the HID capability.
318
+ const KIND_CAPABILITY = {
319
+ profile: "none",
320
+ status: "none",
321
+ screenshot: "screen",
322
+ hid: "hid",
323
+ system: "none",
324
+ };
304
325
  const REGISTRY = [
305
326
  // Phase A — Device Info API
306
327
  { id: 1, phase: "Device Info", label: "Fetch device profile", kind: "profile" },
@@ -391,6 +412,7 @@ switch (command) {
391
412
 
392
413
  // Parse which tests to run from positional args
393
414
  const filterArg = positionals[1]; // e.g. "4" or "4,9,20" or "all"
415
+ const forceRun = values.force === true;
394
416
  let selectedIds = null; // null = default (all safe)
395
417
  let includeUnsafe = false;
396
418
  if (filterArg) {
@@ -427,6 +449,24 @@ switch (command) {
427
449
  } catch { /* non-fatal — Test 1 will retry and platform will update */ }
428
450
  const getDevicePlatform = () => isDeviceProfileLoaded() ? getStaticContext().platform : "unknown";
429
451
 
452
+ // ── Capability readiness pre-flight ──
453
+ console.log(" ── Capability readiness ──");
454
+ const preCaps = isDeviceProfileLoaded() ? getCapabilities() : null;
455
+ if (!preCaps) {
456
+ console.log(" ⚠️ Device profile not loaded — all capability gates will allow tests through.");
457
+ } else {
458
+ const fmt = (name, cap) => ` ${cap.ready ? "✅" : "⚠️"} ${name.padEnd(14)} ${cap.ready ? "ready" : "NOT ready"} — ${cap.reason}`;
459
+ console.log(fmt("screen_sharing", preCaps.screen_sharing));
460
+ console.log(fmt("hid", preCaps.hid));
461
+ console.log(fmt("live_session", preCaps.live_session));
462
+ const ageStr = preCaps.profile.age_ms >= 0 ? `${(preCaps.profile.age_ms / 1000).toFixed(1)}s` : "unknown";
463
+ console.log(` ${preCaps.profile.stale ? "⚠️" : "✅"} profile age=${ageStr}${preCaps.profile.stale ? " (STALE)" : ""}`);
464
+ if (forceRun) {
465
+ console.log(" --force passed: capability gates disabled.");
466
+ }
467
+ }
468
+ console.log("");
469
+
430
470
  let passed = 0;
431
471
  let failed = 0;
432
472
  let skipped = 0;
@@ -489,6 +529,22 @@ switch (command) {
489
529
  return;
490
530
  }
491
531
 
532
+ // Capability gate (unless --force) — evaluated per-test so later
533
+ // tests see fresh readiness flags pushed after Test 1's profile fetch.
534
+ if (!forceRun && isDeviceProfileLoaded()) {
535
+ const requiredCap = KIND_CAPABILITY[t.kind] ?? "none";
536
+ if (requiredCap !== "none") {
537
+ const caps = getCapabilities();
538
+ const gate = requiredCap === "screen" ? caps.screen_sharing : caps.hid;
539
+ if (!gate.ready) {
540
+ totalSteps++;
541
+ skipped++;
542
+ console.log(` ${String(t.id).padStart(2)}. ${t.label}... ⏭️ Skipped (${requiredCap} not ready: ${gate.reason})`);
543
+ return;
544
+ }
545
+ }
546
+ }
547
+
492
548
  switch (t.kind) {
493
549
  case "profile": {
494
550
  totalSteps++;
@@ -516,14 +572,19 @@ switch (command) {
516
572
  process.stdout.write(` ${String(t.id).padStart(2)}. ${t.label}... `);
517
573
  try {
518
574
  const status = formatDeviceStatus();
519
- const ignoredDefaults = new Set(["unknown", "0x0", "-1% (unknown)", "0"]);
520
- const fields = Object.keys(status).filter((k) => {
521
- const v = status[k];
522
- if (v === null || v === undefined) return false;
523
- if (ignoredDefaults.has(String(v))) return false;
524
- return true;
525
- });
526
- console.log(`✅ ${fields.length} fields (${fields.join(", ")})`);
575
+ // Count curated top-level fields (excluding the nested 'raw'
576
+ // and 'capabilities' containers) plus every allowlisted raw
577
+ // attribute — this is what the LLM actually sees via
578
+ // zhihand_status.
579
+ const topLevel = Object.keys(status).filter((k) => k !== "raw" && k !== "capabilities");
580
+ const rawKeys = Object.keys(status.raw ?? {});
581
+ const caps = status.capabilities ?? {};
582
+ const capReadySummary = ["screen_sharing", "hid", "live_session"]
583
+ .map((k) => `${k}=${caps[k]?.ready ? "ready" : "not-ready"}`)
584
+ .join(", ");
585
+ console.log(`✅ ${topLevel.length} curated + ${rawKeys.length} raw attributes; ${capReadySummary}`);
586
+ console.log(` curated: ${topLevel.join(", ")}`);
587
+ console.log(` raw: ${rawKeys.join(", ")}`);
527
588
  passed++;
528
589
  } catch (err) {
529
590
  console.log(`❌ ${err.message}`);
@@ -539,13 +600,24 @@ switch (command) {
539
600
  const cmd = createControlCommand({ action: "screenshot" });
540
601
  const queued = await enqueueCommand(testConfig, cmd);
541
602
  const ack = await waitForCommandAck(testConfig, { commandId: queued.id, timeoutMs: 10_000 });
542
- if (ack.acked) {
543
- const buf = await fetchScreenshotBinary(testConfig);
544
- console.log(`✅ ${(buf.length / 1024).toFixed(0)}KB (${Date.now() - t0}ms)`);
545
- passed++;
546
- } else {
603
+ if (!ack.acked) {
547
604
  console.log(`⏱️ Timeout (${Date.now() - t0}ms)`);
548
605
  failed++;
606
+ break;
607
+ }
608
+ const shot = await fetchScreenshot(testConfig);
609
+ const kb = (shot.buffer.length / 1024).toFixed(0);
610
+ const ms = Date.now() - t0;
611
+ // The screenshot endpoint returns the last cached frame even
612
+ // when the phone isn't actively sharing — the age header is
613
+ // the only way to tell. Treat stale as failure.
614
+ if (shot.stale) {
615
+ const threshold = getSnapshotStaleThresholdMs();
616
+ console.log(`❌ Stale (${kb}KB, age=${(shot.ageMs / 1000).toFixed(1)}s > ${(threshold / 1000).toFixed(1)}s) ${shot.width}x${shot.height} seq=${shot.sequence} — phone may not be screen-sharing (${ms}ms)`);
617
+ failed++;
618
+ } else {
619
+ console.log(`✅ ${kb}KB, ${shot.width}x${shot.height}, age=${shot.ageMs >= 0 ? `${shot.ageMs}ms` : "?"}, seq=${shot.sequence} (${ms}ms)`);
620
+ passed++;
549
621
  }
550
622
  } catch (err) {
551
623
  console.log(`❌ ${err.message} (${Date.now() - t0}ms)`);
@@ -37,7 +37,23 @@ export interface DynamicContext {
37
37
  }
38
38
  export declare function getStaticContext(): StaticContext;
39
39
  export declare function getDynamicContext(): DynamicContext;
40
+ export declare function getRawAttributes(): Record<string, unknown>;
41
+ export declare function getProfileAgeMs(): number;
40
42
  export declare function isDeviceProfileLoaded(): boolean;
43
+ export interface Capability {
44
+ ready: boolean;
45
+ reason: string;
46
+ }
47
+ export interface Capabilities {
48
+ screen_sharing: Capability;
49
+ hid: Capability;
50
+ live_session: Capability;
51
+ profile: {
52
+ age_ms: number;
53
+ stale: boolean;
54
+ };
55
+ }
56
+ export declare function getCapabilities(): Capabilities;
41
57
  export declare function extractStatic(profile: Record<string, unknown>): StaticContext;
42
58
  export declare function extractDynamic(profile: Record<string, unknown>): DynamicContext;
43
59
  export declare function updateDeviceProfile(raw: Record<string, unknown>): void;
@@ -36,6 +36,11 @@ const DEFAULT_DYNAMIC = {
36
36
  // ── Module state ──────────────────────────────────────────
37
37
  let staticCtx = { ...DEFAULT_STATIC };
38
38
  let dynamicCtx = { ...DEFAULT_DYNAMIC };
39
+ let rawAttributes = {};
40
+ // Local wall-clock timestamp (Date.now()) captured when the profile was last
41
+ // updated. Used for age calculations — avoids distributed clock skew vs.
42
+ // reading server-side `updated_at`.
43
+ let profileReceivedAtMs = 0;
39
44
  let loaded = false;
40
45
  export function getStaticContext() {
41
46
  return staticCtx;
@@ -43,9 +48,66 @@ export function getStaticContext() {
43
48
  export function getDynamicContext() {
44
49
  return dynamicCtx;
45
50
  }
51
/**
 * Expose the raw attribute map stored by the most recent profile update.
 *
 * The module's own object is handed back (no copy is made), so the result
 * is the same reference on every call until the profile is replaced.
 *
 * @returns {Record<string, unknown>} the current raw attribute object.
 */
export function getRawAttributes() {
    const current = rawAttributes;
    return current;
}
54
/**
 * Milliseconds elapsed since the device profile was last stored locally.
 *
 * @returns {number} `Number.POSITIVE_INFINITY` when no profile has been
 *   loaded (or no receive time was recorded); otherwise a non-negative age
 *   in milliseconds.
 */
export function getProfileAgeMs() {
    if (!loaded || profileReceivedAtMs === 0) {
        return Number.POSITIVE_INFINITY;
    }
    // Date.now() is wall-clock time, not a monotonic clock: if the system
    // clock is stepped backwards after the profile was stored, the raw
    // difference goes negative. Negative ages are what consumers use as the
    // "unknown" sentinel, so clamp to zero to keep a known age known.
    return Math.max(0, Date.now() - profileReceivedAtMs);
}
46
59
  export function isDeviceProfileLoaded() {
47
60
  return loaded;
48
61
  }
62
// A device profile older than this many milliseconds is reported as stale.
// Set to 60s; per the original author's note the phone app pushes profile
// updates roughly every 10–30s.
const PROFILE_STALE_THRESHOLD_MS = 60_000;
/**
 * Derive readiness of each capability from the raw profile attributes.
 *
 * Only attributes that are real booleans are trusted; anything else
 * (missing, string, number) is treated as unknown and rendered as
 * "unknown" in the human-readable reason.
 *
 * @returns {{
 *   screen_sharing: {ready: boolean, reason: string},
 *   hid: {ready: boolean, reason: string},
 *   live_session: {ready: boolean, reason: string},
 *   profile: {age_ms: number, stale: boolean},
 * }} `profile.age_ms` is -1 when the age is not finite.
 */
export function getCapabilities() {
    const attrs = rawAttributes;
    // Read one attribute, keeping it only when it is strictly a boolean.
    const flag = (key) => {
        const value = attrs[key];
        return typeof value === "boolean" ? value : undefined;
    };
    // Text used for a flag inside a reason string.
    const show = (value) => value ?? "unknown";

    const recording = flag("recording_active");
    const connected = flag("hid_connected");
    const bonded = flag("hid_bonded");
    const pairing = flag("hid_pairing");
    const sessionReady = flag("hid_session_ready");
    const liveActive = flag("live_session_active");
    const hostReady = flag("paired_host_ready");

    const screenReady = recording === true;
    // HID needs a connected, bonded peripheral that is not mid-pairing.
    // `hid_session_ready` is reported but deliberately not required.
    const hidReady = connected === true && bonded === true && pairing !== true;
    // Both the active session and the paired host are required.
    const liveReady = liveActive === true && hostReady === true;

    let screenReason = "recording_active=true";
    if (!screenReady) {
        screenReason = `recording_active=${show(recording)} — phone is not screen-sharing; start sharing in the app to enable screenshots`;
    }

    let hidReason = `connected=true, bonded=true, session_ready=${show(sessionReady)}`;
    if (!hidReady) {
        hidReason = `connected=${show(connected)}, bonded=${show(bonded)}, pairing=${show(pairing)}, session_ready=${show(sessionReady)} — connect the ZhiHand (BLE HID) to enable input`;
    }

    // When ready, both flags are necessarily `true`, so no fallback applies.
    let liveReason = `live_session_active=${liveActive}, paired_host_ready=${hostReady}`;
    if (!liveReady) {
        liveReason = `live_session_active=${show(liveActive)}, paired_host_ready=${show(hostReady)}`;
    }

    const ageMs = getProfileAgeMs();
    return {
        screen_sharing: { ready: screenReady, reason: screenReason },
        hid: { ready: hidReady, reason: hidReason },
        live_session: { ready: liveReady, reason: liveReason },
        profile: {
            age_ms: Number.isFinite(ageMs) ? ageMs : -1,
            stale: ageMs > PROFILE_STALE_THRESHOLD_MS,
        },
    };
}
49
111
  // ── Extract helpers ───────────────────────────────────────
50
112
  function str(v, fallback) {
51
113
  return typeof v === "string" && v ? v : fallback;
@@ -121,6 +183,8 @@ export function updateDeviceProfile(raw) {
121
183
  }
122
184
  staticCtx = extractStatic(profile);
123
185
  dynamicCtx = extractDynamic(profile);
186
+ rawAttributes = profile;
187
+ profileReceivedAtMs = Date.now();
124
188
  loaded = true;
125
189
  dbg(`[device] Profile updated: platform=${staticCtx.platform}, model=${staticCtx.model}, screen=${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}`);
126
190
  }
@@ -205,8 +269,45 @@ export function buildScreenshotToolDescription() {
205
269
  return `Take a screenshot of the ${staticCtx.platform} device (${staticCtx.model}, ${staticCtx.screenWidthPx}x${staticCtx.screenHeightPx}).`;
206
270
  }
207
271
  // ── Format status for zhihand_status tool ─────────────────
272
// Raw attribute keys that zhihand_status is allowed to expose. Anything not
// listed here (the original note cites credential_status and full_access_*
// as examples) is withheld, which also keeps the payload small. Keys are the
// wire-format names, unchanged.
const RAW_ATTRIBUTE_ALLOWLIST = [
    // Device identity
    "brand",
    "manufacturer",
    "model",
    "rom_family",
    "rom_version",
    "system_release",
    "api_level",
    "app_version",
    "app_build",
    // Display / form factor
    "display_width_px",
    "display_height_px",
    "density",
    "density_dpi",
    "screen_width_dp",
    "screen_height_dp",
    "smallest_width_dp",
    "form_factor",
    "orientation",
    "touchscreen",
    "navigation_mode",
    // Locale / UI
    "locale",
    "language",
    "timezone",
    "rtl",
    "dark_mode",
    "font_scale",
    // Power / thermal / storage
    "battery_level",
    "battery_state",
    "available_storage_mb",
    "thermal_state",
    "low_ram_device",
    // Network
    "network_type",
    // Capability / readiness signals
    "hid_connected",
    "hid_bonded",
    "hid_pairing",
    "hid_session_ready",
    "live_session_active",
    "paired_host_ready",
    "recording_active",
    "recording_archive_enabled",
    "app_in_foreground",
    "task_running",
    "emergency_stop_armed",
    "firmware_update_in_progress",
    "hardware_keyboard_present",
    "hard_keyboard_hidden",
    "supports_keyboard_prompt_navigation",
];
/**
 * Copy the allowlisted keys out of the current raw attributes.
 *
 * A key is copied only when it is present and its value is not `undefined`
 * (`null` and `false` are kept). Output keys follow allowlist order.
 *
 * @returns {Record<string, unknown>} a new object holding the exposed subset.
 */
function pickAllowlistedRawAttributes() {
    const exposed = RAW_ATTRIBUTE_ALLOWLIST
        .filter((key) => key in rawAttributes && rawAttributes[key] !== undefined)
        .map((key) => [key, rawAttributes[key]]);
    return Object.fromEntries(exposed);
}
208
308
  export function formatDeviceStatus() {
209
309
  return {
310
+ // Curated summary (human-readable, stable schema)
210
311
  platform: staticCtx.platform,
211
312
  model: staticCtx.model,
212
313
  os_version: staticCtx.osVersion,
@@ -225,5 +326,9 @@ export function formatDeviceStatus() {
225
326
  storage_available_mb: dynamicCtx.availableStorageMb,
226
327
  thermal: dynamicCtx.thermalState ?? "normal",
227
328
  font_scale: dynamicCtx.fontScale,
329
+ // Readiness — always present so LLM knows what works right now
330
+ capabilities: getCapabilities(),
331
+ // Full (allowlisted) attributes from the device — wire-format names
332
+ raw: pickAllowlistedRawAttributes(),
228
333
  };
229
334
  }
@@ -1,2 +1,13 @@
1
1
  import type { ZhiHandConfig } from "./config.ts";
2
+ export declare function getSnapshotStaleThresholdMs(): number;
3
+ export interface ScreenshotResult {
4
+ buffer: Buffer;
5
+ ageMs: number;
6
+ width: number;
7
+ height: number;
8
+ capturedAt: string | null;
9
+ sequence: number;
10
+ stale: boolean;
11
+ }
12
+ export declare function fetchScreenshot(config: ZhiHandConfig): Promise<ScreenshotResult>;
2
13
  export declare function fetchScreenshotBinary(config: ZhiHandConfig): Promise<Buffer>;
@@ -1,5 +1,24 @@
1
1
  import { dbg } from "../daemon/logger.js";
2
- export async function fetchScreenshotBinary(config) {
2
/**
 * Maximum snapshot age (ms) before a screenshot is treated as stale.
 *
 * The environment variable ZHIHAND_SNAPSHOT_MAX_AGE_MS overrides the
 * default when it parses to a finite number greater than zero; any other
 * value (unset, empty, non-numeric, zero, negative) falls back to 5000.
 * The original note gives the rationale for 5s: a normal command, capture
 * and upload cycle is expected to finish in well under 2s.
 *
 * @returns {number} threshold in milliseconds.
 */
export function getSnapshotStaleThresholdMs() {
    const fallbackMs = 5000;
    const override = process.env.ZHIHAND_SNAPSHOT_MAX_AGE_MS;
    if (!override) {
        return fallbackMs;
    }
    const parsed = Number(override);
    const usable = Number.isFinite(parsed) && parsed > 0;
    return usable ? parsed : fallbackMs;
}
15
/**
 * Parse a numeric response header.
 *
 * @param {string | null | undefined} h - raw header value.
 * @returns {number} the parsed number, or -1 when the header is missing,
 *   blank, or not a finite number.
 */
function parseIntHeader(h) {
    if (!h) {
        return -1;
    }
    // Number("   ") is 0, so a whitespace-only header would otherwise be
    // reported as a valid value of 0 (e.g. a brand-new snapshot) instead of
    // "unknown". Treat blank-after-trim the same as a missing header.
    const text = String(h).trim();
    if (text === "") {
        return -1;
    }
    const n = Number(text);
    return Number.isFinite(n) ? n : -1;
}
21
+ export async function fetchScreenshot(config) {
3
22
  const controller = new AbortController();
4
23
  const timeoutMs = config.timeoutMs ?? 10_000;
5
24
  const timeout = setTimeout(() => controller.abort(), timeoutMs);
@@ -20,10 +39,31 @@ export async function fetchScreenshotBinary(config) {
20
39
  throw new Error(`Screenshot fetch failed: ${response.status}`);
21
40
  }
22
41
  const buf = Buffer.from(await response.arrayBuffer());
23
- dbg(`[screenshot] OK: ${(buf.length / 1024).toFixed(0)}KB in ${Date.now() - t0}ms`);
24
- return buf;
42
+ const ageMs = parseIntHeader(response.headers.get("x-snapshot-age"));
43
+ const width = parseIntHeader(response.headers.get("x-snapshot-width"));
44
+ const height = parseIntHeader(response.headers.get("x-snapshot-height"));
45
+ const sequence = parseIntHeader(response.headers.get("x-snapshot-sequence"));
46
+ const capturedAt = response.headers.get("x-snapshot-captured-at");
47
+ const threshold = getSnapshotStaleThresholdMs();
48
+ const stale = ageMs >= 0 && ageMs > threshold;
49
+ dbg(`[screenshot] OK: ${(buf.length / 1024).toFixed(0)}KB in ${Date.now() - t0}ms, age=${ageMs}ms, stale=${stale}`);
50
+ return {
51
+ buffer: buf,
52
+ ageMs,
53
+ width: Math.max(width, 0),
54
+ height: Math.max(height, 0),
55
+ capturedAt,
56
+ sequence,
57
+ stale,
58
+ };
25
59
  }
26
60
  finally {
27
61
  clearTimeout(timeout);
28
62
  }
29
63
  }
64
/**
 * Legacy entry point kept for existing callers: fetches a screenshot via
 * fetchScreenshot() and hands back just its image bytes. Callers that need
 * the age/staleness metadata should call fetchScreenshot() directly.
 *
 * @param config - passed through unchanged to fetchScreenshot().
 * @returns {Promise<Buffer>} the `buffer` field of the screenshot result.
 */
export async function fetchScreenshotBinary(config) {
    const { buffer } = await fetchScreenshot(config);
    return buffer;
}
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
- export declare const PACKAGE_VERSION = "0.28.0";
2
+ export declare const PACKAGE_VERSION = "0.29.0";
3
3
  export declare function createServer(deviceName?: string): McpServer;
4
4
  export declare function startStdioServer(deviceName?: string): Promise<void>;
package/dist/index.js CHANGED
@@ -7,7 +7,7 @@ import { executeSystem } from "./tools/system.js";
7
7
  import { handleScreenshot } from "./tools/screenshot.js";
8
8
  import { handlePair } from "./tools/pair.js";
9
9
  import { getStaticContext, getDynamicContext, fetchDeviceProfile, buildControlToolDescription, buildSystemToolDescription, buildScreenshotToolDescription, formatDeviceStatus, } from "./core/device.js";
10
- export const PACKAGE_VERSION = "0.28.0";
10
+ export const PACKAGE_VERSION = "0.29.0";
11
11
  export function createServer(deviceName) {
12
12
  const server = new McpServer({
13
13
  name: "zhihand",
@@ -30,7 +30,7 @@ export function createServer(deviceName) {
30
30
  return await handleScreenshot(config);
31
31
  });
32
32
  // zhihand_status — return device context for LLM to query on demand
33
- server.tool("zhihand_status", "Get device status: platform, model, OS version, screen size, battery, network, BLE, dark mode, storage, and more.", {}, async () => {
33
+ server.tool("zhihand_status", "Get device status and capability readiness. Returns curated fields (platform, model, OS, screen, battery, network, BLE, ...), a `capabilities` object with `ready`/`reason` for screen_sharing, hid, live_session, profile.age, AND a `raw` map of allowlisted device attributes (wire-format names). Call this BEFORE issuing commands if you are unsure whether the phone is screen-sharing or the ZhiHand (BLE HID) is connected.", {}, async () => {
34
34
  return {
35
35
  content: [{
36
36
  type: "text",
@@ -1,20 +1,46 @@
1
1
  import { createControlCommand, enqueueCommand, formatAckSummary } from "../core/command.js";
2
- import { fetchScreenshotBinary } from "../core/screenshot.js";
2
+ import { fetchScreenshot } from "../core/screenshot.js";
3
3
  import { waitForCommandAck } from "../core/sse.js";
4
+ import { getCapabilities, isDeviceProfileLoaded } from "../core/device.js";
4
5
  function sleep(ms) {
5
6
  return new Promise((r) => setTimeout(r, ms));
6
7
  }
8
/**
 * Compose the advisory text shown to the LLM alongside a command result.
 *
 * Nothing is reported while no device profile is loaded. Otherwise up to
 * four warnings are emitted, one per line, in this order: HID not ready
 * (only when "hid" is required), screen sharing not active (only when
 * "screen" is required), stale screenshot, stale device profile.
 *
 * @param {string} requiredCapability - "hid" or "screen"; other values
 *   skip both capability checks.
 * @param {{stale: boolean, ageMs: number} | null | undefined} screenshot -
 *   the screenshot just fetched, if any.
 * @returns {string} newline-joined warnings, or "" when there are none.
 */
function buildReadinessWarning(requiredCapability, screenshot) {
    if (!isDeviceProfileLoaded()) {
        return "";
    }
    const caps = getCapabilities();
    const seconds = (ms) => (ms / 1000).toFixed(1);
    // Each entry pairs a trigger with a lazily rendered message, so a
    // message's fields are only read when its trigger fired.
    const checks = [
        [
            requiredCapability === "hid" && !caps.hid.ready,
            () => `⚠️ HID not ready: ${caps.hid.reason}`,
        ],
        [
            requiredCapability === "screen" && !caps.screen_sharing.ready,
            () => `⚠️ Screen sharing not active: ${caps.screen_sharing.reason}`,
        ],
        [
            Boolean(screenshot && screenshot.stale),
            () => `⚠️ Stale screenshot: age=${seconds(screenshot.ageMs)}s (phone may not be actively sharing the screen).`,
        ],
        [
            caps.profile.stale,
            () => `⚠️ Stale device profile: ${seconds(caps.profile.age_ms)}s old — readiness flags may be out of date.`,
        ],
    ];
    const lines = [];
    for (const [triggered, render] of checks) {
        if (triggered) {
            lines.push(render());
        }
    }
    return lines.join("\n");
}
7
32
  export async function executeControl(config, params) {
8
33
  // wait: Plugin-local implementation, no server round-trip
9
34
  if (params.action === "wait") {
10
35
  await sleep(params.durationMs ?? 1000);
11
- const screenshot = await fetchScreenshotBinary(config);
12
- return {
13
- content: [
14
- { type: "text", text: `Waited ${params.durationMs ?? 1000}ms` },
15
- { type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" },
16
- ],
17
- };
36
+ const shot = await fetchScreenshot(config);
37
+ const warning = buildReadinessWarning("screen", shot);
38
+ const content = [];
39
+ if (warning)
40
+ content.push({ type: "text", text: warning });
41
+ content.push({ type: "text", text: `Waited ${params.durationMs ?? 1000}ms` });
42
+ content.push({ type: "image", data: shot.buffer.toString("base64"), mimeType: "image/jpeg" });
43
+ return { content };
18
44
  }
19
45
  // screenshot: send receive_screenshot, App captures immediately (no 2s delay)
20
46
  if (params.action === "screenshot") {
@@ -24,29 +50,38 @@ export async function executeControl(config, params) {
24
50
  const command = createControlCommand(params);
25
51
  const queued = await enqueueCommand(config, command);
26
52
  const ack = await waitForCommandAck(config, { commandId: queued.id, timeoutMs: 15_000 });
27
- const content = [
28
- { type: "text", text: formatAckSummary(params.action, ack) },
29
- ];
53
+ const content = [];
54
+ let shot = null;
30
55
  if (ack.acked) {
31
56
  try {
32
- const screenshot = await fetchScreenshotBinary(config);
33
- content.push({ type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" });
57
+ shot = await fetchScreenshot(config);
34
58
  }
35
59
  catch {
36
60
  // Screenshot is best-effort after ACK
37
61
  }
38
62
  }
63
+ const warning = buildReadinessWarning("hid", shot);
64
+ if (warning)
65
+ content.push({ type: "text", text: warning });
66
+ content.push({ type: "text", text: formatAckSummary(params.action, ack) });
67
+ if (shot) {
68
+ content.push({ type: "image", data: shot.buffer.toString("base64"), mimeType: "image/jpeg" });
69
+ }
39
70
  return { content };
40
71
  }
41
72
  export async function executeScreenshot(config) {
42
73
  const command = createControlCommand({ action: "screenshot" });
43
74
  const queued = await enqueueCommand(config, command);
44
75
  const ack = await waitForCommandAck(config, { commandId: queued.id, timeoutMs: 5_000 });
45
- const screenshot = await fetchScreenshotBinary(config);
46
- return {
47
- content: [
48
- { type: "text", text: `Screenshot captured (acked: ${ack.acked})` },
49
- { type: "image", data: screenshot.toString("base64"), mimeType: "image/jpeg" },
50
- ],
51
- };
76
+ const shot = await fetchScreenshot(config);
77
+ const warning = buildReadinessWarning("screen", shot);
78
+ const content = [];
79
+ if (warning)
80
+ content.push({ type: "text", text: warning });
81
+ content.push({
82
+ type: "text",
83
+ text: `Screenshot captured (acked: ${ack.acked}, age: ${shot.ageMs >= 0 ? `${shot.ageMs}ms` : "unknown"}, size: ${shot.width}x${shot.height}, seq: ${shot.sequence})`,
84
+ });
85
+ content.push({ type: "image", data: shot.buffer.toString("base64"), mimeType: "image/jpeg" });
86
+ return { content };
52
87
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zhihand/mcp",
3
- "version": "0.28.0",
3
+ "version": "0.29.0",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "description": "ZhiHand MCP Server — phone control tools for Claude Code, Codex, Gemini CLI, and OpenClaw",