npm - bloby-bot - Versions diffs - 0.54.10 → 0.54.12 - Mend

bloby-bot 0.54.10 → 0.54.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

package/package.json +1 -1
package/supervisor/harnesses/codex.ts +177 -26
package/supervisor/index.ts +22 -0
package/supervisor/scheduler.ts +20 -0
package/workspace/skills/alexa/SKILL.md +5 -0
package/workspace/skills/mac/SKILL.md +182 -191
package/workspace/skills/mac/presets/PRESETS.md +28 -34
package/workspace/skills/mac/skill.json +3 -3
package/workspace/skills/plaud/SKILL.md +5 -0
package/workspace/skills/whatsapp/SKILL.md +5 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "bloby-bot",
-  "version": "0.54.10",
+  "version": "0.54.12",
   "releaseNotes": [
     "1. New Morphy animation system: config-driven sprites loaded from /morphy/*.json",
     "2. Swapped teleporting (splash) and headphones (bubble + chat) to the new format",

package/supervisor/harnesses/codex.ts CHANGED Viewed

@@ -44,11 +44,13 @@ const CLIENT_INFO = { name: 'bloby', title: 'Bloby', version: '1' };
 const REQUEST_TIMEOUT_MS = 60_000;
 const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']);
 /**
- * Per-turn watchdog. `turn/completed` is a NON-guaranteed notification — if the
- * app-server stalls mid-turn without exiting, the RPC `exit` handler never fires
- * and `busy` stays true forever (live: wedges the dashboard + defers backend
- * restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done never
- * arrives). Claude's one-shot path has the same 5-min guard. Mirrors it here.
+ * Per-turn IDLE watchdog. `turn/completed` is a NON-guaranteed notification — if
+ * the app-server stalls mid-turn without exiting, the RPC `exit` handler never
+ * fires and `busy` stays true forever (live: wedges the dashboard + defers
+ * backend restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done
+ * never arrives). This is an IDLE timeout, reset on every notification for the
+ * conversation — a legitimately long turn (deep reasoning, many tool calls)
+ * keeps emitting events and is never killed; only true silence trips recovery.
  */
 const TURN_WATCHDOG_MS = 5 * 60_000;
@@ -172,8 +174,8 @@ class CodexRpc {
   private closed = false;
   private stderrBuf = '';
-  start(): void {
-    this.proc = spawn(resolveCodexBin(), ['app-server'], { stdio: ['pipe', 'pipe', 'pipe'] });
+  start(extraArgs: string[] = []): void {
+    this.proc = spawn(resolveCodexBin(), ['app-server', ...extraArgs], { stdio: ['pipe', 'pipe', 'pipe'] });
     const rl = readline.createInterface({ input: this.proc.stdout });
     rl.on('line', (line) => this.onLine(line));
@@ -246,14 +248,30 @@ class CodexRpc {
   }
   private handleServerRequest(msg: { id: number; method: string; params?: any }): void {
-    const isApproval = msg.method.endsWith('/requestApproval');
-    if (isApproval) {
-      log.info(`[codex-rpc] auto-accepting server request: ${msg.method}`);
-      this.respond(msg.id, 'acceptForSession');
-      return;
+    // Responses are OBJECTS, not bare strings: CommandExecution/FileChange approval
+    // responses are `{ decision }` (CommandExecutionApprovalDecision / FileChangeApprovalDecision),
+    // and the legacy v1 aliases take `{ decision }` with the ReviewDecision enum.
+    // (None of these fire under approvalPolicy:'never' + danger-full-access, but reply
+    // correctly so an edge-case request can't stall the turn with a malformed reply.)
+    switch (msg.method) {
+      case 'item/commandExecution/requestApproval':
+      case 'item/fileChange/requestApproval':
+        log.info(`[codex-rpc] auto-accepting ${msg.method}`);
+        this.respond(msg.id, { decision: 'acceptForSession' });
+        return;
+      case 'execCommandApproval':
+      case 'applyPatchApproval':
+        log.info(`[codex-rpc] auto-accepting (legacy) ${msg.method}`);
+        this.respond(msg.id, { decision: 'approved_for_session' });
+        return;
+      // account/chatgptAuthTokens/refresh is only used by client-managed-token
+      // clients; Bloby authenticates via chatgpt OAuth and the app-server refreshes
+      // ~/.codex/auth.json itself, so this server-request never fires for us. Decline
+      // cleanly (a stale-credential edge would surface as a normal turn error instead).
+      default:
+        log.warn(`[codex-rpc] unhandled server request ${msg.method} — replying -32601`);
+        this.respondError(msg.id, -32601, `Method ${msg.method} not implemented by Bloby client`);
     }
-    log.warn(`[codex-rpc] unhandled server request ${msg.method} — replying with error`);
-    this.respondError(msg.id, -32601, `Method ${msg.method} not implemented by Bloby client`);
   }
   private respond(id: number, result: any): void {
@@ -304,8 +322,16 @@ class CodexRpc {
       p.reject(new Error('RPC connection closed'));
     }
     this.pending.clear();
-    try { this.proc?.stdin.end(); } catch {}
-    try { this.proc?.kill('SIGTERM'); } catch {}
+    const proc = this.proc;
+    try { proc?.stdin.end(); } catch {}
+    try { proc?.kill('SIGTERM'); } catch {}
+    // Escalate to SIGKILL if the app-server ignores SIGTERM (no true leak today
+    // since SIGTERM reaps it, but a SIGTERM-ignoring build would otherwise survive).
+    if (proc) {
+      const t = setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} }, 2000);
+      if (typeof t.unref === 'function') t.unref();
+      proc.once('exit', () => clearTimeout(t));
+    }
     this.proc = null;
   }
 }
@@ -320,6 +346,9 @@ interface CodexConversation {
   onMessage: OnAgentMessage;
   /** Currently in-flight turn id (set on `turn/started`, cleared on `turn/completed`). */
   currentTurnId: string | null;
+  /** itemId of the agentMessage currently streaming — used to insert a paragraph
+   *  break when a turn emits multiple separate agentMessage items. */
+  currentMsgItemId: string | null;
   /** Streaming text accumulator for the current turn's agentMessage items. */
   fullText: string;
   /** Tools/items used during the current turn, for the bot:turn-complete payload. */
@@ -462,10 +491,14 @@ async function steerOrQueue(conv: CodexConversation, content: string, savedFiles
 function handleNotification(conv: CodexConversation, n: { method: string; params?: any }): void {
   const p = n.params || {};
+  // Any notification for this conv proves the app-server is alive and working —
+  // reset the idle watchdog so a long-but-active turn isn't torn down.
+  if (conv.turnWatchdog) armTurnWatchdog(conv);
   switch (n.method) {
     case 'turn/started': {
       conv.currentTurnId = p.turn?.id || null;
       conv.fullText = '';
+      conv.currentMsgItemId = null;
       conv.usedFileTools = false;
       break;
     }
@@ -473,6 +506,13 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
     case 'item/agentMessage/delta': {
       const delta: string = p.delta || '';
       if (!delta) break;
+      // A turn can emit multiple agentMessage items (commentary then final_answer).
+      // On a new itemId, insert a paragraph break so they don't run together (mirrors claude.ts).
+      if (p.itemId && conv.currentMsgItemId && p.itemId !== conv.currentMsgItemId && conv.fullText && !conv.fullText.endsWith('\n')) {
+        conv.fullText += '\n\n';
+        conv.onMessage('bot:token', { conversationId: conv.id, token: '\n\n' });
+      }
+      if (p.itemId) conv.currentMsgItemId = p.itemId;
       conv.fullText += delta;
       conv.onMessage('bot:token', { conversationId: conv.id, token: delta });
       break;
@@ -512,6 +552,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
             input: { query: item.query || '' },
           });
           break;
+        case 'collabAgentToolCall':
+          // Codex's collaborating sub-agents (rarely enabled) → Bloby's sub-agent UX.
+          if (item.tool === 'spawnAgent') {
+            conv.onMessage('bot:task-created', {
+              conversationId: conv.id,
+              taskId: item.id,
+              description: item.prompt || 'sub-agent',
+              type: 'collab',
+            });
+          }
+          break;
         // userMessage / agentMessage / reasoning — no tool-style event.
       }
       break;
@@ -528,6 +579,14 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
           conv.onMessage('bot:token', { conversationId: conv.id, token: text });
         }
       }
+      if (item.type === 'collabAgentToolCall' && item.tool === 'spawnAgent') {
+        conv.onMessage('bot:task-done', {
+          conversationId: conv.id,
+          taskId: item.id,
+          status: item.status,
+          summary: item.prompt || '',
+        });
+      }
       break;
     }
@@ -598,8 +657,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
       break;
     }
-    // thread/started, thread/status/changed, mcpServer/startupStatus/updated,
-    // remoteControl/status/changed — informational, no-op for the dashboard.
+    case 'mcpServer/startupStatus/updated': {
+      // Surface MCP servers (from MCP.json → -c overrides) that fail to start,
+      // so a misconfigured server is visible instead of silently absent.
+      if (p.status === 'failed' || p.status === 'cancelled') {
+        log.warn(`[codex] MCP server "${p.name}" ${p.status}${p.error ? `: ${p.error}` : ''}`);
+      }
+      break;
+    }
+    // thread/started, thread/status/changed, remoteControl/status/changed —
+    // informational, no-op for the dashboard.
   }
 }
@@ -631,7 +699,7 @@ async function spawnAndInitialize(
   const { id: modelId, effort } = parseModelString(model);
   const rpc = new CodexRpc();
-  rpc.start();
+  rpc.start(buildMcpConfigArgs());
   const conv: CodexConversation = {
     id: conversationId,
@@ -640,6 +708,7 @@ async function spawnAndInitialize(
     effort,
     onMessage,
     currentTurnId: null,
+    currentMsgItemId: null,
     fullText: '',
     usedFileTools: false,
     pendingInputs: [],
@@ -703,27 +772,109 @@ async function spawnAndInitialize(
 }
 const SKILLS_DIR = path.join(WORKSPACE_DIR, 'skills');
+// Codex discovers "repo"-scope skills under `<cwd>/.codex/skills` (verified
+// against 0.135.0 — a bare `<cwd>/skills` is NOT scanned, and `skills/list`
+// has no extra-root param). Bloby keeps the canonical skills in
+// `workspace/skills/<name>`, so we mirror each one into `.codex/skills/<name>`
+// as a symlink — single source of truth, discoverable by codex's native router.
+// (Each SKILL.md needs YAML frontmatter or codex rejects it — see SKILL_FORMAT_MIGRATION.md.)
+const CODEX_SKILLS_ROOT = path.join(WORKSPACE_DIR, '.codex', 'skills');
+/**
+ * Mirror workspace/skills/<name> → workspace/.codex/skills/<name> as symlinks (idempotent).
+ *
+ * Every directory or symlink entry directly under SKILLS_DIR gets a same-named
+ * 'dir' symlink under CODEX_SKILLS_ROOT. A link that already resolves to the same
+ * real path as its skill is left untouched; anything else found at the link path
+ * is removed and the link is re-created. Entries under CODEX_SKILLS_ROOT whose
+ * skill no longer exists are not pruned here.
+ *
+ * Best-effort throughout: if SKILLS_DIR cannot be listed the function returns
+ * silently, a failed mkdir is ignored, and a per-skill failure is logged as a
+ * warning without aborting the remaining skills.
+ */
+function syncCodexSkillRoot(): void {
+  let names: string[] = [];
+  try {
+    names = fs.readdirSync(SKILLS_DIR, { withFileTypes: true })
+      .filter((e) => e.isDirectory() || e.isSymbolicLink()) // plain files at the top level are skipped
+      .map((e) => e.name);
+  } catch {
+    return; // no skills dir — nothing to mirror
+  }
+  try { fs.mkdirSync(CODEX_SKILLS_ROOT, { recursive: true }); } catch {} // errors ignored here; a missing root shows up as per-skill warnings below
+  for (const name of names) {
+    const target = path.join(SKILLS_DIR, name);
+    const link = path.join(CODEX_SKILLS_ROOT, name);
+    try {
+      const cur = fs.existsSync(link) ? fs.realpathSync(link) : null; // existsSync follows links, so a dangling link yields null
+      if (cur === fs.realpathSync(target)) continue; // already correct
+      try { fs.rmSync(link, { recursive: true, force: true }); } catch {} // clear whatever occupies the path; force tolerates a missing path
+      fs.symlinkSync(target, link, 'dir');
+    } catch (err: any) {
+      log.warn(`[codex] could not mirror skill "${name}" into .codex/skills: ${err.message}`);
+    }
+  }
+}
 function primeWorkspaceSkills(rpc: CodexRpc): void {
+  syncCodexSkillRoot();
   rpc.request('skills/list', {
     cwds: [WORKSPACE_DIR],
     forceReload: true,
-    perCwdExtraUserRoots: [{
-      cwd: WORKSPACE_DIR,
-      extraUserRoots: [SKILLS_DIR],
-    }],
   }).then((result: any) => {
     const entry = result?.data?.[0];
     const all = entry?.skills ?? [];
-    const workspace = all.filter((s: any) => s.scope !== 'system');
+    const repo = all.filter((s: any) => s.scope === 'repo');
     const errors = entry?.errors ?? [];
-    log.ok(`[codex] skills primed: ${workspace.length} workspace, ${all.length - workspace.length} system${errors.length ? `, ${errors.length} rejected` : ''}`);
+    log.ok(`[codex] skills primed: ${repo.length} workspace (repo), ${all.length - repo.length} user/system${errors.length ? `, ${errors.length} rejected` : ''}`);
     for (const err of errors) log.warn(`[codex] skill load error: ${err.path} — ${err.message}`);
   }).catch((err: any) => {
     log.warn(`[codex] skills/list failed: ${err.message}`);
   });
 }
+/* ── MCP wiring ────────────────────────────────────────────────────────── */
+const MCP_CONFIG_FILE = path.join(WORKSPACE_DIR, 'MCP.json');
+/**
+ * Load MCP servers from workspace/MCP.json (the same file the Claude harness
+ * reads). Accepts the canonical unwrapped map `{ name: { command, args, env } }`,
+ * a `{ mcpServers: {...} }` wrapper, or a legacy array of single-key maps.
+ * Returns {} when absent/invalid — so this is a no-op until the user populates MCP.json.
+ *
+ * The file is read synchronously on every call. Read and parse errors are
+ * swallowed without logging, so a malformed MCP.json is indistinguishable from a
+ * missing one. Individual server entries are returned as parsed, unvalidated.
+ *
+ * @returns Map of server name → raw server config, or {} on any failure.
+ */
+function loadMcpServersForCodex(): Record<string, any> {
+  try {
+    const raw = JSON.parse(fs.readFileSync(MCP_CONFIG_FILE, 'utf-8'));
+    let map: any = raw; // default: treat the whole document as the server map
+    if (raw && typeof raw === 'object' && raw.mcpServers && typeof raw.mcpServers === 'object') map = raw.mcpServers; // unwrap the { mcpServers } form
+    else if (Array.isArray(raw)) map = Object.assign({}, ...raw); // merge legacy array entries; on duplicate names the later entry wins
+    if (map && typeof map === 'object' && !Array.isArray(map)) return map; // null / primitives / arrays fall through to {}
+  } catch {} // missing file, unreadable file or invalid JSON → {}
+  return {};
+}
+/**
+ * Serialize a JS value as a TOML literal for a `-c key=value` override.
+ *
+ * Arrays become `[a,b]`, objects become inline tables with JSON-quoted keys
+ * (`{"k"=v}`), numbers and booleans are emitted via String(), and every other
+ * value is stringified and JSON-quoted — so null and undefined come out as the
+ * strings "null" and "undefined".
+ *
+ * NOTE(review): a non-finite number would be emitted as NaN/Infinity, which TOML
+ * spells nan/inf. Values parsed from JSON cannot be non-finite, so this looks
+ * unreachable for MCP.json input — confirm if other callers appear.
+ *
+ * @param v Value to serialize; nested arrays/objects are handled recursively.
+ * @returns TOML source text for `v`.
+ */
+function toToml(v: any): string {
+  if (Array.isArray(v)) return `[${v.map(toToml).join(',')}]`; // must precede the object check: arrays are objects too
+  if (v && typeof v === 'object') return `{${Object.entries(v).map(([k, val]) => `${JSON.stringify(k)}=${toToml(val)}`).join(',')}}`; // inline table; the truthiness test keeps null out of this branch
+  if (typeof v === 'number' || typeof v === 'boolean') return String(v);
+  return JSON.stringify(String(v)); // TOML basic string — JSON escaping is compatible
+}
+/**
+ * Translate MCP.json into `codex app-server -c mcp_servers.<name>.<field>=<toml>`
+ * spawn flags. Codex sources MCP from its own config layer rather than a per-query
+ * param (verified against 0.135.0: a `-c mcp_servers.X.command=...` override shows
+ * up in both mcpServerStatus/list and config/read). Only the stdio fields Bloby
+ * uses (command/args/env) are translated; names must be TOML-bare-key safe.
+ *
+ * Servers whose name fails the bare-key pattern, or whose `command` is falsy, are
+ * skipped with a warning. `args` is emitted only when it is a non-empty array and
+ * `env` only when it is a non-empty object; each value is serialized with
+ * toToml() as-is, with no coercion of element types.
+ *
+ * @returns Flat argv fragment of alternating `-c` and
+ *          `mcp_servers.<name>.<field>=<toml>` entries; empty when nothing is wired.
+ */
+function buildMcpConfigArgs(): string[] {
+  const servers = loadMcpServersForCodex();
+  const args: string[] = [];
+  let wired = 0; // servers actually translated, for the summary log below
+  for (const [name, cfg] of Object.entries(servers)) {
+    if (!/^[A-Za-z0-9_-]+$/.test(name)) { log.warn(`[codex] skipping MCP server "${name}" — name not TOML-bare-key safe`); continue; }
+    const c: any = cfg || {}; // tolerate a null/undefined entry; it is rejected by the command check below
+    if (!c.command) { log.warn(`[codex] skipping MCP server "${name}" — no command`); continue; }
+    args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
+    if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toToml(c.args)}`);
+    if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toToml(c.env)}`);
+    wired++;
+  }
+  if (wired) log.info(`[codex] wiring ${wired} MCP server(s) from MCP.json via -c overrides`);
+  return args;
+}
 /* ── Harness implementation ────────────────────────────────────────────── */
 export function hasConversation(conversationId: string): boolean {
@@ -856,7 +1007,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
   const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
   const rpc = new CodexRpc();
-  rpc.start();
+  rpc.start(buildMcpConfigArgs());
   let fullText = '';
   const usedTools = new Set<string>();

package/supervisor/index.ts CHANGED Viewed

@@ -1856,6 +1856,28 @@ mint();
         };
       }
+      // Same for Codex OAuth: the app-server reads ~/.codex/auth.json at spawn, so a
+      // running subprocess only adopts a new identity on re-spawn. End live conversations
+      // after a successful exchange so the next message cold-starts with the fresh token.
+      // (Wraps only the one-shot /exchange; the device-code /status route latches
+      //  success on every poll and must not re-fire teardown — handled at re-spawn instead.)
+      if (req.method === 'POST' && req.url === '/api/auth/codex/exchange') {
+        const origEnd = res.end.bind(res);
+        (res as any).end = function (this: typeof res, ...args: any[]) {
+          try {
+            const body = typeof args[0] === 'string' ? args[0] : args[0]?.toString();
+            if (body) {
+              const json = JSON.parse(body);
+              if (json.success) {
+                log.info('[orchestrator] Codex re-auth succeeded — restarting conversations with fresh token');
+                endAllConversations();
+              }
+            }
+          } catch {}
+          return origEnd(...args);
+        };
+      }
       workerApp(req, res);
       return;
     }

package/supervisor/scheduler.ts CHANGED Viewed

@@ -160,6 +160,26 @@ function triggerAgent(prompt: string, label: string, onComplete?: () => void) {
       }
       if (type === 'bot:done') {
+        // ── Mac channel push (server-initiated) ──
+        // When the `mac` skill is installed, it instructs the agent to wrap any
+        // Mac-bound pulse/cron output in <mac_push>…</mac_push> (a spoken line +
+        // an optional <notch_card>/<notch_html>). Forward each block's inner
+        // content over the chat WebSocket as an unsolicited `mac:push` frame —
+        // the Morphy Mac app renders it (notch card + TTS) without the user
+        // having pushed to talk. If the skill isn't installed the agent never
+        // emits this tag, so nothing is pushed and non-Mac users carry no weight.
+        if (fullResponse) {
+          const macPushRegex = /<mac_push>([\s\S]*?)<\/mac_push>/g;
+          let macMatch;
+          while ((macMatch = macPushRegex.exec(fullResponse)) !== null) {
+            const macContent = macMatch[1].trim();
+            if (macContent) {
+              broadcastBloby('mac:push', { content: macContent });
+              log.info(`[scheduler] Mac push broadcast (${macContent.length} chars)`);
+            }
+          }
+        }
         // Extract <Message> blocks after agent turn completes
         if (fullResponse) {
           const messageRegex = /<Message(?:\s+([^>]*))?>(([\s\S]*?))<\/Message>/g;

package/workspace/skills/alexa/SKILL.md CHANGED Viewed

@@ -1,3 +1,8 @@
+---
+name: alexa
+description: "Alexa voice channel for your agent via the public 'Morphy' skill. Code-based pairing, voice-first response style, three-pattern decision tree (fast / chat-deferred / HA-announce-deferred)."
+---
 # Alexa
 ## What This Is

package/workspace/skills/mac/SKILL.md CHANGED Viewed

@@ -1,287 +1,276 @@
-# Mac (Morphy notch)
+---
+name: mac
+description: "Morphy native macOS companion. Activates on the [Mac] tag. You reply with a concise spoken line (TTS) and optionally drive the Mac's action registry — one <mac_actions> JSON array that can show a notch card, point the mascot at the screen, or spotlight a control. Custom cards use <notch_html>. The same registry works proactively (PULSE/cron) wrapped in <mac_push>. Card presets + schemas: presets/PRESETS.md. Reusable custom cards: frequentSnippets/."
+---
+# Mac (Morphy companion)
 ## What This Is
-A channel for replying to your human **on their Mac**, through the **Morphy companion app** that lives in the MacBook notch. You speak a short reply (TTS) and *optionally* push a small HTML+CSS card into the notch — same beat as the audio. No pull, no schedule, no state to maintain. Pure output channel.
+A channel for reaching your human **on their Mac**, through the **Morphy companion app** that lives in the menu bar + the MacBook notch. Two things happen per turn:
-The notch's bottom slot is rendered with a sandboxed `WKWebView`. You're not building a webpage; you're hand-rolling a single ~9.4 cm² glance card.
+1. **You speak** — your concise reply is read aloud (ElevenLabs TTS).
+2. **You optionally drive the Mac** — through one **action registry**: a `<mac_actions>` block holding a JSON array of actions. An action can render a **card** in the notch, **point** the mascot at something on screen, or **spotlight** a control. (Custom hand-written HTML cards use the raw `<notch_html>` tag — see below.)
+The registry is **extensible**: today it's `card`, `point`, `spotlight`; more verbs arrive over time. You drive it the same way whether the human just talked to you (reply) or you're reaching out on your own (proactive push).
 ---
 ## When To Use This Skill
-Activate when the **user message starts with `[Mac]`**. Don't apply it to other inbound traffic (WhatsApp, Alexa, web chat). Specifically:
+Activate when the **user message starts with `[Mac]`**. Don't apply it to other inbound traffic (WhatsApp, Alexa, web chat).
 - `[Mac] what's on my calendar today?` → activate
-- `[Mac] what time is it in Tokyo?` → activate
+- `[Mac] where do I cancel this subscription?` → activate (point/spotlight at it)
 - `what's on my calendar today?` (no tag) → ignore this skill, reply as usual
-The tag is injected by the Morphy app itself when the human pushes-to-talk through Morphy, so by the time you see `[Mac]` it means **your reply will be spoken back at them**. Optimize for the ear, supplement with the eye.
+The tag is injected by the Morphy app when the human pushes-to-talk, **and every `[Mac]` turn carries a screenshot of each display** — so you can *see* their screen and reference exact pixels on it. Optimize for the ear, supplement with the eye, and act on the screen when it helps.
 ---
-## Two ways to put a visual in the notch
+## The action registry — `<mac_actions>`
+Your reply has two parts: **spoken prose** (plain text, read aloud) and an optional **`<mac_actions>` block** — a JSON array of actions the Mac runs. The Mac strips the block before TTS, so **it is never spoken**.
+```
+Here's your day, Bruno.
+<mac_actions>
+[
+  { "type": "card", "preset": "calendar", "data": { "weekday": "Thu", "date": "May 28", "events": [ { "time": "10:00", "title": "Stand-up" } ] } }
+]
+</mac_actions>
+```
-There are **two** ways to render a card. Reach for them in this order:
+Rules:
-1. **Preset (preferred).** Morphy ships a library of beautiful, pre-built card
-   renderers (`email`, `calendar`, `list`, `text`, `weather`, `ticker`, `stat`,
-   `info`, `comparison`). You send only **structured data** — never CSS — and
-   Morphy lays it out perfectly, on-brand, with scrolling handled for you. It's
-   the fast, pretty, low-token path. **Full data schemas + examples:
-   [`presets/PRESETS.md`](presets/PRESETS.md).**
+- **One `<mac_actions>` block per reply.** Put it after your spoken sentence.
+- It's a **JSON array** — you can include **several actions** and they run **in order** (e.g. spotlight a control *and* point at it).
+- Each element is a flat object with a `"type"` and that action's fields.
+- If you have nothing to show or do, send **no block** — just speak.
-   ```
-   <notch_card type="email">
-   { "from": "Alex Chen", "subject": "Migration plan", "time": "2:14 PM",
-     "body": "Can we move the cutover to Tuesday?\n\nStaging looked clean." }
-   </notch_card>
-   ```
+### The action types
-2. **Custom (escape hatch).** When no preset fits the shape of the answer,
-   hand-write the whole card in `<notch_html>…</notch_html>` (rules below). Cards
-   you hand-build and reuse get saved to `frequentSnippets/` so next time is
-   copy-fill-send. If a custom card gets requested a lot, tell Bruno — it's a
-   candidate to become a real shipped preset.
+| `type` | What the Mac does | Fields |
+|---|---|---|
+| `card` | Renders a card in the notch (a preset from the library). | `{ "preset": "<name>", "data": { … } }` |
+| `point` | The mascot flies across the screen and points at a spot, with a label bubble. | `{ "x", "y", "label"?, "screen"? }` |
+| `spotlight` | Dims the display and opens a soft glowing hole over a spot, isolating one control. | `{ "x", "y", "r"?, "label"?, "screen"? }` |
-**Check for a preset first.** Only drop to custom HTML when the data genuinely
-doesn't fit one of the presets.
+> **Adding capabilities:** new verbs (open an app, run a shortcut, click a button…) will appear here as new `type`s over time. If you try a `type` the app doesn't know yet, it's safely ignored — so only use the types documented above.
-> **Long prose / "read me this" / a summary / an explanation → use the `text`
-> preset.** Do NOT dump raw paragraphs into `<notch_html>` — that tag is *HTML*,
-> not a text box. Bare text there renders unstyled, edge-to-edge, with no padding.
-> The `text` preset gives you a title, proper margins, and a scrollable body for
-> free: `<notch_card type="text">{ "title": "…", "body": "…long text with \n\n…" }</notch_card>`.
-> `<notch_html>` is only for a genuinely custom *visual layout* you hand-build with
-> real markup and inline styles — never as a dumping ground for plain text.
+---
-## Output Format
+## The `card` action (notch cards)
-Your full reply lives inside the conversation; Morphy will:
+A card is a small glance-visual in the notch's bottom slot — **383 × 147 pt, transparent over black**. There are two ways to make one; reach for them in this order:
-1. Strip any `<notch_card …>…</notch_card>` **and** `<notch_html>…</notch_html>`
-   block before TTS — neither is **ever** spoken.
-2. Send the remaining text to ElevenLabs and play it through the Mac.
-3. Render the card (preset → Morphy's renderer; custom → your HTML) and auto-open
-   the notch downward **at the same moment** the audio starts. Visual + audio land
-   together — that's the whole point.
+### 1. Preset (preferred) — `card` action
-So a reply has up to two parts — a spoken sentence and **one** card block (either
-a `<notch_card>` preset or a `<notch_html>` custom card, not both):
+Morphy ships pre-built, on-brand renderers: **`email`, `calendar`, `list`, `text`, `weather`, `ticker`, `stat`, `info`, `comparison`**. You send only **structured data** — never CSS — and Morphy lays it out perfectly, scrolling handled. **Full data schemas + examples: [`presets/PRESETS.md`](presets/PRESETS.md).**
 ```
-<concise spoken sentence>
-<notch_card type="…">{ …data… }</notch_card>
+Here it is, Bruno.
+<mac_actions>
+[ { "type": "card", "preset": "email", "data": { "from": "Alex Chen", "subject": "Migration plan", "time": "2:14 PM", "body": "Can we move the cutover to Tuesday?" } } ]
+</mac_actions>
 ```
-### Spoken text rules
+> **Long prose / "read me this" / a summary → use the `text` preset**, not custom HTML:
+> `{ "type": "card", "preset": "text", "data": { "title": "…", "body": "…long text with \n\n…" } }`. It gives you a title, margins, and a scrollable body for free.
-- **One or two sentences max.** It's audio, not prose. The human is mid-task when this plays — don't make them stand still for a paragraph.
-- **No markdown, no bullet lists, no enumerations.** TTS reads symbols literally and it sounds awful.
-- **Refer the human by name** if you know it ("Here's your calendar for today, Bruno.") — sounds personal, doesn't add length.
-- **Acknowledge the card when you send one.** Otherwise the visual feels disconnected from the voice. "Here it is.", "I put the details up top.", "Tap to glance.".
+### 2. Custom — raw `<notch_html>` (escape hatch)
-### 🚫 The #1 mistake: speaking the card content out loud
+When no preset fits, hand-write the whole card in a **`<notch_html>…</notch_html>`** tag (separate from `<mac_actions>`). Use the **raw tag, not a JSON string** — that's deliberate: you write real markup with no escaping.
-**This is the single most common failure mode and it ruins the UX.** If your card contains a list, a calendar, an email digest, news headlines, search results — *anything structured* — your spoken text **must NOT re-read those items**. The human is already looking at them. Reading them aloud doubles the time-to-information and makes Morphy feel broken.
+```
+Pinned it up top, Bruno.
+<notch_html>
+<div style="padding:12px 16px;color:#fff;font-family:-apple-system">…your layout…</div>
+</notch_html>
+```
-When the card carries the answer, the voice's only job is the **lead-in sentence** — and then stop talking.
+Reusable custom cards live in [`frequentSnippets/`](frequentSnippets/) — read the file, fill the `{{placeholders}}`, drop the result inside `<notch_html>`. If a custom card gets asked for a lot, tell Bruno — it's a candidate to become a shipped preset.
-#### ❌ BAD — speaks the card back at the human
+**Check for a preset first.** Only drop to `<notch_html>` when the data genuinely doesn't fit one.
-> *"Top five AI today, Bruno. Anthropic dropped Claude Opus 4.7 with a one-million-token window. OpenAI is rolling Sora 3 video into ChatGPT for Plus users. Meta open-sourced Llama 4 Scout at 400 billion params. Google DeepMind's Gemini 3 hit number one on the LMSys arena. And Mistral closed a five billion dollar round at a forty billion valuation."*
-> ```
-> <notch_html>… the same five items rendered as a list …</notch_html>
-> ```
+### The canvas (for custom cards)
-ElevenLabs spends 30+ seconds reciting what's already on screen. The human paid for tokens AND TTS minutes to receive identical information twice. Don't do this.
+| Constraint | Value |
+|---|---|
+| **Size** | **383 × 147 pt** (fixed) — about two stacked Spotlight rows. |
+| **Background** | **Transparent over black**. Black is your canvas; use white/light text. |
+| **Color** | White or `rgba(255,255,255,0.x)` tints read better than gray on black. |
+| **Allowed** | HTML, CSS (flexbox, grid, gradients, transitions, animations), unicode/emoji. |
+| **Forbidden** | External resources — `<img src="https://…">`, `<iframe>`, web fonts. The view has **no network**. |
+| **Interactivity** | No clicks/hovers (they do nothing). Long content **scrolls** via trackpad — overflow is fine. |
+| **Type size** | **12–14 px** is the floor on a Retina notch; below that white-on-black smears. |
-#### ✓ GOOD — lead-in only, card carries the detail
+Tips: two-column `display:flex;gap:8px` at ~190 pt/column; a 1px `rgba(255,255,255,0.08)` divider under a header; don't outline the card (the black pill is the frame).
-> *"Here are today's top five in AI, Bruno."*
-> ```
-> <notch_html>… the same five items rendered as a list …</notch_html>
-> ```
+---
-Two seconds of speech, the eye picks up the rest, human keeps working.
+## `point` & `spotlight` (acting on the screen)
-#### Same rule, different shapes
+Because every `[Mac]` turn attaches a **screenshot of each display**, you can reference exact pixels and Morphy animates over the *live* screen at the matching spot.
-| Question | Speech | Card |
-|---|---|---|
-| "What's on my calendar today?" | *"Here's your day, Bruno."* | day + events |
-| "Read me my unread emails." | *"Three worth a glance, Bruno."* | sender + subject list |
-| "Compare Postgres vs SQLite." | *"Quick side-by-side up top, Bruno."* | two-column card |
-| "What's the weather?" | *"Pinned the forecast for you, Bruno."* | temp + conditions |
-| "What time is it in Tokyo?" | *"It's 8:14 PM in Tokyo."* | **no card** — bare fact, voice is enough |
+- `x`, `y` — **pixels measured on the screenshot you were given this turn** (top-left origin). Read the position straight off the image.
+- `screen` *(optional)* — **1-based display index**, matching the order the screenshots were attached (screen 1 = first image). Omit for a single display or to default to the cursor's screen.
+- `label` *(optional)* — short on-screen caption ("Send", "Cancel here").
+- `r` *(spotlight only, optional)* — hole radius in points (default 90).
-The rule is symmetric: if the **voice alone** is the right answer, send **no card**. If the **card** is the answer, send a **short lead-in** in voice. **Never both at full length.**
+`spotlight` + `point` pair beautifully — spotlight to kill the clutter, point to name it:
-### Whether to send a card at all
+```
+The cancel button's bottom-right, Bruno.
+<mac_actions>
+[
+  { "type": "spotlight", "x": 1180, "y": 540, "r": 80, "screen": 1 },
+  { "type": "point", "x": 1180, "y": 540, "label": "Cancel", "screen": 1 }
+]
+</mac_actions>
+```
-- **Sometimes useful, sometimes not.** Decide per reply. A time-of-day answer doesn't need a card. A calendar, a list, a comparison, a status — those benefit from one.
-- **Preset first.** If the answer is an email, a list, a calendar, a stat, a comparison, a quote/summary, weather, or a quote — there's almost certainly a preset for it (see [`presets/PRESETS.md`](presets/PRESETS.md)). Send data, not markup.
-- **Custom only when nothing fits.** Reach for `<notch_html>` when the shape is genuinely bespoke. Pin reusable custom cards as files in `frequentSnippets/` so you can copy-swap-send instead of regenerating.
-- **The card is display-only — but scrollable.** Don't put buttons, links, or "click to expand" — clicks do nothing and read as broken UI. But long content **is scrollable**: the user scrolls `email` / `list` / `calendar` / `text` cards with the trackpad (a thin scrollbar appears), so it's fine to let content overflow.
+**Reply-only.** `point`/`spotlight` need the screenshot, so they only work on a reply to a `[Mac]` turn — **never** in a proactive push (no screenshot to map against). Cards work in both.
 ---
-## The Canvas
+## Proactive pushes (PULSE / cron — you start the conversation)
-| Constraint | Value |
-|---|---|
-| **Width** | **383 points** (fixed) |
-| **Height** | **~147 points** (fixed; bottom is inset to clear the pill's rounded corners) |
-| **Background** | **Transparent**. Sits directly on a **black** pill. Plan accordingly — black is your canvas. |
-| **Font** | System (`-apple-system, BlinkMacSystemFont, system-ui`) is preloaded. Override only if you need a monospaced number or display weight. |
-| **Color** | White or light. Use `rgba(255,255,255,0.x)` for tints — opacity tints read better than gray on black. |
-| **Allowed** | HTML, CSS (incl. flexbox, grid, gradients, transitions, animations), unicode glyphs / emoji. |
-| **Forbidden** | JS that does network or file access, `<img src="https://...">`, `<iframe>`, anything that loads external resources. The view has no network. |
-| **Interactivity** | No clicks or hovers — they do nothing. But long content **scrolls** via the trackpad (a thin scrollbar appears), so overflow is fine. |
+You can reach the Mac **without being asked** — during a `<PULSE/>` run or a scheduled cron, when memory says the human wants a proactive Mac update (a trade status, "tell me when new mail lands", a build result, a reminder).
-**383 × 147 is small.** Roughly the size of two stacked Spotlight rows. Treat each card as a single coherent idea — date + time, one event, one fact, one comparison. If you can't fit it, paginate via speech ("here's the first, ask 'next' for the rest"), don't shrink the type.
+Opt-in and quiet by design:
-### Space tips
+- **Only push when the human asked for it** (a memory/instruction). A push interrupts with voice + visuals — never speculative.
+- **Wrap the whole Mac payload in `<mac_push>…</mac_push>`.** Inside, write *exactly* a normal reply: a spoken line + an optional `<mac_actions>` block (a `card` is ideal) and/or `<notch_html>`. The supervisor forwards the wrapper's inner content to the Mac as an unsolicited message; the app speaks it and renders it, same as a reply.
-- **Two-column grids** with `display: flex; gap: 8px` are very effective at ~190pt per column.
-- **A divider line** (`rgba(255,255,255,0.08)`, 1px) under a header separates regions without visually crowding.
-- **Tabs / pagination** can be visual-only (a row of dots indicating "1 of 3"); say "next" in the audio and resend a fresh snippet for page 2.
-- **Don't outline the card.** The black pill is already the frame.
-- **12–14px is the smallest comfortable size** on a Retina notch. Below that, antialiasing makes white text on black smear.
+  ```
+  <mac_push>
+  Your TSLA position is up 2.1% today, Bruno.
+  <mac_actions>
+  [ { "type": "card", "preset": "stat", "data": { "value": "+2.1%", "label": "TSLA · today" } } ]
+  </mac_actions>
+  </mac_push>
+  ```
----
+- **No `point`/`spotlight` in a push** — there's no screenshot behind it. Spoken line + a `card` (or `<notch_html>`) only.
+- **Fire-and-forget, online-only.** The push lands **only if the Mac is connected and the human isn't mid-interaction**; otherwise it's silently dropped (not queued). For must-not-miss updates, also emit a `<Message>` block (web/push) — `<mac_push>` and `<Message>` are independent.
+- **Keep the reply discipline** (below): ≤ 2 sentences, no markdown, never read the card aloud. The human didn't ask, so be especially brief.
-## Frequent Snippets
+If the `mac` skill isn't installed, none of this exists — so only emit `<mac_push>` when this skill is active and the human has opted into Mac updates.
-`workspace/skills/mac/frequentSnippets/*.html` — pre-built cards. When your human asks something you've answered before, **don't regenerate** the markup. Read the file, replace placeholders, paste into `<notch_html>…</notch_html>`. Saves tokens, lowers latency, lets the visual feel land at the same beat as the voice.
+---
+## Spoken-text rules
-**Workflow:**
+- **One or two sentences max.** It's audio. The human is mid-task — don't make them stand still for a paragraph.
+- **No markdown, no bullet lists, no enumerations.** TTS reads symbols literally and it sounds awful.
+- **Refer to the human by name** if you know it — personal, costs nothing.
+- **Acknowledge a card/action when you send one** ("Here it is.", "Pinned it up top.", "It's the gear, top-right.") so the visual feels connected to the voice.
+- **Name things, don't narrate coordinates.** *"It's the gear, top-right."* — never *"I'm pointing at 1890 comma 40."*
-1. First time the human asks "what's on my calendar today?" — generate a card from scratch, show it, ask: *"Want me to save this as a template for next time?"*
-2. If yes, write it to `frequentSnippets/calendar-today.html` with `{{placeholders}}` for the dynamic bits.
-3. Next time the same question comes in, `Read` the file, do a simple string-replace on the placeholders, paste the result into `<notch_html>…</notch_html>`.
+### 🚫 The #1 mistake: speaking the card content out loud
-Use `{{double_curly}}` placeholders so they're easy to grep and unambiguous. Keep file names kebab-cased and intent-named (`calendar-today.html`, `weather-now.html`, `pomodoro-status.html`).
+If your card carries a list, calendar, email digest, headlines — *anything structured* — your spoken text **must NOT re-read those items**. The human is already looking at them. When the card carries the answer, the voice's only job is a short **lead-in**, then stop.
-Starter snippets in this skill (read them to see the style):
+**❌ BAD:** *"Top five in AI: Anthropic shipped Claude Opus… OpenAI rolled out… Meta open-sourced…"* (30s reciting what's on screen)
+**✓ GOOD:** *"Here are today's top five, Bruno."* + the list card.
-- `frequentSnippets/calendar-today.html` — day + date hero, event list, "no events" empty state.
-- `frequentSnippets/info-card.html` — two-column key/value card.
-- `frequentSnippets/single-stat.html` — one big number, one supporting line.
+The rule is symmetric: if the **voice alone** is the right answer, send **no card**. If the **card** is the answer, send a **short lead-in**. Never both at full length.
-You're encouraged to **discuss the snippet design with your human** — they see it every day, they'll have opinions on density, accent color, what to lead with. Keep the file in sync with what they tell you. Treat each `.html` as a small product surface.
+| Question | Speech | Visual |
+|---|---|---|
+| "What's on my calendar?" | *"Here's your day, Bruno."* | `card` calendar |
+| "Read me my unread emails." | *"Three worth a glance, Bruno."* | `card` list |
+| "Where do I cancel?" | *"Bottom-right, Bruno."* | `spotlight` + `point` |
+| "What time is it in Tokyo?" | *"It's 8:14 PM in Tokyo."* | **nothing** — voice is enough |
 ---
 ## Examples
-### Example 1 — Calendar (with card)
+### Reply with a preset card
 > **Human:** `[Mac] what's on my calendar today?`
-> **You (full reply):**
 > ```
-> Here's your day, Bruno. Stand-up at 10, design review at 2, nothing after 4.
-> <notch_html>
-> <div style="padding:12px 16px;display:flex;flex-direction:column;gap:8px;height:100%;box-sizing:border-box">
->   <div style="display:flex;align-items:baseline;gap:8px">
->     <div style="font-size:22px;font-weight:700">Thu</div>
->     <div style="font-size:13px;opacity:0.55">May 28</div>
->   </div>
->   <div style="height:1px;background:rgba(255,255,255,0.08)"></div>
->   <div style="display:flex;justify-content:space-between;font-size:11px">
->     <span style="opacity:0.7">10:00 · Stand-up</span>
->     <span style="opacity:0.4">30m</span>
->   </div>
->   <div style="display:flex;justify-content:space-between;font-size:11px">
->     <span style="opacity:0.7">14:00 · Design review</span>
->     <span style="opacity:0.4">1h</span>
->   </div>
-> </div>
-> </notch_html>
+> Here's your day, Bruno.
+> <mac_actions>
+> [ { "type": "card", "preset": "calendar", "data": { "weekday": "Thu", "date": "May 28", "events": [ { "time": "10:00", "title": "Stand-up" }, { "time": "14:00", "title": "Design review" } ] } } ]
+> </mac_actions>
 > ```
-Morphy speaks "Here's your day, Bruno…" while the card slides into the notch.
+### Reply that acts on the screen
+> **Human:** `[Mac] where do I turn off notifications?`
+> ```
+> It's the bell icon near the top-right, Bruno.
+> <mac_actions>
+> [ { "type": "spotlight", "x": 1840, "y": 64, "r": 70, "screen": 1 },
+>   { "type": "point", "x": 1840, "y": 64, "label": "Notifications", "screen": 1 } ]
+> </mac_actions>
+> ```
-### Example 2 — Quick fact (no card)
+### Reply, bare fact (no visual)
 > **Human:** `[Mac] what time is it in Tokyo?`
-> **You (full reply):**
 > ```
 > It's 8:14 PM in Tokyo.
 > ```
-A bare fact doesn't earn screen real estate. Speak it, move on.
-### Example 3 — Comparison (card with two columns)
+### Custom card (no preset fits)
-> **Human:** `[Mac] which is better for our use case, Postgres or SQLite?`
-> **You:**
 > ```
-> For your use case, Bruno, Postgres edges it out for the concurrent writes. Quick side-by-side up top.
+> Quick side-by-side up top, Bruno.
 > <notch_html>
-> <div style="padding:12px 16px;display:flex;gap:10px;height:100%;box-sizing:border-box">
+> <div style="padding:12px 16px;display:flex;gap:10px;height:100%;box-sizing:border-box;color:#fff;font-family:-apple-system">
 >   <div style="flex:1;padding:8px 10px;border-radius:8px;background:rgba(255,255,255,0.05)">
->     <div style="font-size:10px;opacity:0.5;text-transform:uppercase;letter-spacing:.5px">Postgres</div>
+>     <div style="font-size:10px;opacity:0.5;text-transform:uppercase">Postgres</div>
 >     <div style="font-size:13px;font-weight:700;margin-top:2px">Concurrent ✓</div>
->     <div style="font-size:10px;opacity:0.5;margin-top:4px">Heavier setup</div>
 >   </div>
 >   <div style="flex:1;padding:8px 10px;border-radius:8px;background:rgba(255,255,255,0.05)">
->     <div style="font-size:10px;opacity:0.5;text-transform:uppercase;letter-spacing:.5px">SQLite</div>
+>     <div style="font-size:10px;opacity:0.5;text-transform:uppercase">SQLite</div>
 >     <div style="font-size:13px;font-weight:700;margin-top:2px">Single-writer</div>
->     <div style="font-size:10px;opacity:0.5;margin-top:4px">Zero setup</div>
 >   </div>
 > </div>
 > </notch_html>
 > ```
-### Example 4 — Pagination via voice (long list)
+### Proactive push (PULSE/cron)
-> **Human:** `[Mac] read me my unread emails.`
-> **You:** *(reply 1)*
 > ```
-> Three of seven worth a glance, Bruno. First: Alex about the migration. Say "next" for the rest.
-> <notch_html>
-> <div style="padding:10px 14px;display:flex;flex-direction:column;gap:6px;height:100%;box-sizing:border-box">
->   <div style="display:flex;justify-content:space-between;align-items:baseline">
->     <div style="font-size:13px;font-weight:700">Unread mail</div>
->     <div style="font-size:9px;opacity:0.45">1 / 3</div>
->   </div>
->   <div style="height:1px;background:rgba(255,255,255,0.08)"></div>
->   <div style="font-size:11px"><b>Alex Chen</b> · Migration plan</div>
->   <div style="font-size:10px;opacity:0.6">"…can we move the cutover to Tue?"</div>
-> </div>
-> </notch_html>
+> <mac_push>
+> Build's green, Bruno.
+> <mac_actions>
+> [ { "type": "card", "preset": "stat", "data": { "value": "PASS", "label": "CI · main" } } ]
+> </mac_actions>
+> </mac_push>
 > ```
-> *(human says "next" → reply 2 with `1 / 3` swapped for `2 / 3` and the next email's body)*
 ---
 ## What Not To Do
-- ❌ **No long monologues.** If you can't say it in two sentences, you're either over-explaining or you should make the card carry it.
-- ❌ **No reading the card aloud.** The card and the speech complement each other; don't make them redundant.
-- ❌ **No external assets.** `<img src="https://…">`, `<link href="…">`, Google Fonts, none of it loads.
-- ❌ **No interactive elements.** Buttons render but do nothing — they read as broken UI. Drop them.
-- ❌ **No light backgrounds.** The pill is black. White text on a white card looks like a missing texture.
-- ❌ **No emoji as primary content** unless it's the answer. They render small at 12px and lose meaning.
-- ❌ **Don't send a card "just because"** if the answer is a bare fact. The card should add something the voice can't carry — structure, list, comparison, status.
+- ❌ **No long monologues.** Two sentences. If you can't, let the card carry it.
+- ❌ **No reading the card aloud.** Voice + card complement, never duplicate.
+- ❌ **No `point`/`spotlight` in a proactive push** — no screenshot exists there.
+- ❌ **No external assets** in custom HTML — no network in the notch view.
+- ❌ **No interactive elements** in cards — buttons render but do nothing.
+- ❌ **No light backgrounds** — the pill is black.
+- ❌ **Don't send a visual "just because"** — a bare fact needs no card.
+- ❌ **Don't invent action `type`s** — only `card`, `point`, `spotlight` exist today.
 ---
 ## Reply Checklist
-Before you send:
-1. **Did the tag `[Mac]` actually start the user message?** If not, don't use this skill.
-2. **Is the spoken text ≤ 2 sentences, no markdown, no enumerations?**
-3. **🚫 Does my spoken text recite items that are already in my card?** If yes, **rewrite the speech as a lead-in only** ("Here are the top five, Bruno."). Speaking the card aloud is the #1 failure mode of this skill. Re-read the spoken text and the card together — if you remove the card, would the voice still make sense as the standalone answer? If yes, you're double-answering. Trim the voice.
-4. **If I'm sending a card, does it add structure the voice can't carry?** If a sentence covers it, drop the card.
-5. **Does my voice acknowledge the card if I sent one?** ("Here it is.", "Pinned it up top.", etc.)
-6. **Did I check for a PRESET first** ([`presets/PRESETS.md`](presets/PRESETS.md))? Only hand-write `<notch_html>` if no preset fits. (For custom, did I check `frequentSnippets/` for one I already made?)
-7. **If it's a preset:** is the body **valid JSON**, and is `type` one of the real preset names (lowercase)?
-8. **If it's custom:** is the card ≤ 383×147pt, transparent-on-black, white text, no external assets?
-9. **Is the tag spelled exactly `<notch_card type="…">…</notch_card>` or `<notch_html>…</notch_html>` (lowercase, underscore)?**
-10. **If I'm offering to save a custom card for next time, did I say so plainly?**
+1. Did the message actually start with `[Mac]`? If not, don't use this skill.
+2. Spoken text ≤ 2 sentences, no markdown, no enumerations?
+3. 🚫 Does my speech recite what's already in my card? Rewrite it as a lead-in only.
+4. If acting on screen, did I read the coordinates off **this turn's screenshot**, and set `screen` if multi-display?
+5. Is my `<mac_actions>` value **valid JSON** (an array of objects, each with a `type`)?
+6. For a `card`: did I check **PRESETS.md** first, and is `preset` a real lowercase name with valid `data`?
+7. For a custom card: raw `<notch_html>`, ≤ 383×147, transparent-on-black, white text, no external assets?
+8. Proactive? Wrapped in `<mac_push>`, no `point`/`spotlight`, and brief?
+9. Does my voice acknowledge the visual if I sent one?
 ---
@@ -289,14 +278,16 @@ Before you send:
 | Thing | Where / how |
 |---|---|
-| Tag that activates this skill | `[Mac]` at the start of the user message |
-| **Preset card tag (preferred)** | `<notch_card type="…">{ …json… }</notch_card>` |
-| Preset catalog + schemas | [`presets/PRESETS.md`](presets/PRESETS.md) — email, list, calendar, weather, ticker, stat, info, text, comparison |
-| Custom card tag (escape hatch) | `<notch_html>…</notch_html>` (singular, anywhere in your reply) |
-| Canvas size | **383 × 147 pt**, transparent over **black** |
-| Custom snippet library | `workspace/skills/mac/frequentSnippets/*.html` |
-| TTS engine | ElevenLabs (Morphy handles it; you just write the words) |
-| Stripped from speech | Anything inside `<notch_card …>…` or `<notch_html>…` (case-insensitive, multi-line) |
-| Long text | **Scrollable** — the user scrolls long cards (email/list/calendar/text) with the trackpad |
-| Auto-opens notch? | Yes — sending a card ⇒ notch slides down automatically as audio starts |
-| Auto-clears? | When the human triggers the next push-to-talk, or after the safety timer |
+| Activates this skill | `[Mac]` at the start of the user message |
+| **Action registry** | `<mac_actions>[ { "type": "…", … }, … ]</mac_actions>` — one block, JSON array, runs in order |
+| Action types | `card` (preset), `point`, `spotlight` |
+| `card` | `{ "type":"card", "preset":"…", "data":{…} }` — catalog: [`presets/PRESETS.md`](presets/PRESETS.md) |
+| `point` | `{ "type":"point", "x", "y", "label"?, "screen"? }` — reply-only |
+| `spotlight` | `{ "type":"spotlight", "x","y", "r"?, "label"?, "screen"? }` — reply-only |
+| Custom card (raw) | `<notch_html>…</notch_html>` — raw markup, no escaping |
+| Custom snippet library | [`frequentSnippets/`](frequentSnippets/)`*.html` |
+| Proactive push | wrap payload in `<mac_push>…</mac_push>` (cards only, no point/spotlight) |
+| Canvas | **383 × 147 pt**, transparent over **black** |
+| Stripped from speech | Anything inside `<mac_actions>` or `<notch_html>` (and legacy `<notch_card>` / `<morphy_action>`) |
+| Coordinates | screenshot pixels (top-left origin); `screen` is 1-based |
+| Auto-clears | next push-to-talk, or a short safety timer |

package/workspace/skills/mac/presets/PRESETS.md CHANGED Viewed

@@ -7,30 +7,41 @@ these, and the card always looks on-brand.
 ## How to send a preset
-Put one block anywhere in your `[Mac]` reply (it's stripped from TTS, never
-spoken):
+A card is a **registry action**. Put one `<mac_actions>` block in your `[Mac]` reply
+(it's stripped from TTS, never spoken) with a `card` action whose `data` is the
+preset's payload:
 ```
-<notch_card type="PRESET_NAME">
-{ ...JSON data for that preset... }
-</notch_card>
+<mac_actions>
+[ { "type": "card", "preset": "PRESET_NAME", "data": { ...preset data... } } ]
+</mac_actions>
 ```
-- `type` is the preset name (lowercase) from the table below.
-- The body is a **single JSON object**. Use valid JSON — double-quote keys and
-  strings, escape newlines inside strings as `\n` (multi-line text is fine and
-  encouraged for `email`/`text` bodies; it renders with real line breaks).
-- Unknown fields are ignored. Missing optional fields just don't render.
-- If the `type` is unknown or the JSON is malformed, Morphy shows **no card** (and
-  nothing leaks into your spoken reply) — so a typo fails safe, not loud.
+- `preset` is the name (lowercase) from the sections below.
+- `data` is a **single JSON object** — the fields documented for that preset.
+  **Each example below shows exactly that `data` object** (drop it in under `"data"`).
+- Use valid JSON: double-quote keys/strings, escape newlines inside strings as `\n`
+  (multi-line text is fine for `email`/`text` bodies — it renders with real breaks).
+- Unknown fields are ignored; missing optional fields just don't render.
+- An unknown `preset` or malformed JSON shows **no card** (and nothing leaks into
+  your spoken reply) — a typo fails safe, not loud.
+- You can include other actions (`point`, `spotlight`) in the **same array** — they
+  run in order. See `SKILL.md`.
-The same voice rules apply as always: **don't read the card's contents aloud** —
-the voice is a short lead-in, the card carries the detail. See `SKILL.md`.
+> **Legacy:** the older `<notch_card type="PRESET">{ …data… }</notch_card>` tag still
+> works (same renderer), but the `card` action above is the canonical form — one
+> envelope for every Mac capability.
+The same voice rule always applies: **don't read the card's contents aloud** — the
+voice is a short lead-in, the card carries the detail. See `SKILL.md`.
 ---
 ## Presets
+> Each block below is the **`data` object** for that preset — wrap it as shown in
+> "How to send a preset" above.
 ### `stat` — one big number
 A single hero value with a label and caption. Time, countdown, %, rate, score.
@@ -41,9 +52,7 @@ A single hero value with a label and caption. Time, countdown, %, rate, score.
 | `caption` | – | muted line under it |
 ```
-<notch_card type="stat">
 { "label": "TOKYO", "value": "8:14 PM", "caption": "Thursday, May 28" }
-</notch_card>
 ```
 ### `info` — header + two key/value tiles
@@ -57,11 +66,9 @@ Two metrics side by side under a title.
 | `right_label`, `right_value` | ✓ | second tile |
 ```
-<notch_card type="info">
 { "title": "Connection", "subtitle": "VPN · Frankfurt",
   "left_label": "Latency", "left_value": "42 ms",
   "right_label": "Loss", "right_value": "0%" }
-</notch_card>
 ```
 ### `email` — a single email
@@ -77,10 +84,8 @@ can **scroll** if it's long. Perfect for "read me that email."
 | `initial` | – | avatar letter; **auto-derived from `from`** if omitted |
 ```
-<notch_card type="email">
 { "from": "Alex Chen", "time": "2:14 PM", "subject": "Migration plan",
   "body": "Can we move the cutover to Tuesday?\n\nStaging looked clean last night." }
-</notch_card>
 ```
 ### `list` — ranked / labelled rows
@@ -95,14 +100,12 @@ A scrolling list. Use for unread mail, todos, news, search results, top-N.
 Each item: `{ "title": ✓, "meta"?: "secondary line", "value"?: "right-aligned tag", "index"?: "1" }`
 ```
-<notch_card type="list">
 { "title": "Unread mail", "page": 1, "pages": 3,
   "items": [
     { "index": "1", "title": "Alex Chen", "meta": "Migration plan", "value": "2m" },
     { "index": "2", "title": "Figma", "meta": "3 new comments", "value": "1h" },
     { "index": "3", "title": "GitHub", "meta": "CI passed on main", "value": "3h" }
   ] }
-</notch_card>
 ```
 ### `calendar` — day + events
@@ -118,14 +121,12 @@ Day hero with a scrolling event list and a built-in empty state.
 Each event: `{ "time": "10:00", "title": "Stand-up", "duration"?: "30m" }`
 ```
-<notch_card type="calendar">
 { "weekday": "Thu", "date": "May 28", "count": "3 events",
   "events": [
     { "time": "10:00", "title": "Stand-up", "duration": "30m" },
     { "time": "14:00", "title": "Design review", "duration": "1h" },
     { "time": "16:30", "title": "1:1 with Sam", "duration": "30m" }
   ] }
-</notch_card>
 ```
 ### `weather` — current conditions
@@ -141,10 +142,8 @@ optional third tile.
 | `extra_label`, `extra_value` | – | optional third tile (e.g. Wind) |
 ```
-<notch_card type="weather">
 { "location": "San Francisco", "temp": "17", "condition": "Partly cloudy",
   "high": "19", "low": "12", "extra_label": "Wind", "extra_value": "12mph" }
-</notch_card>
 ```
 ### `ticker` — finance quote
@@ -166,10 +165,8 @@ leading `-`), and an **auto-generated sparkline** from a number array.
 > can't get a series, omit `points` and the card still looks fine without it.
 ```
-<notch_card type="ticker">
 { "symbol": "TSLA", "name": "Tesla Inc", "price": "$248.50", "change": "+2.4%",
   "points": [241, 239, 244, 242, 247, 245, 250, 248.5] }
-</notch_card>
 ```
 ### `text` — title + long body
@@ -183,10 +180,8 @@ summaries, explanations, "read me this", a quote, a paragraph answer.
 | `tag` | – | small label top-right (e.g. `"#482"`) |
 ```
-<notch_card type="text">
 { "title": "PR summary", "tag": "#482",
   "body": "Refactors the notch pipeline into a preset renderer.\n\nAdds nine presets the user can scroll." }
-</notch_card>
 ```
 ### `comparison` — two columns
@@ -200,11 +195,9 @@ Side-by-side options, each with a value and a note.
 | `left_note`, `right_note` | – | muted line under each value |
 ```
-<notch_card type="comparison">
 { "title": "Postgres vs SQLite",
   "left_title": "Postgres", "left_value": "Concurrent ✓", "left_note": "Heavier setup",
   "right_title": "SQLite", "right_value": "Single-writer", "right_note": "Zero setup" }
-</notch_card>
 ```
 ---
@@ -212,8 +205,9 @@ Side-by-side options, each with a value and a note.
 ## When no preset fits → go custom
 If the answer needs a shape these don't cover, hand-write the whole card with
-`<notch_html>…</notch_html>` instead (see `SKILL.md` → custom snippets). And if you
-build a custom card you'll reuse, save it to `frequentSnippets/` so next time is a
+`<notch_html>…</notch_html>` instead (see `SKILL.md` → custom cards). Use the raw
+tag, not a JSON string — you write real markup with no escaping. If you build a
+custom card you'll reuse, save it to `frequentSnippets/` so next time is a
 copy-fill-send. When a custom card you've made gets requested a lot, tell Bruno —
 it's a candidate for a real shipped preset.

package/workspace/skills/mac/skill.json CHANGED Viewed

@@ -5,11 +5,11 @@
   "bloby_human": "Bruno Bertapeli",
   "bloby": "bloby-bruno",
   "author": "newbot-official",
-  "description": "Morphy native macOS companion. Activates on the [Mac] tag. Output is a concise spoken reply (TTS); optionally accompany it with a <notch_html>…</notch_html> visual card rendered inside the MacBook notch (383×147pt, transparent over black). Frequent snippets are cached in workspace/skills/mac/frequentSnippets/ for instant re-use.",
+  "description": "Morphy native macOS companion. Activates on the [Mac] tag. You reply with a concise spoken line (TTS) and optionally drive the Mac's action registry — one <mac_actions> JSON array that can show a notch card (preset), point the mascot at the screen, or spotlight a control. Custom cards use raw <notch_html>. The same registry works proactively (PULSE/cron) wrapped in <mac_push>. Card presets + schemas: presets/PRESETS.md. Reusable custom cards: frequentSnippets/.",
   "depends": [],
   "env_keys": [],
   "has_telemetry": false,
-  "size": "8KB",
+  "size": "12KB",
   "contains_binaries": false,
-  "tags": ["mac", "morphy", "notch", "macos", "voice", "tts", "visual", "html"]
+  "tags": ["mac", "morphy", "notch", "macos", "voice", "tts", "visual", "html", "registry", "actions", "spotlight", "point"]
 }

package/workspace/skills/plaud/SKILL.md CHANGED Viewed

@@ -1,3 +1,8 @@
+---
+name: plaud
+description: "Plaud Note integration. Pairs the user's Plaud account (email OTP or paste-token for Google/Apple identities), pulls recordings into workspace/files/audio/plaud/, and routes transcription through either the Bloby Marketplace audio-to-text service (pay-per-minute) or the human's own provider (Groq / OpenAI Whisper / Mistral Voxtral / local)."
+---
 # Plaud
 ## What This Is

package/workspace/skills/whatsapp/SKILL.md CHANGED Viewed

@@ -1,3 +1,8 @@
+---
+name: whatsapp
+description: "WhatsApp channel for your agent via Baileys. QR auth, messaging, voice transcription, channel and business modes."
+---
 # WhatsApp
 ## What This Is