bloby-bot 0.54.10 → 0.54.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "bloby-bot",
3
- "version": "0.54.10",
3
+ "version": "0.54.12",
4
4
  "releaseNotes": [
5
5
  "1. New Morphy animation system: config-driven sprites loaded from /morphy/*.json",
6
6
  "2. Swapped teleporting (splash) and headphones (bubble + chat) to the new format",
@@ -44,11 +44,13 @@ const CLIENT_INFO = { name: 'bloby', title: 'Bloby', version: '1' };
44
44
  const REQUEST_TIMEOUT_MS = 60_000;
45
45
  const VALID_EFFORTS = new Set(['none', 'minimal', 'low', 'medium', 'high', 'xhigh']);
46
46
  /**
47
- * Per-turn watchdog. `turn/completed` is a NON-guaranteed notification — if the
48
- * app-server stalls mid-turn without exiting, the RPC `exit` handler never fires
49
- * and `busy` stays true forever (live: wedges the dashboard + defers backend
50
- * restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done never
51
- * arrives). Claude's one-shot path has the same 5-min guard. Mirrors it here.
47
+ * Per-turn IDLE watchdog. `turn/completed` is a NON-guaranteed notification — if
48
+ * the app-server stalls mid-turn without exiting, the RPC `exit` handler never
49
+ * fires and `busy` stays true forever (live: wedges the dashboard + defers
50
+ * backend restarts; one-shot: pins the WhatsApp/scheduler slot since bot:done
51
+ * never arrives). This is an IDLE timeout, reset on every notification for the
52
+ * conversation — a legitimately long turn (deep reasoning, many tool calls)
53
+ * keeps emitting events and is never killed; only true silence trips recovery.
52
54
  */
53
55
  const TURN_WATCHDOG_MS = 5 * 60_000;
54
56
 
@@ -172,8 +174,8 @@ class CodexRpc {
172
174
  private closed = false;
173
175
  private stderrBuf = '';
174
176
 
175
- start(): void {
176
- this.proc = spawn(resolveCodexBin(), ['app-server'], { stdio: ['pipe', 'pipe', 'pipe'] });
177
+ start(extraArgs: string[] = []): void {
178
+ this.proc = spawn(resolveCodexBin(), ['app-server', ...extraArgs], { stdio: ['pipe', 'pipe', 'pipe'] });
177
179
  const rl = readline.createInterface({ input: this.proc.stdout });
178
180
  rl.on('line', (line) => this.onLine(line));
179
181
 
@@ -246,14 +248,30 @@ class CodexRpc {
246
248
  }
247
249
 
248
250
  private handleServerRequest(msg: { id: number; method: string; params?: any }): void {
249
- const isApproval = msg.method.endsWith('/requestApproval');
250
- if (isApproval) {
251
- log.info(`[codex-rpc] auto-accepting server request: ${msg.method}`);
252
- this.respond(msg.id, 'acceptForSession');
253
- return;
251
+ // Responses are OBJECTS, not bare strings: CommandExecution/FileChange approval
252
+ // responses are `{ decision }` (CommandExecutionApprovalDecision / FileChangeApprovalDecision),
253
+ // and the legacy v1 aliases take `{ decision }` with the ReviewDecision enum.
254
+ // (None of these fire under approvalPolicy:'never' + danger-full-access, but reply
255
+ // correctly so an edge-case request can't stall the turn with a malformed reply.)
256
+ switch (msg.method) {
257
+ case 'item/commandExecution/requestApproval':
258
+ case 'item/fileChange/requestApproval':
259
+ log.info(`[codex-rpc] auto-accepting ${msg.method}`);
260
+ this.respond(msg.id, { decision: 'acceptForSession' });
261
+ return;
262
+ case 'execCommandApproval':
263
+ case 'applyPatchApproval':
264
+ log.info(`[codex-rpc] auto-accepting (legacy) ${msg.method}`);
265
+ this.respond(msg.id, { decision: 'approved_for_session' });
266
+ return;
267
+ // account/chatgptAuthTokens/refresh is only used by client-managed-token
268
+ // clients; Bloby authenticates via chatgpt OAuth and the app-server refreshes
269
+ // ~/.codex/auth.json itself, so this server-request never fires for us. Decline
270
+ // cleanly (a stale-credential edge would surface as a normal turn error instead).
271
+ default:
272
+ log.warn(`[codex-rpc] unhandled server request ${msg.method} — replying -32601`);
273
+ this.respondError(msg.id, -32601, `Method ${msg.method} not implemented by Bloby client`);
254
274
  }
255
- log.warn(`[codex-rpc] unhandled server request ${msg.method} — replying with error`);
256
- this.respondError(msg.id, -32601, `Method ${msg.method} not implemented by Bloby client`);
257
275
  }
258
276
 
259
277
  private respond(id: number, result: any): void {
@@ -304,8 +322,16 @@ class CodexRpc {
304
322
  p.reject(new Error('RPC connection closed'));
305
323
  }
306
324
  this.pending.clear();
307
- try { this.proc?.stdin.end(); } catch {}
308
- try { this.proc?.kill('SIGTERM'); } catch {}
325
+ const proc = this.proc;
326
+ try { proc?.stdin.end(); } catch {}
327
+ try { proc?.kill('SIGTERM'); } catch {}
328
+ // Escalate to SIGKILL if the app-server ignores SIGTERM (no true leak today
329
+ // since SIGTERM reaps it, but a SIGTERM-ignoring build would otherwise survive).
330
+ if (proc) {
331
+ const t = setTimeout(() => { try { proc.kill('SIGKILL'); } catch {} }, 2000);
332
+ if (typeof t.unref === 'function') t.unref();
333
+ proc.once('exit', () => clearTimeout(t));
334
+ }
309
335
  this.proc = null;
310
336
  }
311
337
  }
@@ -320,6 +346,9 @@ interface CodexConversation {
320
346
  onMessage: OnAgentMessage;
321
347
  /** Currently in-flight turn id (set on `turn/started`, cleared on `turn/completed`). */
322
348
  currentTurnId: string | null;
349
+ /** itemId of the agentMessage currently streaming — used to insert a paragraph
350
+ * break when a turn emits multiple separate agentMessage items. */
351
+ currentMsgItemId: string | null;
323
352
  /** Streaming text accumulator for the current turn's agentMessage items. */
324
353
  fullText: string;
325
354
  /** Tools/items used during the current turn, for the bot:turn-complete payload. */
@@ -462,10 +491,14 @@ async function steerOrQueue(conv: CodexConversation, content: string, savedFiles
462
491
 
463
492
  function handleNotification(conv: CodexConversation, n: { method: string; params?: any }): void {
464
493
  const p = n.params || {};
494
+ // Any notification for this conv proves the app-server is alive and working —
495
+ // reset the idle watchdog so a long-but-active turn isn't torn down.
496
+ if (conv.turnWatchdog) armTurnWatchdog(conv);
465
497
  switch (n.method) {
466
498
  case 'turn/started': {
467
499
  conv.currentTurnId = p.turn?.id || null;
468
500
  conv.fullText = '';
501
+ conv.currentMsgItemId = null;
469
502
  conv.usedFileTools = false;
470
503
  break;
471
504
  }
@@ -473,6 +506,13 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
473
506
  case 'item/agentMessage/delta': {
474
507
  const delta: string = p.delta || '';
475
508
  if (!delta) break;
509
+ // A turn can emit multiple agentMessage items (commentary then final_answer).
510
+ // On a new itemId, insert a paragraph break so they don't run together (mirrors claude.ts).
511
+ if (p.itemId && conv.currentMsgItemId && p.itemId !== conv.currentMsgItemId && conv.fullText && !conv.fullText.endsWith('\n')) {
512
+ conv.fullText += '\n\n';
513
+ conv.onMessage('bot:token', { conversationId: conv.id, token: '\n\n' });
514
+ }
515
+ if (p.itemId) conv.currentMsgItemId = p.itemId;
476
516
  conv.fullText += delta;
477
517
  conv.onMessage('bot:token', { conversationId: conv.id, token: delta });
478
518
  break;
@@ -512,6 +552,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
512
552
  input: { query: item.query || '' },
513
553
  });
514
554
  break;
555
+ case 'collabAgentToolCall':
556
+ // Codex's collaborating sub-agents (rarely enabled) → Bloby's sub-agent UX.
557
+ if (item.tool === 'spawnAgent') {
558
+ conv.onMessage('bot:task-created', {
559
+ conversationId: conv.id,
560
+ taskId: item.id,
561
+ description: item.prompt || 'sub-agent',
562
+ type: 'collab',
563
+ });
564
+ }
565
+ break;
515
566
  // userMessage / agentMessage / reasoning — no tool-style event.
516
567
  }
517
568
  break;
@@ -528,6 +579,14 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
528
579
  conv.onMessage('bot:token', { conversationId: conv.id, token: text });
529
580
  }
530
581
  }
582
+ if (item.type === 'collabAgentToolCall' && item.tool === 'spawnAgent') {
583
+ conv.onMessage('bot:task-done', {
584
+ conversationId: conv.id,
585
+ taskId: item.id,
586
+ status: item.status,
587
+ summary: item.prompt || '',
588
+ });
589
+ }
531
590
  break;
532
591
  }
533
592
 
@@ -598,8 +657,17 @@ function handleNotification(conv: CodexConversation, n: { method: string; params
598
657
  break;
599
658
  }
600
659
 
601
- // thread/started, thread/status/changed, mcpServer/startupStatus/updated,
602
- // remoteControl/status/changed informational, no-op for the dashboard.
660
+ case 'mcpServer/startupStatus/updated': {
661
+ // Surface MCP servers (from MCP.json → -c overrides) that fail to start,
662
+ // so a misconfigured server is visible instead of silently absent.
663
+ if (p.status === 'failed' || p.status === 'cancelled') {
664
+ log.warn(`[codex] MCP server "${p.name}" ${p.status}${p.error ? `: ${p.error}` : ''}`);
665
+ }
666
+ break;
667
+ }
668
+
669
+ // thread/started, thread/status/changed, remoteControl/status/changed —
670
+ // informational, no-op for the dashboard.
603
671
  }
604
672
  }
605
673
 
@@ -631,7 +699,7 @@ async function spawnAndInitialize(
631
699
 
632
700
  const { id: modelId, effort } = parseModelString(model);
633
701
  const rpc = new CodexRpc();
634
- rpc.start();
702
+ rpc.start(buildMcpConfigArgs());
635
703
 
636
704
  const conv: CodexConversation = {
637
705
  id: conversationId,
@@ -640,6 +708,7 @@ async function spawnAndInitialize(
640
708
  effort,
641
709
  onMessage,
642
710
  currentTurnId: null,
711
+ currentMsgItemId: null,
643
712
  fullText: '',
644
713
  usedFileTools: false,
645
714
  pendingInputs: [],
@@ -703,27 +772,109 @@ async function spawnAndInitialize(
703
772
  }
704
773
 
705
774
  const SKILLS_DIR = path.join(WORKSPACE_DIR, 'skills');
775
+ // Codex discovers "repo"-scope skills under `<cwd>/.codex/skills` (verified
776
+ // against 0.135.0 — a bare `<cwd>/skills` is NOT scanned, and `skills/list`
777
+ // has no extra-root param). Bloby keeps the canonical skills in
778
+ // `workspace/skills/<name>`, so we mirror each one into `.codex/skills/<name>`
779
+ // as a symlink — single source of truth, discoverable by codex's native router.
780
+ // (Each SKILL.md needs YAML frontmatter or codex rejects it — see SKILL_FORMAT_MIGRATION.md.)
781
+ const CODEX_SKILLS_ROOT = path.join(WORKSPACE_DIR, '.codex', 'skills');
782
+
783
+ /** Mirror workspace/skills/<name> → workspace/.codex/skills/<name> as symlinks (idempotent). */
784
+ function syncCodexSkillRoot(): void {
785
+ let names: string[] = [];
786
+ try {
787
+ names = fs.readdirSync(SKILLS_DIR, { withFileTypes: true })
788
+ .filter((e) => e.isDirectory() || e.isSymbolicLink())
789
+ .map((e) => e.name);
790
+ } catch {
791
+ return; // no skills dir — nothing to mirror
792
+ }
793
+ try { fs.mkdirSync(CODEX_SKILLS_ROOT, { recursive: true }); } catch {}
794
+ for (const name of names) {
795
+ const target = path.join(SKILLS_DIR, name);
796
+ const link = path.join(CODEX_SKILLS_ROOT, name);
797
+ try {
798
+ const cur = fs.existsSync(link) ? fs.realpathSync(link) : null;
799
+ if (cur === fs.realpathSync(target)) continue; // already correct
800
+ try { fs.rmSync(link, { recursive: true, force: true }); } catch {}
801
+ fs.symlinkSync(target, link, 'dir');
802
+ } catch (err: any) {
803
+ log.warn(`[codex] could not mirror skill "${name}" into .codex/skills: ${err.message}`);
804
+ }
805
+ }
806
+ }
706
807
 
707
808
  function primeWorkspaceSkills(rpc: CodexRpc): void {
809
+ syncCodexSkillRoot();
708
810
  rpc.request('skills/list', {
709
811
  cwds: [WORKSPACE_DIR],
710
812
  forceReload: true,
711
- perCwdExtraUserRoots: [{
712
- cwd: WORKSPACE_DIR,
713
- extraUserRoots: [SKILLS_DIR],
714
- }],
715
813
  }).then((result: any) => {
716
814
  const entry = result?.data?.[0];
717
815
  const all = entry?.skills ?? [];
718
- const workspace = all.filter((s: any) => s.scope !== 'system');
816
+ const repo = all.filter((s: any) => s.scope === 'repo');
719
817
  const errors = entry?.errors ?? [];
720
- log.ok(`[codex] skills primed: ${workspace.length} workspace, ${all.length - workspace.length} system${errors.length ? `, ${errors.length} rejected` : ''}`);
818
+ log.ok(`[codex] skills primed: ${repo.length} workspace (repo), ${all.length - repo.length} user/system${errors.length ? `, ${errors.length} rejected` : ''}`);
721
819
  for (const err of errors) log.warn(`[codex] skill load error: ${err.path} — ${err.message}`);
722
820
  }).catch((err: any) => {
723
821
  log.warn(`[codex] skills/list failed: ${err.message}`);
724
822
  });
725
823
  }
726
824
 
825
+ /* ── MCP wiring ────────────────────────────────────────────────────────── */
826
+
827
+ const MCP_CONFIG_FILE = path.join(WORKSPACE_DIR, 'MCP.json');
828
+
829
+ /**
830
+ * Load MCP servers from workspace/MCP.json (the same file the Claude harness
831
+ * reads). Accepts the canonical unwrapped map `{ name: { command, args, env } }`,
832
+ * a `{ mcpServers: {...} }` wrapper, or a legacy array of single-key maps.
833
+ * Returns {} when absent/invalid — so this is a no-op until the user populates MCP.json.
834
+ */
835
+ function loadMcpServersForCodex(): Record<string, any> {
836
+ try {
837
+ const raw = JSON.parse(fs.readFileSync(MCP_CONFIG_FILE, 'utf-8'));
838
+ let map: any = raw;
839
+ if (raw && typeof raw === 'object' && raw.mcpServers && typeof raw.mcpServers === 'object') map = raw.mcpServers;
840
+ else if (Array.isArray(raw)) map = Object.assign({}, ...raw);
841
+ if (map && typeof map === 'object' && !Array.isArray(map)) return map;
842
+ } catch {}
843
+ return {};
844
+ }
845
+
846
+ /** Serialize a JS value as a TOML literal for a `-c key=value` override. */
847
+ function toToml(v: any): string {
848
+ if (Array.isArray(v)) return `[${v.map(toToml).join(',')}]`;
849
+ if (v && typeof v === 'object') return `{${Object.entries(v).map(([k, val]) => `${JSON.stringify(k)}=${toToml(val)}`).join(',')}}`;
850
+ if (typeof v === 'number' || typeof v === 'boolean') return String(v);
851
+ return JSON.stringify(String(v)); // TOML basic string — JSON escaping is compatible
852
+ }
853
+
854
+ /**
855
+ * Translate MCP.json into `codex app-server -c mcp_servers.<name>.<field>=<toml>`
856
+ * spawn flags. Codex sources MCP from its own config layer rather than a per-query
857
+ * param (verified against 0.135.0: a `-c mcp_servers.X.command=...` override shows
858
+ * up in both mcpServerStatus/list and config/read). Only the stdio fields Bloby
859
+ * uses (command/args/env) are translated; names must be TOML-bare-key safe.
860
+ */
861
+ function buildMcpConfigArgs(): string[] {
862
+ const servers = loadMcpServersForCodex();
863
+ const args: string[] = [];
864
+ let wired = 0;
865
+ for (const [name, cfg] of Object.entries(servers)) {
866
+ if (!/^[A-Za-z0-9_-]+$/.test(name)) { log.warn(`[codex] skipping MCP server "${name}" — name not TOML-bare-key safe`); continue; }
867
+ const c: any = cfg || {};
868
+ if (!c.command) { log.warn(`[codex] skipping MCP server "${name}" — no command`); continue; }
869
+ args.push('-c', `mcp_servers.${name}.command=${toToml(c.command)}`);
870
+ if (Array.isArray(c.args) && c.args.length) args.push('-c', `mcp_servers.${name}.args=${toToml(c.args)}`);
871
+ if (c.env && typeof c.env === 'object' && Object.keys(c.env).length) args.push('-c', `mcp_servers.${name}.env=${toToml(c.env)}`);
872
+ wired++;
873
+ }
874
+ if (wired) log.info(`[codex] wiring ${wired} MCP server(s) from MCP.json via -c overrides`);
875
+ return args;
876
+ }
877
+
727
878
  /* ── Harness implementation ────────────────────────────────────────────── */
728
879
 
729
880
  export function hasConversation(conversationId: string): boolean {
@@ -856,7 +1007,7 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
856
1007
  const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
857
1008
 
858
1009
  const rpc = new CodexRpc();
859
- rpc.start();
1010
+ rpc.start(buildMcpConfigArgs());
860
1011
 
861
1012
  let fullText = '';
862
1013
  const usedTools = new Set<string>();
@@ -1856,6 +1856,28 @@ mint();
1856
1856
  };
1857
1857
  }
1858
1858
 
1859
+ // Same for Codex OAuth: the app-server reads ~/.codex/auth.json at spawn, so a
1860
+ // running subprocess only adopts a new identity on re-spawn. End live conversations
1861
+ // after a successful exchange so the next message cold-starts with the fresh token.
1862
+ // (Wraps only the one-shot /exchange; the device-code /status route latches
1863
+ // success on every poll and must not re-fire teardown — handled at re-spawn instead.)
1864
+ if (req.method === 'POST' && req.url === '/api/auth/codex/exchange') {
1865
+ const origEnd = res.end.bind(res);
1866
+ (res as any).end = function (this: typeof res, ...args: any[]) {
1867
+ try {
1868
+ const body = typeof args[0] === 'string' ? args[0] : args[0]?.toString();
1869
+ if (body) {
1870
+ const json = JSON.parse(body);
1871
+ if (json.success) {
1872
+ log.info('[orchestrator] Codex re-auth succeeded — restarting conversations with fresh token');
1873
+ endAllConversations();
1874
+ }
1875
+ }
1876
+ } catch {}
1877
+ return origEnd(...args);
1878
+ };
1879
+ }
1880
+
1859
1881
  workerApp(req, res);
1860
1882
  return;
1861
1883
  }
@@ -160,6 +160,26 @@ function triggerAgent(prompt: string, label: string, onComplete?: () => void) {
160
160
  }
161
161
 
162
162
  if (type === 'bot:done') {
163
+ // ── Mac channel push (server-initiated) ──
164
+ // When the `mac` skill is installed, it instructs the agent to wrap any
165
+ // Mac-bound pulse/cron output in <mac_push>…</mac_push> (a spoken line +
166
+ // an optional <notch_card>/<notch_html>). Forward each block's inner
167
+ // content over the chat WebSocket as an unsolicited `mac:push` frame —
168
+ // the Morphy Mac app renders it (notch card + TTS) without the user
169
+ // having pushed to talk. If the skill isn't installed the agent never
170
+ // emits this tag, so nothing is pushed and non-Mac users carry no weight.
171
+ if (fullResponse) {
172
+ const macPushRegex = /<mac_push>([\s\S]*?)<\/mac_push>/g;
173
+ let macMatch;
174
+ while ((macMatch = macPushRegex.exec(fullResponse)) !== null) {
175
+ const macContent = macMatch[1].trim();
176
+ if (macContent) {
177
+ broadcastBloby('mac:push', { content: macContent });
178
+ log.info(`[scheduler] Mac push broadcast (${macContent.length} chars)`);
179
+ }
180
+ }
181
+ }
182
+
163
183
  // Extract <Message> blocks after agent turn completes
164
184
  if (fullResponse) {
165
185
  const messageRegex = /<Message(?:\s+([^>]*))?>(([\s\S]*?))<\/Message>/g;
@@ -1,3 +1,8 @@
1
+ ---
2
+ name: alexa
3
+ description: "Alexa voice channel for your agent via the public 'Morphy' skill. Code-based pairing, voice-first response style, three-pattern decision tree (fast / chat-deferred / HA-announce-deferred)."
4
+ ---
5
+
1
6
  # Alexa
2
7
 
3
8
  ## What This Is
@@ -1,287 +1,276 @@
1
- # Mac (Morphy notch)
1
+ ---
2
+ name: mac
3
+ description: "Morphy native macOS companion. Activates on the [Mac] tag. You reply with a concise spoken line (TTS) and optionally drive the Mac's action registry — one <mac_actions> JSON array that can show a notch card, point the mascot at the screen, or spotlight a control. Custom cards use <notch_html>. The same registry works proactively (PULSE/cron) wrapped in <mac_push>. Card presets + schemas: presets/PRESETS.md. Reusable custom cards: frequentSnippets/."
4
+ ---
5
+
6
+ # Mac (Morphy companion)
2
7
 
3
8
  ## What This Is
4
9
 
5
- A channel for replying to your human **on their Mac**, through the **Morphy companion app** that lives in the MacBook notch. You speak a short reply (TTS) and *optionally* push a small HTML+CSS card into the notch — same beat as the audio. No pull, no schedule, no state to maintain. Pure output channel.
10
+ A channel for reaching your human **on their Mac**, through the **Morphy companion app** that lives in the menu bar + the MacBook notch. Two things happen per turn:
6
11
 
7
- The notch's bottom slot is rendered with a sandboxed `WKWebView`. You're not building a webpage; you're hand-rolling a single ~9.4 cm² glance card.
12
+ 1. **You speak** — your concise reply is read aloud (ElevenLabs TTS).
13
+ 2. **You optionally drive the Mac** — through one **action registry**: a `<mac_actions>` block holding a JSON array of actions. An action can render a **card** in the notch, **point** the mascot at something on screen, or **spotlight** a control. (Custom hand-written HTML cards use the raw `<notch_html>` tag — see below.)
14
+
15
+ The registry is **extensible**: today it's `card`, `point`, `spotlight`; more verbs arrive over time. You drive it the same way whether the human just talked to you (reply) or you're reaching out on your own (proactive push).
8
16
 
9
17
  ---
10
18
 
11
19
  ## When To Use This Skill
12
20
 
13
- Activate when the **user message starts with `[Mac]`**. Don't apply it to other inbound traffic (WhatsApp, Alexa, web chat). Specifically:
21
+ Activate when the **user message starts with `[Mac]`**. Don't apply it to other inbound traffic (WhatsApp, Alexa, web chat).
14
22
 
15
23
  - `[Mac] what's on my calendar today?` → activate
16
- - `[Mac] what time is it in Tokyo?` → activate
24
+ - `[Mac] where do I cancel this subscription?` → activate (point/spotlight at it)
17
25
  - `what's on my calendar today?` (no tag) → ignore this skill, reply as usual
18
26
 
19
- The tag is injected by the Morphy app itself when the human pushes-to-talk through Morphy, so by the time you see `[Mac]` it means **your reply will be spoken back at them**. Optimize for the ear, supplement with the eye.
27
+ The tag is injected by the Morphy app when the human pushes-to-talk, **and every `[Mac]` turn carries a screenshot of each display** so you can *see* their screen and reference exact pixels on it. Optimize for the ear, supplement with the eye, and act on the screen when it helps.
20
28
 
21
29
  ---
22
30
 
23
- ## Two ways to put a visual in the notch
31
+ ## The action registry — `<mac_actions>`
32
+
33
+ Your reply has two parts: **spoken prose** (plain text, read aloud) and an optional **`<mac_actions>` block** — a JSON array of actions the Mac runs. The Mac strips the block before TTS, so **it is never spoken**.
34
+
35
+ ```
36
+ Here's your day, Bruno.
37
+ <mac_actions>
38
+ [
39
+ { "type": "card", "preset": "calendar", "data": { "weekday": "Thu", "date": "May 28", "events": [ { "time": "10:00", "title": "Stand-up" } ] } }
40
+ ]
41
+ </mac_actions>
42
+ ```
24
43
 
25
- There are **two** ways to render a card. Reach for them in this order:
44
+ Rules:
26
45
 
27
- 1. **Preset (preferred).** Morphy ships a library of beautiful, pre-built card
28
- renderers (`email`, `calendar`, `list`, `text`, `weather`, `ticker`, `stat`,
29
- `info`, `comparison`). You send only **structured data** — never CSS — and
30
- Morphy lays it out perfectly, on-brand, with scrolling handled for you. It's
31
- the fast, pretty, low-token path. **Full data schemas + examples:
32
- [`presets/PRESETS.md`](presets/PRESETS.md).**
46
+ - **One `<mac_actions>` block per reply.** Put it after your spoken sentence.
47
+ - It's a **JSON array** — you can include **several actions** and they run **in order** (e.g. spotlight a control *and* point at it).
48
+ - Each element is a flat object with a `"type"` and that action's fields.
49
+ - If you have nothing to show or do, send **no block** — just speak.
33
50
 
34
- ```
35
- <notch_card type="email">
36
- { "from": "Alex Chen", "subject": "Migration plan", "time": "2:14 PM",
37
- "body": "Can we move the cutover to Tuesday?\n\nStaging looked clean." }
38
- </notch_card>
39
- ```
51
+ ### The action types
40
52
 
41
- 2. **Custom (escape hatch).** When no preset fits the shape of the answer,
42
- hand-write the whole card in `<notch_html>…</notch_html>` (rules below). Cards
43
- you hand-build and reuse get saved to `frequentSnippets/` so next time is
44
- copy-fill-send. If a custom card gets requested a lot, tell Bruno it's a
45
- candidate to become a real shipped preset.
53
+ | `type` | What the Mac does | Fields |
54
+ |---|---|---|
55
+ | `card` | Renders a card in the notch (a preset from the library). | `{ "preset": "<name>", "data": { … } }` |
56
+ | `point` | The mascot flies across the screen and points at a spot, with a label bubble. | `{ "x", "y", "label"?, "screen"? }` |
57
+ | `spotlight` | Dims the display and opens a soft glowing hole over a spot, isolating one control. | `{ "x", "y", "r"?, "label"?, "screen"? }` |
46
58
 
47
- **Check for a preset first.** Only drop to custom HTML when the data genuinely
48
- doesn't fit one of the presets.
59
+ > **Adding capabilities:** new verbs (open an app, run a shortcut, click a button…) will appear here as new `type`s over time. If you try a `type` the app doesn't know yet, it's safely ignored — so only use the types documented above.
49
60
 
50
- > **Long prose / "read me this" / a summary / an explanation → use the `text`
51
- > preset.** Do NOT dump raw paragraphs into `<notch_html>` — that tag is *HTML*,
52
- > not a text box. Bare text there renders unstyled, edge-to-edge, with no padding.
53
- > The `text` preset gives you a title, proper margins, and a scrollable body for
54
- > free: `<notch_card type="text">{ "title": "…", "body": "…long text with \n\n…" }</notch_card>`.
55
- > `<notch_html>` is only for a genuinely custom *visual layout* you hand-build with
56
- > real markup and inline styles — never as a dumping ground for plain text.
61
+ ---
57
62
 
58
- ## Output Format
63
+ ## The `card` action (notch cards)
59
64
 
60
- Your full reply lives inside the conversation; Morphy will:
65
+ A card is a small glance-visual in the notch's bottom slot — **383 × 147 pt, transparent over black**. There are two ways to make one; reach for them in this order:
61
66
 
62
- 1. Strip any `<notch_card …>…</notch_card>` **and** `<notch_html>…</notch_html>`
63
- block before TTS — neither is **ever** spoken.
64
- 2. Send the remaining text to ElevenLabs and play it through the Mac.
65
- 3. Render the card (preset → Morphy's renderer; custom → your HTML) and auto-open
66
- the notch downward **at the same moment** the audio starts. Visual + audio land
67
- together — that's the whole point.
67
+ ### 1. Preset (preferred) — `card` action
68
68
 
69
- So a reply has up to two parts — a spoken sentence and **one** card block (either
70
- a `<notch_card>` preset or a `<notch_html>` custom card, not both):
69
+ Morphy ships pre-built, on-brand renderers: **`email`, `calendar`, `list`, `text`, `weather`, `ticker`, `stat`, `info`, `comparison`**. You send only **structured data** — never CSS — and Morphy lays it out perfectly, scrolling handled. **Full data schemas + examples: [`presets/PRESETS.md`](presets/PRESETS.md).**
71
70
 
72
71
  ```
73
- <concise spoken sentence>
74
- <notch_card type="…">{ …data… }</notch_card>
72
+ Here it is, Bruno.
73
+ <mac_actions>
74
+ [ { "type": "card", "preset": "email", "data": { "from": "Alex Chen", "subject": "Migration plan", "time": "2:14 PM", "body": "Can we move the cutover to Tuesday?" } } ]
75
+ </mac_actions>
75
76
  ```
76
77
 
77
- ### Spoken text rules
78
+ > **Long prose / "read me this" / a summary → use the `text` preset**, not custom HTML:
79
+ > `{ "type": "card", "preset": "text", "data": { "title": "…", "body": "…long text with \n\n…" } }`. It gives you a title, margins, and a scrollable body for free.
78
80
 
79
- - **One or two sentences max.** It's audio, not prose. The human is mid-task when this plays — don't make them stand still for a paragraph.
80
- - **No markdown, no bullet lists, no enumerations.** TTS reads symbols literally and it sounds awful.
81
- - **Refer the human by name** if you know it ("Here's your calendar for today, Bruno.") — sounds personal, doesn't add length.
82
- - **Acknowledge the card when you send one.** Otherwise the visual feels disconnected from the voice. "Here it is.", "I put the details up top.", "Tap to glance.".
81
+ ### 2. Custom — raw `<notch_html>` (escape hatch)
83
82
 
84
- ### 🚫 The #1 mistake: speaking the card content out loud
83
+ When no preset fits, hand-write the whole card in a **`<notch_html>…</notch_html>`** tag (separate from `<mac_actions>`). Use the **raw tag, not a JSON string** — that's deliberate: you write real markup with no escaping.
85
84
 
86
- **This is the single most common failure mode and it ruins the UX.** If your card contains a list, a calendar, an email digest, news headlines, search results — *anything structured* — your spoken text **must NOT re-read those items**. The human is already looking at them. Reading them aloud doubles the time-to-information and makes Morphy feel broken.
85
+ ```
86
+ Pinned it up top, Bruno.
87
+ <notch_html>
88
+ <div style="padding:12px 16px;color:#fff;font-family:-apple-system">…your layout…</div>
89
+ </notch_html>
90
+ ```
87
91
 
88
- When the card carries the answer, the voice's only job is the **lead-in sentence** — and then stop talking.
92
+ Reusable custom cards live in [`frequentSnippets/`](frequentSnippets/) — read the file, fill the `{{placeholders}}`, drop the result inside `<notch_html>`. If a custom card gets asked for a lot, tell Bruno it's a candidate to become a shipped preset.
89
93
 
90
- #### BAD — speaks the card back at the human
94
+ **Check for a preset first.** Only drop to `<notch_html>` when the data genuinely doesn't fit one.
91
95
 
92
- > *"Top five AI today, Bruno. Anthropic dropped Claude Opus 4.7 with a one-million-token window. OpenAI is rolling Sora 3 video into ChatGPT for Plus users. Meta open-sourced Llama 4 Scout at 400 billion params. Google DeepMind's Gemini 3 hit number one on the LMSys arena. And Mistral closed a five billion dollar round at a forty billion valuation."*
93
- > ```
94
- > <notch_html>… the same five items rendered as a list …</notch_html>
95
- > ```
96
+ ### The canvas (for custom cards)
96
97
 
97
- ElevenLabs spends 30+ seconds reciting what's already on screen. The human paid for tokens AND TTS minutes to receive identical information twice. Don't do this.
98
+ | Constraint | Value |
99
+ |---|---|
100
+ | **Size** | **383 × 147 pt** (fixed) — about two stacked Spotlight rows. |
101
+ | **Background** | **Transparent over black**. Black is your canvas; use white/light text. |
102
+ | **Color** | White or `rgba(255,255,255,0.x)` tints read better than gray on black. |
103
+ | **Allowed** | HTML, CSS (flexbox, grid, gradients, transitions, animations), unicode/emoji. |
104
+ | **Forbidden** | External resources — `<img src="https://…">`, `<iframe>`, web fonts. The view has **no network**. |
105
+ | **Interactivity** | No clicks/hovers (they do nothing). Long content **scrolls** via trackpad — overflow is fine. |
106
+ | **Type size** | **12–14 px** is the floor on a Retina notch; below that white-on-black smears. |
98
107
 
99
- #### GOOD — lead-in only, card carries the detail
108
+ Tips: two-column `display:flex;gap:8px` at ~190 pt/column; a 1px `rgba(255,255,255,0.08)` divider under a header; don't outline the card (the black pill is the frame).
100
109
 
101
- > *"Here are today's top five in AI, Bruno."*
102
- > ```
103
- > <notch_html>… the same five items rendered as a list …</notch_html>
104
- > ```
110
+ ---
105
111
 
106
- Two seconds of speech, the eye picks up the rest, human keeps working.
112
+ ## `point` & `spotlight` (acting on the screen)
107
113
 
108
- #### Same rule, different shapes
114
+ Because every `[Mac]` turn attaches a **screenshot of each display**, you can reference exact pixels and Morphy animates over the *live* screen at the matching spot.
109
115
 
110
- | Question | Speech | Card |
111
- |---|---|---|
112
- | "What's on my calendar today?" | *"Here's your day, Bruno."* | day + events |
113
- | "Read me my unread emails." | *"Three worth a glance, Bruno."* | sender + subject list |
114
- | "Compare Postgres vs SQLite." | *"Quick side-by-side up top, Bruno."* | two-column card |
115
- | "What's the weather?" | *"Pinned the forecast for you, Bruno."* | temp + conditions |
116
- | "What time is it in Tokyo?" | *"It's 8:14 PM in Tokyo."* | **no card** — bare fact, voice is enough |
116
+ - `x`, `y` — **pixels measured on the screenshot you were given this turn** (top-left origin). Read the position straight off the image.
117
+ - `screen` *(optional)* — **1-based display index**, matching the order the screenshots were attached (screen 1 = first image). Omit for a single display or to default to the cursor's screen.
118
+ - `label` *(optional)* — short on-screen caption ("Send", "Cancel here").
119
+ - `r` *(spotlight only, optional)* — hole radius in points (default 90).
117
120
 
118
- The rule is symmetric: if the **voice alone** is the right answer, send **no card**. If the **card** is the answer, send a **short lead-in** in voice. **Never both at full length.**
121
+ `spotlight` + `point` pair beautifully — spotlight to kill the clutter, point to name it:
119
122
 
120
- ### Whether to send a card at all
123
+ ```
124
+ The cancel button's bottom-right, Bruno.
125
+ <mac_actions>
126
+ [
127
+ { "type": "spotlight", "x": 1180, "y": 540, "r": 80, "screen": 1 },
128
+ { "type": "point", "x": 1180, "y": 540, "label": "Cancel", "screen": 1 }
129
+ ]
130
+ </mac_actions>
131
+ ```
121
132
 
122
- - **Sometimes useful, sometimes not.** Decide per reply. A time-of-day answer doesn't need a card. A calendar, a list, a comparison, a status those benefit from one.
123
- - **Preset first.** If the answer is an email, a list, a calendar, a stat, a comparison, a quote/summary, weather, or a quote — there's almost certainly a preset for it (see [`presets/PRESETS.md`](presets/PRESETS.md)). Send data, not markup.
124
- - **Custom only when nothing fits.** Reach for `<notch_html>` when the shape is genuinely bespoke. Pin reusable custom cards as files in `frequentSnippets/` so you can copy-swap-send instead of regenerating.
125
- - **The card is display-only — but scrollable.** Don't put buttons, links, or "click to expand" — clicks do nothing and read as broken UI. But long content **is scrollable**: the user scrolls `email` / `list` / `calendar` / `text` cards with the trackpad (a thin scrollbar appears), so it's fine to let content overflow.
133
+ **Reply-only.** `point`/`spotlight` need the screenshot, so they only work on a reply to a `[Mac]` turn — **never** in a proactive push (no screenshot to map against). Cards work in both.
126
134
 
127
135
  ---
128
136
 
129
- ## The Canvas
137
+ ## Proactive pushes (PULSE / cron — you start the conversation)
130
138
 
131
- | Constraint | Value |
132
- |---|---|
133
- | **Width** | **383 points** (fixed) |
134
- | **Height** | **~147 points** (fixed; bottom is inset to clear the pill's rounded corners) |
135
- | **Background** | **Transparent**. Sits directly on a **black** pill. Plan accordingly — black is your canvas. |
136
- | **Font** | System (`-apple-system, BlinkMacSystemFont, system-ui`) is preloaded. Override only if you need a monospaced number or display weight. |
137
- | **Color** | White or light. Use `rgba(255,255,255,0.x)` for tints — opacity tints read better than gray on black. |
138
- | **Allowed** | HTML, CSS (incl. flexbox, grid, gradients, transitions, animations), unicode glyphs / emoji. |
139
- | **Forbidden** | JS that does network or file access, `<img src="https://...">`, `<iframe>`, anything that loads external resources. The view has no network. |
140
- | **Interactivity** | No clicks or hovers — they do nothing. But long content **scrolls** via the trackpad (a thin scrollbar appears), so overflow is fine. |
139
+ You can reach the Mac **without being asked** — during a `<PULSE/>` run or a scheduled cron, when memory says the human wants a proactive Mac update (a trade status, "tell me when new mail lands", a build result, a reminder).
141
140
 
142
- **383 × 147 is small.** Roughly the size of two stacked Spotlight rows. Treat each card as a single coherent idea — date + time, one event, one fact, one comparison. If you can't fit it, paginate via speech ("here's the first, ask 'next' for the rest"), don't shrink the type.
141
+ Opt-in and quiet by design:
143
142
 
144
- ### Space tips
143
+ - **Only push when the human asked for it** (a memory/instruction). A push interrupts with voice + visuals — never speculative.
144
+ - **Wrap the whole Mac payload in `<mac_push>…</mac_push>`.** Inside, write *exactly* a normal reply: a spoken line + an optional `<mac_actions>` block (a `card` is ideal) and/or `<notch_html>`. The supervisor forwards the wrapper's inner content to the Mac as an unsolicited message; the app speaks it and renders it, same as a reply.
145
145
 
146
- - **Two-column grids** with `display: flex; gap: 8px` are very effective at ~190pt per column.
147
- - **A divider line** (`rgba(255,255,255,0.08)`, 1px) under a header separates regions without visually crowding.
148
- - **Tabs / pagination** can be visual-only (a row of dots indicating "1 of 3"); say "next" in the audio and resend a fresh snippet for page 2.
149
- - **Don't outline the card.** The black pill is already the frame.
150
- - **12–14px is the smallest comfortable size** on a Retina notch. Below that, antialiasing makes white text on black smear.
146
+ ```
147
+ <mac_push>
148
+ Your TSLA position is up 2.1% today, Bruno.
149
+ <mac_actions>
150
+ [ { "type": "card", "preset": "stat", "data": { "value": "+2.1%", "label": "TSLA · today" } } ]
151
+ </mac_actions>
152
+ </mac_push>
153
+ ```
151
154
 
152
- ---
155
+ - **No `point`/`spotlight` in a push** — there's no screenshot behind it. Spoken line + a `card` (or `<notch_html>`) only.
156
+ - **Fire-and-forget, online-only.** The push lands **only if the Mac is connected and the human isn't mid-interaction**; otherwise it's silently dropped (not queued). For must-not-miss updates, also emit a `<Message>` block (web/push) — `<mac_push>` and `<Message>` are independent.
157
+ - **Keep the reply discipline** (below): ≤ 2 sentences, no markdown, never read the card aloud. The human didn't ask, so be especially brief.
153
158
 
154
- ## Frequent Snippets
159
+ If the `mac` skill isn't installed, none of this exists — so only emit `<mac_push>` when this skill is active and the human has opted into Mac updates.
155
160
 
156
- `workspace/skills/mac/frequentSnippets/*.html` — pre-built cards. When your human asks something you've answered before, **don't regenerate** the markup. Read the file, replace placeholders, paste into `<notch_html>…</notch_html>`. Saves tokens, lowers latency, lets the visual feel land at the same beat as the voice.
161
+ ---
162
+
163
+ ## Spoken-text rules
157
164
 
158
- **Workflow:**
165
+ - **One or two sentences max.** It's audio. The human is mid-task — don't make them stand still for a paragraph.
166
+ - **No markdown, no bullet lists, no enumerations.** TTS reads symbols literally and it sounds awful.
167
+ - **Refer to the human by name** if you know it — personal, costs nothing.
168
+ - **Acknowledge a card/action when you send one** ("Here it is.", "Pinned it up top.", "It's the gear, top-right.") so the visual feels connected to the voice.
169
+ - **Name things, don't narrate coordinates.** *"It's the gear, top-right."* — never *"I'm pointing at 1890 comma 40."*
159
170
 
160
- 1. First time the human asks "what's on my calendar today?" — generate a card from scratch, show it, ask: *"Want me to save this as a template for next time?"*
161
- 2. If yes, write it to `frequentSnippets/calendar-today.html` with `{{placeholders}}` for the dynamic bits.
162
- 3. Next time the same question comes in, `Read` the file, do a simple string-replace on the placeholders, paste the result into `<notch_html>…</notch_html>`.
171
+ ### 🚫 The #1 mistake: speaking the card content out loud
163
172
 
164
- Use `{{double_curly}}` placeholders so they're easy to grep and unambiguous. Keep file names kebab-cased and intent-named (`calendar-today.html`, `weather-now.html`, `pomodoro-status.html`).
173
+ If your card carries a list, calendar, email digest, headlines — *anything structured* — your spoken text **must NOT re-read those items**. The human is already looking at them. When the card carries the answer, the voice's only job is a short **lead-in**, then stop.
165
174
 
166
- Starter snippets in this skill (read them to see the style):
175
+ **❌ BAD:** *"Top five in AI: Anthropic shipped Claude Opus… OpenAI rolled out… Meta open-sourced…"* (30s reciting what's on screen)
176
+ **✓ GOOD:** *"Here are today's top five, Bruno."* + the list card.
167
177
 
168
- - `frequentSnippets/calendar-today.html` day + date hero, event list, "no events" empty state.
169
- - `frequentSnippets/info-card.html` — two-column key/value card.
170
- - `frequentSnippets/single-stat.html` — one big number, one supporting line.
178
+ The rule is symmetric: if the **voice alone** is the right answer, send **no card**. If the **card** is the answer, send a **short lead-in**. Never both at full length.
171
179
 
172
- You're encouraged to **discuss the snippet design with your human** — they see it every day, they'll have opinions on density, accent color, what to lead with. Keep the file in sync with what they tell you. Treat each `.html` as a small product surface.
180
+ | Question | Speech | Visual |
181
+ |---|---|---|
182
+ | "What's on my calendar?" | *"Here's your day, Bruno."* | `card` calendar |
183
+ | "Read me my unread emails." | *"Three worth a glance, Bruno."* | `card` list |
184
+ | "Where do I cancel?" | *"Bottom-right, Bruno."* | `spotlight` + `point` |
185
+ | "What time is it in Tokyo?" | *"It's 8:14 PM in Tokyo."* | **nothing** — voice is enough |
173
186
 
174
187
  ---
175
188
 
176
189
  ## Examples
177
190
 
178
- ### Example 1 Calendar (with card)
191
+ ### Reply with a preset card
179
192
 
180
193
  > **Human:** `[Mac] what's on my calendar today?`
181
- > **You (full reply):**
182
194
  > ```
183
- > Here's your day, Bruno. Stand-up at 10, design review at 2, nothing after 4.
184
- > <notch_html>
185
- > <div style="padding:12px 16px;display:flex;flex-direction:column;gap:8px;height:100%;box-sizing:border-box">
186
- > <div style="display:flex;align-items:baseline;gap:8px">
187
- > <div style="font-size:22px;font-weight:700">Thu</div>
188
- > <div style="font-size:13px;opacity:0.55">May 28</div>
189
- > </div>
190
- > <div style="height:1px;background:rgba(255,255,255,0.08)"></div>
191
- > <div style="display:flex;justify-content:space-between;font-size:11px">
192
- > <span style="opacity:0.7">10:00 · Stand-up</span>
193
- > <span style="opacity:0.4">30m</span>
194
- > </div>
195
- > <div style="display:flex;justify-content:space-between;font-size:11px">
196
- > <span style="opacity:0.7">14:00 · Design review</span>
197
- > <span style="opacity:0.4">1h</span>
198
- > </div>
199
- > </div>
200
- > </notch_html>
195
+ > Here's your day, Bruno.
196
+ > <mac_actions>
197
+ > [ { "type": "card", "preset": "calendar", "data": { "weekday": "Thu", "date": "May 28", "events": [ { "time": "10:00", "title": "Stand-up" }, { "time": "14:00", "title": "Design review" } ] } } ]
198
+ > </mac_actions>
201
199
  > ```
202
200
 
203
- Morphy speaks "Here's your day, Bruno…" while the card slides into the notch.
201
+ ### Reply that acts on the screen
202
+
203
+ > **Human:** `[Mac] where do I turn off notifications?`
204
+ > ```
205
+ > It's the bell icon near the top-right, Bruno.
206
+ > <mac_actions>
207
+ > [ { "type": "spotlight", "x": 1840, "y": 64, "r": 70, "screen": 1 },
208
+ > { "type": "point", "x": 1840, "y": 64, "label": "Notifications", "screen": 1 } ]
209
+ > </mac_actions>
210
+ > ```
204
211
 
205
- ### Example 2 — Quick fact (no card)
212
+ ### Reply, bare fact (no visual)
206
213
 
207
214
  > **Human:** `[Mac] what time is it in Tokyo?`
208
- > **You (full reply):**
209
215
  > ```
210
216
  > It's 8:14 PM in Tokyo.
211
217
  > ```
212
218
 
213
- A bare fact doesn't earn screen real estate. Speak it, move on.
214
-
215
- ### Example 3 — Comparison (card with two columns)
219
+ ### Custom card (no preset fits)
216
220
 
217
- > **Human:** `[Mac] which is better for our use case, Postgres or SQLite?`
218
- > **You:**
219
221
  > ```
220
- > For your use case, Bruno, Postgres edges it out for the concurrent writes. Quick side-by-side up top.
222
+ > Quick side-by-side up top, Bruno.
221
223
  > <notch_html>
222
- > <div style="padding:12px 16px;display:flex;gap:10px;height:100%;box-sizing:border-box">
224
+ > <div style="padding:12px 16px;display:flex;gap:10px;height:100%;box-sizing:border-box;color:#fff;font-family:-apple-system">
223
225
  > <div style="flex:1;padding:8px 10px;border-radius:8px;background:rgba(255,255,255,0.05)">
224
- > <div style="font-size:10px;opacity:0.5;text-transform:uppercase;letter-spacing:.5px">Postgres</div>
226
+ > <div style="font-size:10px;opacity:0.5;text-transform:uppercase">Postgres</div>
225
227
  > <div style="font-size:13px;font-weight:700;margin-top:2px">Concurrent ✓</div>
226
- > <div style="font-size:10px;opacity:0.5;margin-top:4px">Heavier setup</div>
227
228
  > </div>
228
229
  > <div style="flex:1;padding:8px 10px;border-radius:8px;background:rgba(255,255,255,0.05)">
229
- > <div style="font-size:10px;opacity:0.5;text-transform:uppercase;letter-spacing:.5px">SQLite</div>
230
+ > <div style="font-size:10px;opacity:0.5;text-transform:uppercase">SQLite</div>
230
231
  > <div style="font-size:13px;font-weight:700;margin-top:2px">Single-writer</div>
231
- > <div style="font-size:10px;opacity:0.5;margin-top:4px">Zero setup</div>
232
232
  > </div>
233
233
  > </div>
234
234
  > </notch_html>
235
235
  > ```
236
236
 
237
- ### Example 4 — Pagination via voice (long list)
237
+ ### Proactive push (PULSE/cron)
238
238
 
239
- > **Human:** `[Mac] read me my unread emails.`
240
- > **You:** *(reply 1)*
241
239
  > ```
242
- > Three of seven worth a glance, Bruno. First: Alex about the migration. Say "next" for the rest.
243
- > <notch_html>
244
- > <div style="padding:10px 14px;display:flex;flex-direction:column;gap:6px;height:100%;box-sizing:border-box">
245
- > <div style="display:flex;justify-content:space-between;align-items:baseline">
246
- > <div style="font-size:13px;font-weight:700">Unread mail</div>
247
- > <div style="font-size:9px;opacity:0.45">1 / 3</div>
248
- > </div>
249
- > <div style="height:1px;background:rgba(255,255,255,0.08)"></div>
250
- > <div style="font-size:11px"><b>Alex Chen</b> · Migration plan</div>
251
- > <div style="font-size:10px;opacity:0.6">"…can we move the cutover to Tue?"</div>
252
- > </div>
253
- > </notch_html>
240
+ > <mac_push>
241
+ > Build's green, Bruno.
242
+ > <mac_actions>
243
+ > [ { "type": "card", "preset": "stat", "data": { "value": "PASS", "label": "CI · main" } } ]
244
+ > </mac_actions>
245
+ > </mac_push>
254
246
  > ```
255
- > *(human says "next" → reply 2 with `1 / 3` swapped for `2 / 3` and the next email's body)*
256
247
 
257
248
  ---
258
249
 
259
250
  ## What Not To Do
260
251
 
261
- - ❌ **No long monologues.** If you can't say it in two sentences, you're either over-explaining or you should make the card carry it.
262
- - ❌ **No reading the card aloud.** The card and the speech complement each other; don't make them redundant.
263
- - ❌ **No external assets.** `<img src="https://…">`, `<link href="…">`, Google Fonts, none of it loads.
264
- - ❌ **No interactive elements.** Buttons render but do nothing they read as broken UI. Drop them.
265
- - ❌ **No light backgrounds.** The pill is black. White text on a white card looks like a missing texture.
266
- - ❌ **No emoji as primary content** unless it's the answer. They render small at 12px and lose meaning.
267
- - ❌ **Don't send a card "just because"** if the answer is a bare fact. The card should add something the voice can't carry — structure, list, comparison, status.
252
+ - ❌ **No long monologues.** Two sentences. If you can't, let the card carry it.
253
+ - ❌ **No reading the card aloud.** Voice + card complement, never duplicate.
254
+ - ❌ **No `point`/`spotlight` in a proactive push** — no screenshot exists there.
255
+ - ❌ **No external assets** in custom HTML — no network in the notch view.
256
+ - ❌ **No interactive elements** in cards — buttons render but do nothing.
257
+ - ❌ **No light backgrounds** — the pill is black.
258
+ - ❌ **Don't send a visual "just because"** — a bare fact needs no card.
259
+ - ❌ **Don't invent action `type`s** — only `card`, `point`, `spotlight` exist today.
268
260
 
269
261
  ---
270
262
 
271
263
  ## Reply Checklist
272
264
 
273
- Before you send:
274
-
275
- 1. **Did the tag `[Mac]` actually start the user message?** If not, don't use this skill.
276
- 2. **Is the spoken text 2 sentences, no markdown, no enumerations?**
277
- 3. **🚫 Does my spoken text recite items that are already in my card?** If yes, **rewrite the speech as a lead-in only** ("Here are the top five, Bruno."). Speaking the card aloud is the #1 failure mode of this skill. Re-read the spoken text and the card together — if you remove the card, would the voice still make sense as the standalone answer? If yes, you're double-answering. Trim the voice.
278
- 4. **If I'm sending a card, does it add structure the voice can't carry?** If a sentence covers it, drop the card.
279
- 5. **Does my voice acknowledge the card if I sent one?** ("Here it is.", "Pinned it up top.", etc.)
280
- 6. **Did I check for a PRESET first** ([`presets/PRESETS.md`](presets/PRESETS.md))? Only hand-write `<notch_html>` if no preset fits. (For custom, did I check `frequentSnippets/` for one I already made?)
281
- 7. **If it's a preset:** is the body **valid JSON**, and is `type` one of the real preset names (lowercase)?
282
- 8. **If it's custom:** is the card ≤ 383×147pt, transparent-on-black, white text, no external assets?
283
- 9. **Is the tag spelled exactly `<notch_card type="…">…</notch_card>` or `<notch_html>…</notch_html>` (lowercase, underscore)?**
284
- 10. **If I'm offering to save a custom card for next time, did I say so plainly?**
265
+ 1. Did the message actually start with `[Mac]`? If not, don't use this skill.
266
+ 2. Spoken text ≤ 2 sentences, no markdown, no enumerations?
267
+ 3. 🚫 Does my speech recite what's already in my card? Rewrite it as a lead-in only.
268
+ 4. If acting on screen, did I read the coordinates off **this turn's screenshot**, and set `screen` if multi-display?
269
+ 5. Is my `<mac_actions>` value **valid JSON** (an array of objects, each with a `type`)?
270
+ 6. For a `card`: did I check **PRESETS.md** first, and is `preset` a real lowercase name with valid `data`?
271
+ 7. For a custom card: raw `<notch_html>`, 383×147, transparent-on-black, white text, no external assets?
272
+ 8. Proactive? Wrapped in `<mac_push>`, no `point`/`spotlight`, and brief?
273
+ 9. Does my voice acknowledge the visual if I sent one?
285
274
 
286
275
  ---
287
276
 
@@ -289,14 +278,16 @@ Before you send:
289
278
 
290
279
  | Thing | Where / how |
291
280
  |---|---|
292
- | Tag that activates this skill | `[Mac]` at the start of the user message |
293
- | **Preset card tag (preferred)** | `<notch_card type="…">{json}</notch_card>` |
294
- | Preset catalog + schemas | [`presets/PRESETS.md`](presets/PRESETS.md) — email, list, calendar, weather, ticker, stat, info, text, comparison |
295
- | Custom card tag (escape hatch) | `<notch_html>…</notch_html>` (singular, anywhere in your reply) |
296
- | Canvas size | **383 × 147 pt**, transparent over **black** |
297
- | Custom snippet library | `workspace/skills/mac/frequentSnippets/*.html` |
298
- | TTS engine | ElevenLabs (Morphy handles it; you just write the words) |
299
- | Stripped from speech | Anything inside `<notch_card …>…` or `<notch_html>…` (case-insensitive, multi-line) |
300
- | Long text | **Scrollable** the user scrolls long cards (email/list/calendar/text) with the trackpad |
301
- | Auto-opens notch? | Yes sending a card notch slides down automatically as audio starts |
302
- | Auto-clears? | When the human triggers the next push-to-talk, or after the safety timer |
281
+ | Activates this skill | `[Mac]` at the start of the user message |
282
+ | **Action registry** | `<mac_actions>[ { "type": "…", … }, … ]</mac_actions>` — one block, JSON array, runs in order |
283
+ | Action types | `card` (preset), `point`, `spotlight` |
284
+ | `card` | `{ "type":"card", "preset":"…", "data":{…} }` — catalog: [`presets/PRESETS.md`](presets/PRESETS.md) |
285
+ | `point` | `{ "type":"point", "x", "y", "label"?, "screen"? }` — reply-only |
286
+ | `spotlight` | `{ "type":"spotlight", "x","y", "r"?, "label"?, "screen"? }` — reply-only |
287
+ | Custom card (raw) | `<notch_html>…</notch_html>` — raw markup, no escaping |
288
+ | Custom snippet library | [`frequentSnippets/`](frequentSnippets/)`*.html` |
289
+ | Proactive push | wrap payload in `<mac_push>…</mac_push>` (cards only, no point/spotlight) |
290
+ | Canvas | **383 × 147 pt**, transparent over **black** |
291
+ | Stripped from speech | Anything inside `<mac_actions>`, `<notch_html>` (and legacy `<notch_card>` / `<morphy_action>`) |
292
+ | Coordinates | screenshot pixels (top-left origin); `screen` is 1-based |
293
+ | Auto-clears | next push-to-talk, or a short safety timer |
@@ -7,30 +7,41 @@ these, and the card always looks on-brand.
7
7
 
8
8
  ## How to send a preset
9
9
 
10
- Put one block anywhere in your `[Mac]` reply (it's stripped from TTS, never
11
- spoken):
10
+ A card is a **registry action**. Put one `<mac_actions>` block in your `[Mac]` reply
11
+ (it's stripped from TTS, never spoken) with a `card` action whose `data` is the
12
+ preset's payload:
12
13
 
13
14
  ```
14
- <notch_card type="PRESET_NAME">
15
- { ...JSON data for that preset... }
16
- </notch_card>
15
+ <mac_actions>
16
+ [ { "type": "card", "preset": "PRESET_NAME", "data": { ...preset data... } } ]
17
+ </mac_actions>
17
18
  ```
18
19
 
19
- - `type` is the preset name (lowercase) from the table below.
20
- - The body is a **single JSON object**. Use valid JSON double-quote keys and
21
- strings, escape newlines inside strings as `\n` (multi-line text is fine and
22
- encouraged for `email`/`text` bodies; it renders with real line breaks).
23
- - Unknown fields are ignored. Missing optional fields just don't render.
24
- - If the `type` is unknown or the JSON is malformed, Morphy shows **no card** (and
25
- nothing leaks into your spoken reply) so a typo fails safe, not loud.
20
+ - `preset` is the name (lowercase) from the sections below.
21
+ - `data` is a **single JSON object** — the fields documented for that preset.
22
+ **Each example below shows exactly that `data` object** (drop it in under `"data"`).
23
+ - Use valid JSON: double-quote keys/strings, escape newlines inside strings as `\n`
24
+ (multi-line text is fine for `email`/`text` bodies — it renders with real breaks).
25
+ - Unknown fields are ignored; missing optional fields just don't render.
26
+ - An unknown `preset` or malformed JSON shows **no card** (and nothing leaks into
27
+ your spoken reply) — a typo fails safe, not loud.
28
+ - You can include other actions (`point`, `spotlight`) in the **same array** — they
29
+ run in order. See `SKILL.md`.
26
30
 
27
- The same voice rules apply as always: **don't read the card's contents aloud** —
28
- the voice is a short lead-in, the card carries the detail. See `SKILL.md`.
31
+ > **Legacy:** the older `<notch_card type="PRESET">{ …data… }</notch_card>` tag still
32
+ > works (same renderer), but the `card` action above is the canonical form — one
33
+ > envelope for every Mac capability.
34
+
35
+ The same voice rule always applies: **don't read the card's contents aloud** — the
36
+ voice is a short lead-in, the card carries the detail. See `SKILL.md`.
29
37
 
30
38
  ---
31
39
 
32
40
  ## Presets
33
41
 
42
+ > Each block below is the **`data` object** for that preset — wrap it as shown in
43
+ > "How to send a preset" above.
44
+
34
45
  ### `stat` — one big number
35
46
  A single hero value with a label and caption. Time, countdown, %, rate, score.
36
47
 
@@ -41,9 +52,7 @@ A single hero value with a label and caption. Time, countdown, %, rate, score.
41
52
  | `caption` | – | muted line under it |
42
53
 
43
54
  ```
44
- <notch_card type="stat">
45
55
  { "label": "TOKYO", "value": "8:14 PM", "caption": "Thursday, May 28" }
46
- </notch_card>
47
56
  ```
48
57
 
49
58
  ### `info` — header + two key/value tiles
@@ -57,11 +66,9 @@ Two metrics side by side under a title.
57
66
  | `right_label`, `right_value` | ✓ | second tile |
58
67
 
59
68
  ```
60
- <notch_card type="info">
61
69
  { "title": "Connection", "subtitle": "VPN · Frankfurt",
62
70
  "left_label": "Latency", "left_value": "42 ms",
63
71
  "right_label": "Loss", "right_value": "0%" }
64
- </notch_card>
65
72
  ```
66
73
 
67
74
  ### `email` — a single email
@@ -77,10 +84,8 @@ can **scroll** if it's long. Perfect for "read me that email."
77
84
  | `initial` | – | avatar letter; **auto-derived from `from`** if omitted |
78
85
 
79
86
  ```
80
- <notch_card type="email">
81
87
  { "from": "Alex Chen", "time": "2:14 PM", "subject": "Migration plan",
82
88
  "body": "Can we move the cutover to Tuesday?\n\nStaging looked clean last night." }
83
- </notch_card>
84
89
  ```
85
90
 
86
91
  ### `list` — ranked / labelled rows
@@ -95,14 +100,12 @@ A scrolling list. Use for unread mail, todos, news, search results, top-N.
95
100
  Each item: `{ "title": ✓, "meta"?: "secondary line", "value"?: "right-aligned tag", "index"?: "1" }`
96
101
 
97
102
  ```
98
- <notch_card type="list">
99
103
  { "title": "Unread mail", "page": 1, "pages": 3,
100
104
  "items": [
101
105
  { "index": "1", "title": "Alex Chen", "meta": "Migration plan", "value": "2m" },
102
106
  { "index": "2", "title": "Figma", "meta": "3 new comments", "value": "1h" },
103
107
  { "index": "3", "title": "GitHub", "meta": "CI passed on main", "value": "3h" }
104
108
  ] }
105
- </notch_card>
106
109
  ```
107
110
 
108
111
  ### `calendar` — day + events
@@ -118,14 +121,12 @@ Day hero with a scrolling event list and a built-in empty state.
118
121
  Each event: `{ "time": "10:00", "title": "Stand-up", "duration"?: "30m" }`
119
122
 
120
123
  ```
121
- <notch_card type="calendar">
122
124
  { "weekday": "Thu", "date": "May 28", "count": "3 events",
123
125
  "events": [
124
126
  { "time": "10:00", "title": "Stand-up", "duration": "30m" },
125
127
  { "time": "14:00", "title": "Design review", "duration": "1h" },
126
128
  { "time": "16:30", "title": "1:1 with Sam", "duration": "30m" }
127
129
  ] }
128
- </notch_card>
129
130
  ```
130
131
 
131
132
  ### `weather` — current conditions
@@ -141,10 +142,8 @@ optional third tile.
141
142
  | `extra_label`, `extra_value` | – | optional third tile (e.g. Wind) |
142
143
 
143
144
  ```
144
- <notch_card type="weather">
145
145
  { "location": "San Francisco", "temp": "17", "condition": "Partly cloudy",
146
146
  "high": "19", "low": "12", "extra_label": "Wind", "extra_value": "12mph" }
147
- </notch_card>
148
147
  ```
149
148
 
150
149
  ### `ticker` — finance quote
@@ -166,10 +165,8 @@ leading `-`), and an **auto-generated sparkline** from a number array.
166
165
  > can't get a series, omit `points` and the card still looks fine without it.
167
166
 
168
167
  ```
169
- <notch_card type="ticker">
170
168
  { "symbol": "TSLA", "name": "Tesla Inc", "price": "$248.50", "change": "+2.4%",
171
169
  "points": [241, 239, 244, 242, 247, 245, 250, 248.5] }
172
- </notch_card>
173
170
  ```
174
171
 
175
172
  ### `text` — title + long body
@@ -183,10 +180,8 @@ summaries, explanations, "read me this", a quote, a paragraph answer.
183
180
  | `tag` | – | small label top-right (e.g. `"#482"`) |
184
181
 
185
182
  ```
186
- <notch_card type="text">
187
183
  { "title": "PR summary", "tag": "#482",
188
184
  "body": "Refactors the notch pipeline into a preset renderer.\n\nAdds nine presets the user can scroll." }
189
- </notch_card>
190
185
  ```
191
186
 
192
187
  ### `comparison` — two columns
@@ -200,11 +195,9 @@ Side-by-side options, each with a value and a note.
200
195
  | `left_note`, `right_note` | – | muted line under each value |
201
196
 
202
197
  ```
203
- <notch_card type="comparison">
204
198
  { "title": "Postgres vs SQLite",
205
199
  "left_title": "Postgres", "left_value": "Concurrent ✓", "left_note": "Heavier setup",
206
200
  "right_title": "SQLite", "right_value": "Single-writer", "right_note": "Zero setup" }
207
- </notch_card>
208
201
  ```
209
202
 
210
203
  ---
@@ -212,8 +205,9 @@ Side-by-side options, each with a value and a note.
212
205
  ## When no preset fits → go custom
213
206
 
214
207
  If the answer needs a shape these don't cover, hand-write the whole card with
215
- `<notch_html>…</notch_html>` instead (see `SKILL.md` → custom snippets). And if you
216
- build a custom card you'll reuse, save it to `frequentSnippets/` so next time is a
208
+ `<notch_html>…</notch_html>` instead (see `SKILL.md` → custom cards). Use the raw
209
+ tag, not a JSON string — you write real markup with no escaping. If you build a
210
+ custom card you'll reuse, save it to `frequentSnippets/` so next time is a
217
211
  copy-fill-send. When a custom card you've made gets requested a lot, tell Bruno —
218
212
  it's a candidate for a real shipped preset.
219
213
 
@@ -5,11 +5,11 @@
5
5
  "bloby_human": "Bruno Bertapeli",
6
6
  "bloby": "bloby-bruno",
7
7
  "author": "newbot-official",
8
- "description": "Morphy native macOS companion. Activates on the [Mac] tag. Output is a concise spoken reply (TTS); optionally accompany it with a <notch_html>…</notch_html> visual card rendered inside the MacBook notch (383×147pt, transparent over black). Frequent snippets are cached in workspace/skills/mac/frequentSnippets/ for instant re-use.",
8
+ "description": "Morphy native macOS companion. Activates on the [Mac] tag. You reply with a concise spoken line (TTS) and optionally drive the Mac's action registry — one <mac_actions> JSON array that can show a notch card (preset), point the mascot at the screen, or spotlight a control. Custom cards use raw <notch_html>. The same registry works proactively (PULSE/cron) wrapped in <mac_push>. Card presets + schemas: presets/PRESETS.md. Reusable custom cards: frequentSnippets/.",
9
9
  "depends": [],
10
10
  "env_keys": [],
11
11
  "has_telemetry": false,
12
- "size": "8KB",
12
+ "size": "12KB",
13
13
  "contains_binaries": false,
14
- "tags": ["mac", "morphy", "notch", "macos", "voice", "tts", "visual", "html"]
14
+ "tags": ["mac", "morphy", "notch", "macos", "voice", "tts", "visual", "html", "registry", "actions", "spotlight", "point"]
15
15
  }
@@ -1,3 +1,8 @@
1
+ ---
2
+ name: plaud
3
+ description: "Plaud Note integration. Pairs the user's Plaud account (email OTP or paste-token for Google/Apple identities), pulls recordings into workspace/files/audio/plaud/, and routes transcription through either the Bloby Marketplace audio-to-text service (pay-per-minute) or the human's own provider (Groq / OpenAI Whisper / Mistral Voxtral / local)."
4
+ ---
5
+
1
6
  # Plaud
2
7
 
3
8
  ## What This Is
@@ -1,3 +1,8 @@
1
+ ---
2
+ name: whatsapp
3
+ description: "WhatsApp channel for your agent via Baileys. QR auth, messaging, voice transcription, channel and business modes."
4
+ ---
5
+
1
6
  # WhatsApp
2
7
 
3
8
  ## What This Is