npm - ai-notify - Versions diffs - 0.1.0 → 0.2.0 - Mend

ai-notify 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +70 -35
package/menubar/AiNotifyMenuBar.swift +192 -63
package/menubar/dist/ai-notify.app/Contents/MacOS/ai-notify-menubar +0 -0
package/package.json +1 -1
package/src/cli.mjs +244 -4
package/src/highlight.mjs +261 -0
package/src/menubar.mjs +15 -1
package/src/notify.mjs +124 -31
package/src/state.mjs +117 -11
package/src/util.mjs +16 -0
package/src/voicevox.mjs +120 -0

package/src/notify.mjs CHANGED Viewed

@@ -4,10 +4,15 @@
 // so a Linux box without `notify-send` (or a Mac without `terminal-notifier`)
 // never errors — it just does what it can.
-import { spawn } from 'node:child_process';
-import { existsSync } from 'node:fs';
-import { isMuted, readConfig } from './state.mjs';
+import { spawn, execFileSync } from 'node:child_process';
+import { existsSync, rmSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { isMuted, readConfig, readVolume, recordPane, readPaneSetting, setPaneWaiting } from './state.mjs';
+import { controllingTty } from './util.mjs';
 import { translate } from './translate.mjs';
+import { highlightWaiting, clearHighlight } from './highlight.mjs';
+import * as voicevox from './voicevox.mjs';
 const platform = process.platform; // 'darwin' | 'linux' | 'win32'
@@ -33,13 +38,14 @@ const resolveSound = (name) => {
   return name; // linux/win: treated as a freedesktop event id / ignored
 };
-const playSound = (name) => {
+const playSound = (name, vol = 1) => {
   const sound = resolveSound(name);
   if (platform === 'darwin') {
     if (sound && existsSync(sound)) {
       // play twice, a touch louder, so it is hard to miss
-      run('afplay', ['-v', '2', sound]);
-      run('afplay', ['-v', '2', sound]);
+      const v = String(2 * vol);
+      run('afplay', ['-v', v, sound]);
+      run('afplay', ['-v', v, sound]);
     }
   } else if (platform === 'linux') {
     if (which('paplay') && existsSync('/usr/share/sounds/freedesktop/stereo/complete.oga')) {
@@ -54,9 +60,23 @@ const playSound = (name) => {
   }
 };
-const speak = (text, voice) => {
+// `say` has no per-call volume, so when a non-default volume is set we render
+// the speech to a temporary AIFF file and play that file through afplay at the
+// requested level. Both steps are synchronous, so this blocks until playback
+// has finished (or a 30 s timeout fires).
+//   text  — what to speak
+//   voice — optional `say -v` voice name; falsy = no `-v` flag is passed
+//   vol   — volume handed to `afplay -v`
+// Best-effort: any failure is swallowed so a notification never throws.
+// Returns nothing.
+const sayWithVolume = (text, voice, vol) => {
+  const tmp = join(tmpdir(), `ai-notify-say-${process.pid}.aiff`);
+  try {
+    execFileSync('say', voice ? ['-v', voice, '-o', tmp, text] : ['-o', tmp, text], { timeout: 30000 });
+    execFileSync('afplay', ['-v', String(vol), tmp], { timeout: 30000 });
+  } catch {
+    /* ignore — the spoken read-out is best-effort */
+  } finally {
+    // Clean up even when `say` / `afplay` failed or timed out; otherwise every
+    // failed read-out would leave its rendered file behind in the temp dir.
+    try {
+      rmSync(tmp, { force: true });
+    } catch {
+      /* ignore */
+    }
+  }
+};
+const speak = (text, voice, vol = 1) => {
   if (!text) return;
   if (platform === 'darwin') {
+    if (vol !== 1) return sayWithVolume(text, voice, vol);
     run('say', voice ? ['-v', voice, text] : [text]);
   } else if (platform === 'linux') {
     if (which('spd-say')) run('spd-say', [text]);
@@ -70,20 +90,40 @@ const speak = (text, voice) => {
   }
 };
-const banner = (title, subtitle, message) => {
+const banner = (title, subtitle, message, { activate, urgent } = {}) => {
   if (platform === 'darwin') {
     if (which('terminal-notifier')) {
-      run('terminal-notifier', ['-title', title, '-subtitle', subtitle, '-message', message]);
+      const args = ['-title', title, '-subtitle', subtitle, '-message', message];
+      if (activate) args.push('-activate', activate); // click the notification -> focus the app
+      run('terminal-notifier', args);
     } else {
       const esc = (s) => String(s).replace(/"/g, '\\"');
-      run('osascript', ['-e', `display notification "${esc(message)}" with title "${esc(title)}" subtitle "${esc(subtitle)}"`]);
+      run('osascript', [
+        '-e',
+        `display notification "${esc(message)}" with title "${esc(title)}" subtitle "${esc(subtitle)}"`,
+      ]);
     }
   } else if (platform === 'linux') {
-    if (which('notify-send')) run('notify-send', [`${title}: ${subtitle}`, message]);
+    if (which('notify-send')) {
+      const args = urgent ? ['-u', 'critical'] : [];
+      run('notify-send', [...args, `${title}: ${subtitle}`, message]);
+    }
   }
   // win32: skipped (no dependency-free toast); sound/voice still fire.
 };
+// A short, speakable gist of a summary: the first sentence, capped at `max`
+// characters on a clause boundary — enough to tell which task, not a monologue.
+//   text — anything; it is stringified and runs of whitespace collapse to one space
+//   max  — cap in UTF-16 code units (what String#length counts); default 40
+// Returns the shortened string with trailing separators (、 , whitespace) removed.
+const shortenForSpeech = (text, max = 40) => {
+  let s = String(text).replace(/\s+/g, ' ').trim();
+  s = (s.split(/[。.!?！？\n]/)[0] || s).trim(); // first sentence
+  if (s.length <= max) return s.replace(/[、,\s]+$/, '');
+  let cut = s.slice(0, max);
+  // slice() counts UTF-16 units, so the cap can land in the middle of a
+  // surrogate pair (emoji, rare kanji). Drop a dangling high surrogate rather
+  // than hand half a character to the speech engine.
+  const tail = cut.charCodeAt(cut.length - 1);
+  if (tail >= 0xd800 && tail <= 0xdbff) cut = cut.slice(0, -1);
+  const ten = cut.lastIndexOf('、'); // prefer a clause boundary
+  // Use the 、 only when it keeps a reasonable share of the text; otherwise
+  // break at the last space, and failing that keep the hard cut.
+  const sep = ten > max * 0.4 ? ten : cut.lastIndexOf(' ');
+  return (sep > 0 ? cut.slice(0, sep) : cut).replace(/[、,\s]+$/, '').trim();
+};
 // Public entry. Called by the hook handler with already-parsed fields.
 export const emit = ({ provider = 'default', event = 'done', label = '', message = '' }) => {
   const config = readConfig();
@@ -101,33 +141,86 @@ export const emit = ({ provider = 'default', event = 'done', label = '', message
   //                               (falling back to the template on failure).
   //   default                  -> speak the raw message as-is.
   // The desktop banner always shows the full original message visually.
-  let speakText;
-  if (config.speakAgentMessage === false) {
-    speakText = fromTemplate || fallback;
-  } else if (message) {
-    if (config.translateTo) {
-      const translated = translate(message, config.translateTo);
-      speakText = translated || fromTemplate || fallback;
-    } else {
-      speakText = message;
-    }
+  // Full text for the desktop banner — the translated summary / message. Length
+  // is fine here: a banner never gets cut off and you read it at a glance.
+  let fullBody;
+  if (message) {
+    fullBody = (config.translateTo ? translate(message, config.translateTo) : message) || fromTemplate || fallback;
   } else {
-    speakText = fromTemplate || fallback;
+    fullBody = fromTemplate || fallback;
   }
+  // Spoken read-out — short enough not to get cut off, but enough to identify
+  // WHICH task: the window label + a short gist of what happened (the first
+  // clause of the summary). speakAgentMessage:true reads the whole thing.
+  let spokenBody;
+  if (!message) spokenBody = fromTemplate || fallback;
+  else if (config.speakAgentMessage) spokenBody = fullBody;
+  else spokenBody = shortenForSpeech(fullBody, config.speakMaxChars || 40);
+  // The task gist already tells you which pane; the label (often the working
+  // dir) is just slow filler. Prefix it only if explicitly enabled.
+  const speakText = config.speakLabel === true && label ? `${label}、${spokenBody}` : spokenBody;
+  // Per-pane voice: remember this pane (so the menu bar can list it) and apply
+  // any voice assigned to it. Precedence (most specific first):
+  //   $AI_NOTIFY_* env  — set in the pane's shell
+  //   this pane's pick  — assigned from the menu bar (keyed by tty)
+  //   provider / global — config defaults
+  const tty = controllingTty();
+  recordPane(tty, label);
+  setPaneWaiting(tty, event === 'waiting'); // waiting -> yellow menu bar status; done clears it
+  const pane = readPaneSetting(tty);
+  const tts = pane.tts || config.tts;
+  const voice = process.env.AI_NOTIFY_VOICE || pane.voice || p.voice || config.voice;
+  const speaker = process.env.AI_NOTIFY_VOICEVOX_SPEAKER || pane.speaker || config.voicevox?.speaker;
-  // Voice precedence (most specific first):
-  //   $AI_NOTIFY_VOICE  — set per terminal window/pane to give each its own voice
-  //   provider voice    — per agent (Claude vs Codex)
-  //   global voice      — the single `ai-notify voice` switch
-  const voice = process.env.AI_NOTIFY_VOICE || p.voice || config.voice;
+  // Volume (0–2): per-window env > this pane's slider > the global slider /
+  // `ai-notify volume` > config.
+  const envVol = parseFloat(process.env.AI_NOTIFY_VOLUME);
+  const fileVol = readVolume();
+  const vol = Number.isFinite(envVol)
+    ? Math.min(2, Math.max(0, envVol))
+    : typeof pane.volume === 'number'
+      ? pane.volume
+      : fileVol != null
+        ? fileVol
+        : typeof config.volume === 'number'
+          ? config.volume
+          : 1;
   if (!muted) {
-    playSound(soundName);
-    if (config.speak) speak(speakText, voice);
+    playSound(soundName, vol);
+    if (config.speak && vol > 0) {
+      let spoken = false;
+      if (tts === 'voicevox') {
+        spoken = voicevox.speak(speakText, speaker, config.voicevox?.url, vol);
+      }
+      if (!spoken) speak(speakText, voice, vol); // OS `say` (also the VOICEVOX fallback)
+    }
   }
   if (!muted || config.bannerWhenMuted) {
-    const title = 'AI Notify';
-    banner(title, label || provider, message || speakText);
+    const waiting = event === 'waiting';
+    banner(
+      waiting ? `⏳ ${label || 'input'}` : `✓ ${label || 'done'}`,
+      waiting ? 'waiting for input' : '',
+      fullBody,
+      {
+        // Click the notification to bring the waiting app (e.g. the IDE) forward.
+        activate: config.notifyActivate !== false ? process.env.__CFBundleIdentifier : undefined,
+        urgent: waiting,
+      }
+    );
+  }
+  // Visual highlight of *this* terminal window so a waiting pane stands out
+  // among many. Always best-effort, and applied even when muted (you still want
+  // to see which window needs you during a meeting).
+  if (config.highlightWaiting) {
+    try {
+      if (event === 'waiting') highlightWaiting(label, config.highlightColor);
+      else if (event === 'done') clearHighlight();
+    } catch {
+      /* visual is best-effort */
+    }
   }
 };

package/src/state.mjs CHANGED Viewed

@@ -45,6 +45,90 @@ export const setMuted = (muted) => {
 export const toggleMuted = () => setMuted(!isMuted());
+// --- Volume ----------------------------------------------------------------
+// The volume lives as one number (0.0–2.0) in a state file. The menu bar slider
+// and `ai-notify volume` write it; it is read at fire time, like the mute flag.
+const volumeFlagPath = () => join(stateDir(), 'volume');
+// Returns the stored volume clamped to [0, 2], or null when the file is
+// missing, unreadable, or does not start with a finite number.
+export const readVolume = () => {
+  let raw;
+  try {
+    raw = readFileSync(volumeFlagPath(), 'utf8');
+  } catch {
+    return null;
+  }
+  const parsed = parseFloat(raw);
+  if (!Number.isFinite(parsed)) return null;
+  return Math.min(2, Math.max(0, parsed));
+};
+// Persist the volume. `v` is coerced with Number() and clamped to [0, 2]; the
+// clamped value is written to the volume state file and returned.
+// A value that does not coerce to a number yields NaN: that is returned (as
+// before) but no longer written, so the state file never ends up holding the
+// text "NaN" in place of the previously stored volume.
+export const setVolume = (v) => {
+  const n = Math.min(2, Math.max(0, Number(v)));
+  if (Number.isNaN(n)) return n;
+  ensureDir(stateDir());
+  writeFileSync(volumeFlagPath(), String(n));
+  return n;
+};
+// --- Per-pane state --------------------------------------------------------
+// Recently-active terminal panes (so the menu bar can offer per-pane voices),
+// and a per-tty voice override. Both are small JSON files in the state dir.
+//
+// readJson(p, fallback): parse the JSON file at `p`. Returns `fallback` when
+// the file is missing, unreadable or not valid JSON — and also when `fallback`
+// is a plain object but the file holds something else (null, a number, an
+// array). Callers index and assign into the result, which would throw on
+// `null`, so a wrong-shaped file is treated the same as a corrupt one.
+const readJson = (p, fallback) => {
+  let parsed;
+  try {
+    parsed = JSON.parse(readFileSync(p, 'utf8'));
+  } catch {
+    return fallback;
+  }
+  const isPlainObject = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
+  return isPlainObject(fallback) && !isPlainObject(parsed) ? fallback : parsed;
+};
+// Serialize `obj` as compact JSON into the file at `p`, creating the state
+// directory first. Errors from the directory step or the write propagate.
+const writeJson = (p, obj) => {
+  ensureDir(stateDir());
+  const payload = JSON.stringify(obj);
+  writeFileSync(p, payload);
+};
+// Locations of the per-pane JSON files, resolved against the state dir on
+// every call.
+// panes.json — recently active panes.
+const panesPath = () => {
+  return join(stateDir(), 'panes.json');
+};
+// pane-voices.json — per-pane settings.
+const paneVoicesPath = () => {
+  return join(stateDir(), 'pane-voices.json');
+};
+// waiting.json — panes currently waiting for input.
+const waitingPath = () => {
+  return join(stateDir(), 'waiting.json');
+};
+// Track which panes are waiting for input, so the menu bar icon can show a
+// status color (yellow) when any agent needs you.
+//   tty     — pane key; a falsy value makes this a no-op
+//   waiting — truthy stores the current timestamp under `tty`, falsy removes it
+// The waiting file is rewritten on every call.
+export const setPaneWaiting = (tty, waiting) => {
+  if (!tty) return;
+  const waitingSince = readJson(waitingPath(), {});
+  if (waiting) {
+    waitingSince[tty] = Date.now();
+  } else {
+    delete waitingSince[tty];
+  }
+  writeJson(waitingPath(), waitingSince);
+};
+// True when the waiting file lists at least one pane.
+export const anyWaiting = () => {
+  const waitingSince = readJson(waitingPath(), {});
+  return Object.keys(waitingSince).length > 0;
+};
+// Record this pane as active (keyed by tty). Keeps the 16 most-recent.
+//   tty   — pane key; a falsy value makes this a no-op
+//   label — display label, stored as '' when falsy
+export const recordPane = (tty, label) => {
+  if (!tty) return;
+  const panes = readJson(panesPath(), {});
+  panes[tty] = { label: label || '', ts: Date.now() };
+  // Newest first, then drop everything past the 16th entry.
+  const newestFirst = Object.entries(panes).sort(([, x], [, y]) => y.ts - x.ts);
+  const kept = Object.fromEntries(newestFirst.slice(0, 16));
+  writeJson(panesPath(), kept);
+};
+// List the recorded panes as [{ tty, label, ts }], newest first. A missing
+// label reads as '' and a missing timestamp as 0.
+export const readPanes = () => {
+  const stored = readJson(panesPath(), {});
+  const panes = [];
+  for (const [tty, info] of Object.entries(stored)) {
+    panes.push({ tty, label: info.label || '', ts: info.ts || 0 });
+  }
+  return panes.sort((x, y) => y.ts - x.ts);
+};
+// Per-pane settings: { tts, speaker, voice, volume }. Any subset may be set.
+// Returns {} when `tty` is falsy or nothing is stored for that pane.
+export const readPaneSetting = (tty) => {
+  if (!tty) return {};
+  return readJson(paneVoicesPath(), {})[tty] || {};
+};
+// Merge `patch` into the pane's settings; keys set to null are removed; an empty
+// entry is deleted entirely.
+//   tty   — pane key; a falsy value makes this a no-op
+//   patch — keys to set; a null/undefined value removes that key
+export const updatePaneSetting = (tty, patch) => {
+  if (!tty) return;
+  const settings = readJson(paneVoicesPath(), {});
+  // Patch values win over stored ones; nullish values are then filtered out.
+  const merged = Object.fromEntries(
+    Object.entries({ ...(settings[tty] || {}), ...patch }).filter(([, value]) => value != null)
+  );
+  if (Object.keys(merged).length > 0) settings[tty] = merged;
+  else delete settings[tty];
+  writeJson(paneVoicesPath(), settings);
+};
 // --- Config ----------------------------------------------------------------
 // Sounds default to OS built-ins so we ship no audio assets (clean repo, no
@@ -55,10 +139,27 @@ export const DEFAULT_CONFIG = {
   bannerWhenMuted: true,
   // Spoken read-out of which terminal finished (helps tell tabs apart).
   speak: true,
-  // Whether to speak the agent's own text (Codex's reply, a Claude prompt).
-  // That text is in the agent's language — set this false to keep every spoken
-  // read-out in your own language via doneMessage / waitingMessage instead.
-  speakAgentMessage: true,
+  // Output volume 0.0–2.0 (1.0 = normal). The menu bar slider / `ai-notify
+  // volume` write a state file that overrides this; $AI_NOTIFY_VOLUME overrides
+  // per window. Applies to sounds, the spoken voice, and VOICEVOX.
+  volume: 1.0,
+  // Prefix the window label to the SPOKEN read-out. Off by default — the task
+  // gist already identifies the pane, and the label (often the working dir) just
+  // adds slow filler. Turn on if you set a short $AI_NOTIFY_LABEL per window.
+  // (The desktop banner is always titled with the label regardless.)
+  speakLabel: false,
+  // Visually highlight the waiting terminal window/pane (best-effort, by tty).
+  // Off by default; the color is yellow / orange / red / green / #RRGGBB.
+  highlightWaiting: false,
+  highlightColor: 'yellow',
+  // Make the desktop notification click bring the terminal/IDE forward.
+  notifyActivate: true,
+  // Speak the agent's full message aloud (Codex's reply, a Claude prompt, the
+  // done-summary)? Default false = read only a short gist (first clause, capped
+  // at speakMaxChars) — enough to tell which task, never cut off. The full text
+  // still shows in the desktop banner. Set true to read the whole thing.
+  speakAgentMessage: false,
+  speakMaxChars: 40,
   // Optional: translate the agent's message into this language before speaking
   // it (e.g. 'ja'). Empty = off. Key-less, no cost; makes a network request.
   // Toggle with `ai-notify translate on ja` / `off`.
@@ -70,12 +171,17 @@ export const DEFAULT_CONFIG = {
   // 'Kyoko'). Empty = OS default voice. Switch it with `ai-notify voice`. A
   // per-provider `voice` below, if set, overrides this for that agent.
   voice: '',
-  // Spoken read-out templates for agent events. `{label}` is the working-dir
-  // name. Override per language (e.g. Japanese) in config.json. An agent that
-  // supplies its own message (Codex's last reply, a Claude prompt) wins over
-  // these.
-  doneMessage: '{label} finished',
-  waitingMessage: '{label} is waiting for input',
+  // TTS backend: 'say' (OS voice) or 'voicevox' (local VOICEVOX engine — speak
+  // in character voices). Falls back to 'say' if the engine isn't running.
+  // Per window: $AI_NOTIFY_VOICEVOX_SPEAKER overrides the speaker id.
+  tts: 'say',
+  voicevox: { url: 'http://127.0.0.1:50021', speaker: 3 },
+  // Spoken read-out templates for agent events. The window label is added
+  // separately (speakLabel), so leave {label} out here to avoid doubling it.
+  // Override per language (e.g. Japanese) in config.json. An agent that supplies
+  // its own message (Codex's last reply, a Claude prompt) wins over these.
+  doneMessage: 'finished',
+  waitingMessage: 'is waiting for input',
   providers: {
     claude: { sound: { waiting: 'Glass', done: 'Hero' }, voice: '' },
     codex: { sound: { done: 'Submarine' }, voice: '' },
@@ -99,4 +205,4 @@ export const writeConfig = (config) => {
   return configPath();
 };
-export const paths = { muteFlagPath, configPath, stateDir, configDir };
+export const paths = { muteFlagPath, configPath, stateDir, configDir, volumeFlagPath };

package/src/util.mjs CHANGED Viewed

@@ -39,3 +39,19 @@ export const cliInvocation = () => ({
 export const isEphemeralInstall = (cliPath) => /[/\\]_npx[/\\]/.test(cliPath);
 export const MARKER = 'ai-notify'; // substring used to detect our own wiring
+// The controlling terminal of this process (e.g. "/dev/ttys010"), which is
+// stable per terminal pane — used to scope per-pane settings. null if none.
+// Asks `ps` for the tty column of our own pid. A blank, "?" or "??" answer
+// means no terminal, and any failure to run `ps` also yields null.
+// NOTE(review): this needs execFileSync from node:child_process in scope; the
+// import is not visible in this hunk — confirm, because a missing import would
+// be swallowed by the catch and make this always return null.
+export const controllingTty = () => {
+  let name;
+  try {
+    const out = execFileSync('ps', ['-o', 'tty=', '-p', String(process.pid)], {
+      stdio: ['ignore', 'pipe', 'ignore'],
+    });
+    name = out.toString().trim();
+  } catch {
+    return null;
+  }
+  if (name === '' || name === '??' || name === '?') return null;
+  // `ps` may print the bare device name ("ttys010"); normalise to a /dev path.
+  if (name.startsWith('/dev/')) return name;
+  return `/dev/${name}`;
+};

package/src/voicevox.mjs ADDED Viewed

@@ -0,0 +1,120 @@
+// VOICEVOX read-out: synthesize the spoken notification with a local VOICEVOX
+// engine (free, offline, no API key) so each terminal can speak in a distinct
+// character voice (ずんだもん, 四国めたん, …).
+//
+// The engine exposes an HTTP API on 127.0.0.1:50021. We use `curl` (zero deps):
+//   POST /audio_query?speaker=ID&text=...   -> query JSON
+//   POST /synthesis?speaker=ID  (query body) -> WAV
+// then play the WAV. Everything is best-effort: if the engine isn't running we
+// return false and the caller falls back to the OS `say` voice.
+import { execSync, execFileSync } from 'node:child_process';
+import { existsSync, statSync, mkdtempSync, rmSync, appendFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { tmpdir } from 'node:os';
+import { stateDir } from './state.mjs';
+export const DEFAULT_URL = 'http://127.0.0.1:50021';
+const platform = process.platform;
+// Record why a synthesis fell back to the OS voice, so intermittent fallbacks
+// are diagnosable instead of silent. Best-effort: appends one
+// "<ISO timestamp> <reason>" line to voicevox.log in the state dir and ignores
+// any error while doing so.
+const logFail = (reason) => {
+  try {
+    const logFile = join(stateDir(), 'voicevox.log');
+    const stamp = new Date().toISOString();
+    appendFileSync(logFile, `${stamp} ${reason}\n`);
+  } catch {
+    /* ignore */
+  }
+};
+// Probe the engine: true when GET <url>/version returns a non-blank body within
+// the time limit, false on any failure (curl missing, refused, timed out).
+//   url       — engine base URL
+//   timeoutMs — limit; rounded up to whole seconds for curl's -m, with an extra
+//               500 ms of slack on the child-process timeout
+export const isAvailable = (url = DEFAULT_URL, timeoutMs = 1500) => {
+  try {
+    const curlSeconds = String(Math.ceil(timeoutMs / 1000));
+    const body = execFileSync('curl', ['-s', '-m', curlSeconds, `${url}/version`], {
+      encoding: 'utf8',
+      timeout: timeoutMs + 500,
+    });
+    return body.trim() !== '';
+  } catch {
+    return false;
+  }
+};
+// Flatten /speakers into [{ id, name }] (character + style): one row per style,
+// named "<character>（<style>）". Returns [] when the request fails or the
+// response cannot be parsed or walked.
+export const listSpeakers = (url = DEFAULT_URL) => {
+  try {
+    const body = execFileSync('curl', ['-s', '-m', '4', `${url}/speakers`], { encoding: 'utf8', timeout: 5000 });
+    const characters = JSON.parse(body);
+    return [...characters].flatMap((character) =>
+      [...(character.styles || [])].map((style) => ({
+        id: style.id,
+        name: `${character.name}（${style.name}）`,
+      }))
+    );
+  } catch {
+    return [];
+  }
+};
+// One entry per character (preferring the ノーマル style) — a short, pickable
+// list for the menu bar, vs the full style list from listSpeakers.
+// Each row is { id, name }: the chosen style's id and the character's name. A
+// character with no styles is skipped. Returns [] on any failure.
+export const listCharacters = (url = DEFAULT_URL) => {
+  try {
+    const body = execFileSync('curl', ['-s', '-m', '4', `${url}/speakers`], { encoding: 'utf8', timeout: 5000 });
+    const characters = JSON.parse(body);
+    return [...characters].flatMap((character) => {
+      const styles = character.styles || [];
+      // Fall back to the first listed style when there is no ノーマル one.
+      const chosen = styles.find((style) => style.name === 'ノーマル') || styles[0];
+      return chosen ? [{ id: chosen.id, name: character.name }] : [];
+    });
+  } catch {
+    return [];
+  }
+};
+// Play a WAV file synchronously (30 s cap per player invocation).
+//   wav — path of the file to play
+//   vol — volume multiplier, 1 = normal
+// macOS: afplay with `-v vol`. Linux: aplay, falling back to paplay. aplay has
+// no volume flag, so for a non-default volume paplay is tried first with its
+// linear --volume option (65536 = 100%); if that fails the file is played at
+// system volume rather than not at all. Other platforms: no-op.
+// Throws when the last player attempted fails; the caller handles that.
+const playWav = (wav, vol = 1) => {
+  if (platform === 'darwin') {
+    execFileSync('afplay', ['-v', String(vol), wav], { timeout: 30000 });
+    return;
+  }
+  if (platform !== 'linux') return;
+  if (Number.isFinite(vol) && vol !== 1) {
+    const paVolume = String(Math.round(Math.min(2, Math.max(0, vol)) * 65536));
+    try {
+      execFileSync('paplay', [`--volume=${paVolume}`, wav], { timeout: 30000 });
+      return;
+    } catch {
+      /* paplay unavailable or failed — fall through to the unscaled players */
+    }
+  }
+  try {
+    execFileSync('aplay', ['-q', wav], { timeout: 30000 });
+  } catch {
+    execFileSync('paplay', [wav], { timeout: 30000 });
+  }
+};
+// Synthesize and play. Returns true if it spoke, false to fall back to `say`.
+//   text      — what to speak; empty/falsy returns false immediately
+//   speaker   — VOICEVOX style id, sent as the `speaker` query parameter
+//   url       — engine base URL
+//   vol       — playback volume handed to playWav
+//   timeoutMs — time limit applied to each of the two HTTP requests
+// Never throws: every failure is logged with logFail and reported as false, and
+// the temporary directory is removed in all cases.
+export const speak = (text, speaker = 3, url = DEFAULT_URL, vol = 1, timeoutMs = 15000) => {
+  if (!text) return false;
+  let dir;
+  try {
+    dir = mkdtempSync(join(tmpdir(), 'ai-notify-vv-'));
+    const wav = join(dir, 'v.wav');
+    const sec = String(Math.max(2, Math.ceil(timeoutMs / 1000)));
+    // `speaker` and `url` come from config, the environment or the pane state
+    // file. Run curl directly (no /bin/sh) with an argument vector so nothing
+    // in them can be interpreted as shell syntax, and URL-encode the query
+    // values so they cannot add extra parameters.
+    const id = encodeURIComponent(String(speaker));
+    const query = execFileSync(
+      'curl',
+      ['-s', '-m', sec, '-X', 'POST', `${url}/audio_query?speaker=${id}&text=${encodeURIComponent(text)}`],
+      { timeout: timeoutMs + 1000, stdio: ['ignore', 'pipe', 'ignore'] }
+    );
+    // Feed the audio_query JSON to /synthesis on stdin (`-d @-`), as the former
+    // shell pipe did; the WAV is written straight to the temp file.
+    execFileSync(
+      'curl',
+      [
+        '-s',
+        '-m',
+        sec,
+        '-X',
+        'POST',
+        '-H',
+        'Content-Type: application/json',
+        '-d',
+        '@-',
+        `${url}/synthesis?speaker=${id}`,
+        '-o',
+        wav,
+      ],
+      { input: query, timeout: timeoutMs + 1000, stdio: ['pipe', 'ignore', 'ignore'] }
+    );
+    // An error response or an empty reply leaves a tiny or missing file.
+    if (!existsSync(wav) || statSync(wav).size < 1000) {
+      logFail(`empty/short wav (speaker ${speaker}, ${text.length} chars)`);
+      return false;
+    }
+    playWav(wav, vol);
+    return true;
+  } catch (e) {
+    logFail(`error (speaker ${speaker}): ${(e && e.message) || e}`);
+    return false;
+  } finally {
+    if (dir) {
+      try {
+        rmSync(dir, { recursive: true, force: true });
+      } catch {
+        /* ignore */
+      }
+    }
+  }
+};