ai-notify 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -35
- package/menubar/AiNotifyMenuBar.swift +192 -63
- package/menubar/dist/ai-notify.app/Contents/MacOS/ai-notify-menubar +0 -0
- package/package.json +1 -1
- package/src/cli.mjs +244 -4
- package/src/highlight.mjs +261 -0
- package/src/menubar.mjs +15 -1
- package/src/notify.mjs +124 -31
- package/src/state.mjs +117 -11
- package/src/util.mjs +16 -0
- package/src/voicevox.mjs +120 -0
package/src/notify.mjs
CHANGED
|
@@ -4,10 +4,15 @@
|
|
|
4
4
|
// so a Linux box without `notify-send` (or a Mac without `terminal-notifier`)
|
|
5
5
|
// never errors — it just does what it can.
|
|
6
6
|
|
|
7
|
-
import { spawn } from 'node:child_process';
|
|
8
|
-
import { existsSync } from 'node:fs';
|
|
9
|
-
import {
|
|
7
|
+
import { spawn, execFileSync } from 'node:child_process';
|
|
8
|
+
import { existsSync, rmSync } from 'node:fs';
|
|
9
|
+
import { tmpdir } from 'node:os';
|
|
10
|
+
import { join } from 'node:path';
|
|
11
|
+
import { isMuted, readConfig, readVolume, recordPane, readPaneSetting, setPaneWaiting } from './state.mjs';
|
|
12
|
+
import { controllingTty } from './util.mjs';
|
|
10
13
|
import { translate } from './translate.mjs';
|
|
14
|
+
import { highlightWaiting, clearHighlight } from './highlight.mjs';
|
|
15
|
+
import * as voicevox from './voicevox.mjs';
|
|
11
16
|
|
|
12
17
|
const platform = process.platform; // 'darwin' | 'linux' | 'win32'
|
|
13
18
|
|
|
@@ -33,13 +38,14 @@ const resolveSound = (name) => {
|
|
|
33
38
|
return name; // linux/win: treated as a freedesktop event id / ignored
|
|
34
39
|
};
|
|
35
40
|
|
|
36
|
-
const playSound = (name) => {
|
|
41
|
+
const playSound = (name, vol = 1) => {
|
|
37
42
|
const sound = resolveSound(name);
|
|
38
43
|
if (platform === 'darwin') {
|
|
39
44
|
if (sound && existsSync(sound)) {
|
|
40
45
|
// play twice, a touch louder, so it is hard to miss
|
|
41
|
-
|
|
42
|
-
run('afplay', ['-v',
|
|
46
|
+
const v = String(2 * vol);
|
|
47
|
+
run('afplay', ['-v', v, sound]);
|
|
48
|
+
run('afplay', ['-v', v, sound]);
|
|
43
49
|
}
|
|
44
50
|
} else if (platform === 'linux') {
|
|
45
51
|
if (which('paplay') && existsSync('/usr/share/sounds/freedesktop/stereo/complete.oga')) {
|
|
@@ -54,9 +60,23 @@ const playSound = (name) => {
|
|
|
54
60
|
}
|
|
55
61
|
};
|
|
56
62
|
|
|
57
|
-
|
|
63
|
+
// `say` has no per-call volume, so when a non-default volume is set we render
// the speech to a temporary AIFF file and play it through `afplay -v`.
//
//   text  - phrase to speak
//   voice - optional `say` voice name; falsy omits `-v` (system default voice)
//   vol   - level handed to `afplay -v`
//
// Synchronous: each step blocks, capped at 30 s. Best-effort: failures are
// swallowed so a notification never crashes the caller.
const sayWithVolume = (text, voice, vol) => {
  const tmp = join(tmpdir(), `ai-notify-say-${process.pid}.aiff`);
  try {
    const sayArgs = voice ? ['-v', voice, '-o', tmp, text] : ['-o', tmp, text];
    execFileSync('say', sayArgs, { timeout: 30000 });
    execFileSync('afplay', ['-v', String(vol), tmp], { timeout: 30000 });
  } catch {
    /* ignore */
  } finally {
    // Clean up even when `say` / `afplay` failed or timed out; previously the
    // rendered file was left behind in the temp dir on any error.
    try {
      rmSync(tmp, { force: true });
    } catch {
      /* ignore */
    }
  }
};
|
|
75
|
+
|
|
76
|
+
const speak = (text, voice, vol = 1) => {
|
|
58
77
|
if (!text) return;
|
|
59
78
|
if (platform === 'darwin') {
|
|
79
|
+
if (vol !== 1) return sayWithVolume(text, voice, vol);
|
|
60
80
|
run('say', voice ? ['-v', voice, text] : [text]);
|
|
61
81
|
} else if (platform === 'linux') {
|
|
62
82
|
if (which('spd-say')) run('spd-say', [text]);
|
|
@@ -70,20 +90,40 @@ const speak = (text, voice) => {
|
|
|
70
90
|
}
|
|
71
91
|
};
|
|
72
92
|
|
|
73
|
-
const banner = (title, subtitle, message) => {
|
|
93
|
+
// Show a desktop banner, best-effort per platform.
//
//   title / subtitle / message - banner text
//   activate - macOS + terminal-notifier only: passed as `-activate`, so
//              clicking the notification focuses that app
//   urgent   - Linux only: sends the notification with `-u critical`
//
// macOS uses terminal-notifier when installed, otherwise an AppleScript
// `display notification`. Linux uses notify-send when installed.
const banner = (title, subtitle, message, { activate, urgent } = {}) => {
  if (platform === 'darwin') {
    if (which('terminal-notifier')) {
      const args = ['-title', title, '-subtitle', subtitle, '-message', message];
      if (activate) args.push('-activate', activate); // click the notification -> focus the app
      run('terminal-notifier', args);
    } else {
      // Escape backslashes BEFORE quotes. Escaping only `"` lets an input such
      // as `\"` become `\\"`, which closes the AppleScript string literal early
      // and makes the rest of the text run as script.
      const esc = (s) => String(s).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
      run('osascript', [
        '-e',
        `display notification "${esc(message)}" with title "${esc(title)}" subtitle "${esc(subtitle)}"`,
      ]);
    }
  } else if (platform === 'linux') {
    if (which('notify-send')) {
      const args = urgent ? ['-u', 'critical'] : [];
      run('notify-send', [...args, `${title}: ${subtitle}`, message]);
    }
  }
  // win32: skipped (no dependency-free toast); sound/voice still fire.
};
|
|
86
114
|
|
|
115
|
+
// A short, speakable gist of a summary: the first sentence, capped at `max`
// characters on a clause boundary — enough to tell which task, not a monologue.
//
//   text - any value; stringified and whitespace-collapsed first
//   max  - maximum length of the result in UTF-16 code units (default 40)
//
// Returns the trimmed gist with trailing `、`, `,` and whitespace removed.
const shortenForSpeech = (text, max = 40) => {
  const flat = String(text).replace(/\s+/g, ' ').trim();
  // Sentence end: `。`, `!`, `?` and their full-width forms anywhere, but an
  // ASCII `.` only when it closes a token. A bare `.` split would cut
  // "package.json" or "v1.2" in half. If the first piece is empty, keep it all.
  const s = (flat.split(/[。!?\uFF01\uFF1F]|\.(?=\s|$)/)[0] || flat).trim();
  if (s.length <= max) return s.replace(/[、,\s]+$/, '');
  let cut = s.slice(0, max);
  // slice() counts UTF-16 units; never leave a dangling high surrogate.
  if (/[\uD800-\uDBFF]$/.test(cut)) cut = cut.slice(0, -1);
  const ten = cut.lastIndexOf('、'); // prefer a clause boundary
  const sep = ten > max * 0.4 ? ten : cut.lastIndexOf(' ');
  return (sep > 0 ? cut.slice(0, sep) : cut).replace(/[、,\s]+$/, '').trim();
};
|
|
126
|
+
|
|
87
127
|
// Public entry. Called by the hook handler with already-parsed fields.
|
|
88
128
|
export const emit = ({ provider = 'default', event = 'done', label = '', message = '' }) => {
|
|
89
129
|
const config = readConfig();
|
|
@@ -101,33 +141,86 @@ export const emit = ({ provider = 'default', event = 'done', label = '', message
|
|
|
101
141
|
// (falling back to the template on failure).
|
|
102
142
|
// default -> speak the raw message as-is.
|
|
103
143
|
// The desktop banner always shows the full original message visually.
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
const translated = translate(message, config.translateTo);
|
|
110
|
-
speakText = translated || fromTemplate || fallback;
|
|
111
|
-
} else {
|
|
112
|
-
speakText = message;
|
|
113
|
-
}
|
|
144
|
+
// Full text for the desktop banner — the translated summary / message. Length
|
|
145
|
+
// is fine here: a banner never gets cut off and you read it at a glance.
|
|
146
|
+
let fullBody;
|
|
147
|
+
if (message) {
|
|
148
|
+
fullBody = (config.translateTo ? translate(message, config.translateTo) : message) || fromTemplate || fallback;
|
|
114
149
|
} else {
|
|
115
|
-
|
|
150
|
+
fullBody = fromTemplate || fallback;
|
|
116
151
|
}
|
|
152
|
+
// Spoken read-out — short enough not to get cut off, but enough to identify
|
|
153
|
+
// WHICH task: the window label + a short gist of what happened (the first
|
|
154
|
+
// clause of the summary). speakAgentMessage:true reads the whole thing.
|
|
155
|
+
let spokenBody;
|
|
156
|
+
if (!message) spokenBody = fromTemplate || fallback;
|
|
157
|
+
else if (config.speakAgentMessage) spokenBody = fullBody;
|
|
158
|
+
else spokenBody = shortenForSpeech(fullBody, config.speakMaxChars || 40);
|
|
159
|
+
// The task gist already tells you which pane; the label (often the working
|
|
160
|
+
// dir) is just slow filler. Prefix it only if explicitly enabled.
|
|
161
|
+
const speakText = config.speakLabel === true && label ? `${label}、${spokenBody}` : spokenBody;
|
|
162
|
+
|
|
163
|
+
// Per-pane voice: remember this pane (so the menu bar can list it) and apply
|
|
164
|
+
// any voice assigned to it. Precedence (most specific first):
|
|
165
|
+
// $AI_NOTIFY_* env — set in the pane's shell
|
|
166
|
+
// this pane's pick — assigned from the menu bar (keyed by tty)
|
|
167
|
+
// provider / global — config defaults
|
|
168
|
+
const tty = controllingTty();
|
|
169
|
+
recordPane(tty, label);
|
|
170
|
+
setPaneWaiting(tty, event === 'waiting'); // waiting -> yellow menu bar status; done clears it
|
|
171
|
+
const pane = readPaneSetting(tty);
|
|
172
|
+
const tts = pane.tts || config.tts;
|
|
173
|
+
const voice = process.env.AI_NOTIFY_VOICE || pane.voice || p.voice || config.voice;
|
|
174
|
+
const speaker = process.env.AI_NOTIFY_VOICEVOX_SPEAKER || pane.speaker || config.voicevox?.speaker;
|
|
117
175
|
|
|
118
|
-
//
|
|
119
|
-
//
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
const
|
|
176
|
+
// Volume (0–2): per-window env > this pane's slider > the global slider /
|
|
177
|
+
// `ai-notify volume` > config.
|
|
178
|
+
const envVol = parseFloat(process.env.AI_NOTIFY_VOLUME);
|
|
179
|
+
const fileVol = readVolume();
|
|
180
|
+
const vol = Number.isFinite(envVol)
|
|
181
|
+
? Math.min(2, Math.max(0, envVol))
|
|
182
|
+
: typeof pane.volume === 'number'
|
|
183
|
+
? pane.volume
|
|
184
|
+
: fileVol != null
|
|
185
|
+
? fileVol
|
|
186
|
+
: typeof config.volume === 'number'
|
|
187
|
+
? config.volume
|
|
188
|
+
: 1;
|
|
123
189
|
|
|
124
190
|
if (!muted) {
|
|
125
|
-
playSound(soundName);
|
|
126
|
-
if (config.speak
|
|
191
|
+
playSound(soundName, vol);
|
|
192
|
+
if (config.speak && vol > 0) {
|
|
193
|
+
let spoken = false;
|
|
194
|
+
if (tts === 'voicevox') {
|
|
195
|
+
spoken = voicevox.speak(speakText, speaker, config.voicevox?.url, vol);
|
|
196
|
+
}
|
|
197
|
+
if (!spoken) speak(speakText, voice, vol); // OS `say` (also the VOICEVOX fallback)
|
|
198
|
+
}
|
|
127
199
|
}
|
|
128
200
|
|
|
129
201
|
if (!muted || config.bannerWhenMuted) {
|
|
130
|
-
const
|
|
131
|
-
banner(
|
|
202
|
+
const waiting = event === 'waiting';
|
|
203
|
+
banner(
|
|
204
|
+
waiting ? `⏳ ${label || 'input'}` : `✓ ${label || 'done'}`,
|
|
205
|
+
waiting ? 'waiting for input' : '',
|
|
206
|
+
fullBody,
|
|
207
|
+
{
|
|
208
|
+
// Click the notification to bring the waiting app (e.g. the IDE) forward.
|
|
209
|
+
activate: config.notifyActivate !== false ? process.env.__CFBundleIdentifier : undefined,
|
|
210
|
+
urgent: waiting,
|
|
211
|
+
}
|
|
212
|
+
);
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
// Visual highlight of *this* terminal window so a waiting pane stands out
|
|
216
|
+
// among many. Always best-effort, and applied even when muted (you still want
|
|
217
|
+
// to see which window needs you during a meeting).
|
|
218
|
+
if (config.highlightWaiting) {
|
|
219
|
+
try {
|
|
220
|
+
if (event === 'waiting') highlightWaiting(label, config.highlightColor);
|
|
221
|
+
else if (event === 'done') clearHighlight();
|
|
222
|
+
} catch {
|
|
223
|
+
/* visual is best-effort */
|
|
224
|
+
}
|
|
132
225
|
}
|
|
133
226
|
};
|
package/src/state.mjs
CHANGED
|
@@ -45,6 +45,90 @@ export const setMuted = (muted) => {
|
|
|
45
45
|
|
|
46
46
|
export const toggleMuted = () => setMuted(!isMuted());
|
|
47
47
|
|
|
48
|
+
// --- Volume ----------------------------------------------------------------
|
|
49
|
+
// A single number (0.0–2.0) in a state file, written by the menu bar slider or
|
|
50
|
+
// `ai-notify volume`, read at fire time — just like the mute flag.
|
|
51
|
+
|
|
52
|
+
// Path of the `volume` file inside the state directory.
const volumeFlagPath = () => {
  return join(stateDir(), 'volume');
};
|
|
53
|
+
|
|
54
|
+
// Read the persisted volume. Returns the stored number clamped to 0–2, or
// null when the file cannot be read or does not parse to a finite number.
export const readVolume = () => {
  let raw;
  try {
    raw = readFileSync(volumeFlagPath(), 'utf8');
  } catch {
    return null;
  }
  const parsed = parseFloat(raw);
  if (!Number.isFinite(parsed)) return null;
  return Math.min(2, Math.max(0, parsed));
};
|
|
62
|
+
|
|
63
|
+
// Persist the volume, clamped to 0–2, and return the value that was stored.
//
//   v - number or numeric string
//
// A non-numeric `v` coerces to NaN, which survives the clamp. In that case
// nothing is written (so a previously stored volume is not overwritten with
// "NaN") and NaN is returned for the caller to detect.
export const setVolume = (v) => {
  const n = Math.min(2, Math.max(0, Number(v)));
  if (Number.isNaN(n)) return n;
  ensureDir(stateDir());
  writeFileSync(volumeFlagPath(), String(n));
  return n;
};
|
|
69
|
+
|
|
70
|
+
// --- Per-pane state --------------------------------------------------------
|
|
71
|
+
// Recently-active terminal panes (so the menu bar can offer per-pane voices),
|
|
72
|
+
// and a per-tty voice override. Both are small JSON files in the state dir.
|
|
73
|
+
|
|
74
|
+
// Read and parse the JSON file at `p`.
//
// Returns `fallback` when the file is missing, unreadable, or not valid JSON,
// and also when the parsed value has a different shape than `fallback` (e.g.
// `null`, a number, or an array where an object is expected). Callers index
// and assign into the result, so a wrong-shaped value would make them throw.
// With a null/undefined `fallback`, any parsed value is returned.
const readJson = (p, fallback) => {
  let parsed;
  try {
    parsed = JSON.parse(readFileSync(p, 'utf8'));
  } catch {
    return fallback;
  }
  if (fallback == null) return parsed;
  const sameShape =
    parsed !== null &&
    typeof parsed === typeof fallback &&
    Array.isArray(parsed) === Array.isArray(fallback);
  return sameShape ? parsed : fallback;
};
|
|
81
|
+
// Serialize `obj` as JSON into the file at `p`, after ensuring the state
// directory exists.
const writeJson = (p, obj) => {
  ensureDir(stateDir());
  const payload = JSON.stringify(obj);
  writeFileSync(p, payload);
};
|
|
85
|
+
|
|
86
|
+
// Locations of the per-pane JSON files inside the state directory.
const panesPath = () => {
  return join(stateDir(), 'panes.json');
};
const paneVoicesPath = () => {
  return join(stateDir(), 'pane-voices.json');
};
const waitingPath = () => {
  return join(stateDir(), 'waiting.json');
};
|
|
89
|
+
|
|
90
|
+
// Track which panes are waiting for input, so the menu bar icon can show a
// status color (yellow) when any agent needs you.
//
//   tty     - pane key; a falsy value makes the call a no-op
//   waiting - truthy stores the current timestamp for the pane, falsy removes it
export const setPaneWaiting = (tty, waiting) => {
  if (!tty) return;
  const state = readJson(waitingPath(), {});
  if (waiting) {
    state[tty] = Date.now();
  } else {
    delete state[tty];
  }
  writeJson(waitingPath(), state);
};
|
|
99
|
+
// True when the waiting file lists at least one pane.
export const anyWaiting = () => {
  const waiting = readJson(waitingPath(), {});
  return Object.keys(waiting).length > 0;
};
|
|
100
|
+
|
|
101
|
+
// Record this pane as active (keyed by tty). Keeps the 16 most-recent.
//
//   tty   - pane key; a falsy value makes the call a no-op
//   label - display label stored with the pane ('' when falsy)
//
// Entries already in the file that are malformed (null, or no numeric `ts`)
// sort as oldest instead of throwing or giving a NaN comparison.
export const recordPane = (tty, label) => {
  if (!tty) return;
  const all = readJson(panesPath(), {});
  all[tty] = { label: label || '', ts: Date.now() };
  const stamp = (entry) => (Number.isFinite(entry?.ts) ? entry.ts : 0);
  const trimmed = Object.entries(all)
    .sort((a, b) => stamp(b[1]) - stamp(a[1]))
    .slice(0, 16);
  writeJson(panesPath(), Object.fromEntries(trimmed));
};
|
|
111
|
+
|
|
112
|
+
// List the recorded panes as { tty, label, ts }, newest first. A missing label
// becomes '' and a missing timestamp becomes 0.
export const readPanes = () => {
  const panes = [];
  for (const [tty, v] of Object.entries(readJson(panesPath(), {}))) {
    panes.push({ tty, label: v.label || '', ts: v.ts || 0 });
  }
  panes.sort((a, b) => b.ts - a.ts);
  return panes;
};
|
|
116
|
+
|
|
117
|
+
// Per-pane settings: { tts, speaker, voice, volume }. Any subset may be set.
// Returns an empty object for a falsy tty or a pane with no stored settings.
export const readPaneSetting = (tty) => {
  if (!tty) return {};
  return readJson(paneVoicesPath(), {})[tty] || {};
};
|
|
119
|
+
|
|
120
|
+
// Merge `patch` into the pane's settings; keys set to null are removed; an empty
// entry is deleted entirely. A falsy tty makes the call a no-op.
export const updatePaneSetting = (tty, patch) => {
  if (!tty) return;
  const all = readJson(paneVoicesPath(), {});
  const merged = { ...(all[tty] || {}), ...patch };
  // Keep only the keys that still hold a value (drops null and undefined).
  const next = Object.fromEntries(Object.entries(merged).filter(([, value]) => value != null));
  if (Object.keys(next).length > 0) {
    all[tty] = next;
  } else {
    delete all[tty];
  }
  writeJson(paneVoicesPath(), all);
};
|
|
131
|
+
|
|
48
132
|
// --- Config ----------------------------------------------------------------
|
|
49
133
|
|
|
50
134
|
// Sounds default to OS built-ins so we ship no audio assets (clean repo, no
|
|
@@ -55,10 +139,27 @@ export const DEFAULT_CONFIG = {
|
|
|
55
139
|
bannerWhenMuted: true,
|
|
56
140
|
// Spoken read-out of which terminal finished (helps tell tabs apart).
|
|
57
141
|
speak: true,
|
|
58
|
-
//
|
|
59
|
-
//
|
|
60
|
-
//
|
|
61
|
-
|
|
142
|
+
// Output volume 0.0–2.0 (1.0 = normal). The menu bar slider / `ai-notify
|
|
143
|
+
// volume` write a state file that overrides this; $AI_NOTIFY_VOLUME overrides
|
|
144
|
+
// per window. Applies to sounds, the spoken voice, and VOICEVOX.
|
|
145
|
+
volume: 1.0,
|
|
146
|
+
// Prefix the window label to the SPOKEN read-out. Off by default — the task
|
|
147
|
+
// gist already identifies the pane, and the label (often the working dir) just
|
|
148
|
+
// adds slow filler. Turn on if you set a short $AI_NOTIFY_LABEL per window.
|
|
149
|
+
// (The desktop banner is always titled with the label regardless.)
|
|
150
|
+
speakLabel: false,
|
|
151
|
+
// Visually highlight the waiting terminal window/pane (best-effort, by tty).
|
|
152
|
+
// Off by default; the color is yellow / orange / red / green / #RRGGBB.
|
|
153
|
+
highlightWaiting: false,
|
|
154
|
+
highlightColor: 'yellow',
|
|
155
|
+
// Make the desktop notification click bring the terminal/IDE forward.
|
|
156
|
+
notifyActivate: true,
|
|
157
|
+
// Speak the agent's full message aloud (Codex's reply, a Claude prompt, the
|
|
158
|
+
// done-summary)? Default false = read only a short gist (first clause, capped
|
|
159
|
+
// at speakMaxChars) — enough to tell which task, never cut off. The full text
|
|
160
|
+
// still shows in the desktop banner. Set true to read the whole thing.
|
|
161
|
+
speakAgentMessage: false,
|
|
162
|
+
speakMaxChars: 40,
|
|
62
163
|
// Optional: translate the agent's message into this language before speaking
|
|
63
164
|
// it (e.g. 'ja'). Empty = off. Key-less, no cost; makes a network request.
|
|
64
165
|
// Toggle with `ai-notify translate on ja` / `off`.
|
|
@@ -70,12 +171,17 @@ export const DEFAULT_CONFIG = {
|
|
|
70
171
|
// 'Kyoko'). Empty = OS default voice. Switch it with `ai-notify voice`. A
|
|
71
172
|
// per-provider `voice` below, if set, overrides this for that agent.
|
|
72
173
|
voice: '',
|
|
73
|
-
//
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
174
|
+
// TTS backend: 'say' (OS voice) or 'voicevox' (local VOICEVOX engine — speak
|
|
175
|
+
// in character voices). Falls back to 'say' if the engine isn't running.
|
|
176
|
+
// Per window: $AI_NOTIFY_VOICEVOX_SPEAKER overrides the speaker id.
|
|
177
|
+
tts: 'say',
|
|
178
|
+
voicevox: { url: 'http://127.0.0.1:50021', speaker: 3 },
|
|
179
|
+
// Spoken read-out templates for agent events. The window label is added
|
|
180
|
+
// separately (speakLabel), so leave {label} out here to avoid doubling it.
|
|
181
|
+
// Override per language (e.g. Japanese) in config.json. An agent that supplies
|
|
182
|
+
// its own message (Codex's last reply, a Claude prompt) wins over these.
|
|
183
|
+
doneMessage: 'finished',
|
|
184
|
+
waitingMessage: 'is waiting for input',
|
|
79
185
|
providers: {
|
|
80
186
|
claude: { sound: { waiting: 'Glass', done: 'Hero' }, voice: '' },
|
|
81
187
|
codex: { sound: { done: 'Submarine' }, voice: '' },
|
|
@@ -99,4 +205,4 @@ export const writeConfig = (config) => {
|
|
|
99
205
|
return configPath();
|
|
100
206
|
};
|
|
101
207
|
|
|
102
|
-
export const paths = { muteFlagPath, configPath, stateDir, configDir };
|
|
208
|
+
export const paths = { muteFlagPath, configPath, stateDir, configDir, volumeFlagPath };
|
package/src/util.mjs
CHANGED
|
@@ -39,3 +39,19 @@ export const cliInvocation = () => ({
|
|
|
39
39
|
export const isEphemeralInstall = (cliPath) => /[/\\]_npx[/\\]/.test(cliPath);
|
|
40
40
|
|
|
41
41
|
export const MARKER = 'ai-notify'; // substring used to detect our own wiring
|
|
42
|
+
|
|
43
|
+
// The controlling terminal of this process (e.g. "/dev/ttys010"), which is
// stable per terminal pane — used to scope per-pane settings. null if none.
//
// Asks `ps` for this pid's tty column. Empty output, "??" and "?" mean no
// terminal; any `ps` failure also yields null. A bare name gets a "/dev/" prefix.
export const controllingTty = () => {
  let name;
  try {
    const out = execFileSync('ps', ['-o', 'tty=', '-p', String(process.pid)], {
      stdio: ['ignore', 'pipe', 'ignore'],
    });
    name = out.toString().trim();
  } catch {
    return null;
  }
  if (name === '' || name === '??' || name === '?') return null;
  if (name.startsWith('/dev/')) return name;
  return `/dev/${name}`;
};
|
package/src/voicevox.mjs
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// VOICEVOX read-out: synthesize the spoken notification with a local VOICEVOX
|
|
2
|
+
// engine (free, offline, no API key) so each terminal can speak in a distinct
|
|
3
|
+
// character voice (ずんだもん, 四国めたん, …).
|
|
4
|
+
//
|
|
5
|
+
// The engine exposes an HTTP API on 127.0.0.1:50021. We use `curl` (zero deps):
|
|
6
|
+
// POST /audio_query?speaker=ID&text=... -> query JSON
|
|
7
|
+
// POST /synthesis?speaker=ID (query body) -> WAV
|
|
8
|
+
// then play the WAV. Everything is best-effort: if the engine isn't running we
|
|
9
|
+
// return false and the caller falls back to the OS `say` voice.
|
|
10
|
+
|
|
11
|
+
import { execSync, execFileSync } from 'node:child_process';
|
|
12
|
+
import { existsSync, statSync, mkdtempSync, rmSync, appendFileSync } from 'node:fs';
|
|
13
|
+
import { join } from 'node:path';
|
|
14
|
+
import { tmpdir } from 'node:os';
|
|
15
|
+
import { stateDir } from './state.mjs';
|
|
16
|
+
|
|
17
|
+
export const DEFAULT_URL = 'http://127.0.0.1:50021';
|
|
18
|
+
|
|
19
|
+
const platform = process.platform;
|
|
20
|
+
|
|
21
|
+
// Record why a synthesis fell back to the OS voice, so intermittent fallbacks
// are diagnosable instead of silent. Best-effort: appends one timestamped line
// to `voicevox.log` in the state directory and ignores any error.
const logFail = (reason) => {
  try {
    const logFile = join(stateDir(), 'voicevox.log');
    const line = `${new Date().toISOString()} ${reason}\n`;
    appendFileSync(logFile, line);
  } catch {
    /* ignore */
  }
};
|
|
30
|
+
|
|
31
|
+
// Probe the engine: true when `GET <url>/version` returns a non-empty body
// within the time limit, false on any failure.
//
//   url       - engine base URL
//   timeoutMs - limit in ms (rounded up to whole seconds for curl's `-m`)
export const isAvailable = (url = DEFAULT_URL, timeoutMs = 1500) => {
  let body;
  try {
    const curlSeconds = String(Math.ceil(timeoutMs / 1000));
    body = execFileSync('curl', ['-s', '-m', curlSeconds, `${url}/version`], {
      encoding: 'utf8',
      timeout: timeoutMs + 500,
    });
  } catch {
    return false;
  }
  return body.trim().length > 0;
};
|
|
42
|
+
|
|
43
|
+
// Flatten /speakers into [{ id, name }] (character + style): one row per style,
// named "<character>(<style>)". Returns [] when the request or parsing fails.
export const listSpeakers = (url = DEFAULT_URL) => {
  try {
    const raw = execFileSync('curl', ['-s', '-m', '4', `${url}/speakers`], {
      encoding: 'utf8',
      timeout: 5000,
    });
    const rows = [];
    for (const sp of JSON.parse(raw)) {
      const styles = sp.styles || [];
      for (const st of styles) {
        rows.push({ id: st.id, name: `${sp.name}(${st.name})` });
      }
    }
    return rows;
  } catch {
    return [];
  }
};
|
|
57
|
+
|
|
58
|
+
// One entry per character (preferring the ノーマル style) — a short, pickable
// list for the menu bar, vs the full style list from listSpeakers. A character
// without a ノーマル style uses its first style; one with no styles is omitted.
// Returns [] when the request or parsing fails.
export const listCharacters = (url = DEFAULT_URL) => {
  try {
    const raw = execFileSync('curl', ['-s', '-m', '4', `${url}/speakers`], {
      encoding: 'utf8',
      timeout: 5000,
    });
    const characters = [];
    for (const sp of JSON.parse(raw)) {
      const styles = sp.styles || [];
      const normal = styles.find((s) => s.name === 'ノーマル');
      const chosen = normal || styles[0];
      if (chosen) characters.push({ id: chosen.id, name: sp.name });
    }
    return characters;
  } catch {
    return [];
  }
};
|
|
75
|
+
|
|
76
|
+
// Play a WAV file synchronously (each player is capped at 30 s).
//   darwin - `afplay -v <vol>`
//   linux  - `aplay -q`, falling back to `paplay` if that throws; `vol` is not
//            passed to either player
// Other platforms do nothing. Player errors propagate to the caller.
const playWav = (wav, vol = 1) => {
  const limit = () => ({ timeout: 30000 });
  switch (platform) {
    case 'darwin':
      execFileSync('afplay', ['-v', String(vol), wav], limit());
      break;
    case 'linux':
      try {
        execFileSync('aplay', ['-q', wav], limit());
      } catch {
        execFileSync('paplay', [wav], limit());
      }
      break;
    default:
      break;
  }
};
|
|
86
|
+
|
|
87
|
+
// Synthesize and play. Returns true if it spoke, false to fall back to `say`.
//
//   text      - phrase to speak; empty returns false immediately
//   speaker   - VOICEVOX style id sent as the `speaker` query parameter
//   url       - engine base URL
//   vol       - playback volume handed to playWav
//   timeoutMs - time limit applied to each of the two HTTP steps
//
// Flow: POST /audio_query -> query JSON, POST /synthesis with that JSON -> WAV
// in a private temp dir, then play it. Every failure is logged via logFail and
// reported as false; the temp dir is always removed.
export const speak = (text, speaker = 3, url = DEFAULT_URL, vol = 1, timeoutMs = 15000) => {
  if (!text) return false;
  let dir;
  try {
    dir = mkdtempSync(join(tmpdir(), 'ai-notify-vv-'));
    const wav = join(dir, 'v.wav');
    const sec = String(Math.max(2, Math.ceil(timeoutMs / 1000)));
    const enc = encodeURIComponent(text);
    const id = encodeURIComponent(String(speaker));
    // Run curl directly, never through a shell: `url` and `speaker` come from
    // config / environment and must not be interpreted as shell syntax. The
    // query JSON is handed to the second curl on stdin instead of a shell pipe.
    const query = execFileSync(
      'curl',
      ['-s', '-m', sec, '-X', 'POST', `${url}/audio_query?speaker=${id}&text=${enc}`],
      { timeout: timeoutMs + 1000, stdio: ['ignore', 'pipe', 'ignore'] },
    );
    execFileSync(
      'curl',
      [
        '-s',
        '-m',
        sec,
        '-X',
        'POST',
        '-H',
        'Content-Type: application/json',
        '-d',
        '@-',
        `${url}/synthesis?speaker=${id}`,
        '-o',
        wav,
      ],
      { timeout: timeoutMs + 1000, input: query, stdio: ['pipe', 'ignore', 'ignore'] },
    );
    // An error response is written to the file as a small body; treat anything
    // under 1000 bytes as "no audio".
    if (!existsSync(wav) || statSync(wav).size < 1000) {
      logFail(`empty/short wav (speaker ${speaker}, ${text.length} chars)`);
      return false;
    }
    playWav(wav, vol);
    return true;
  } catch (e) {
    logFail(`error (speaker ${speaker}): ${(e && e.message) || e}`);
    return false;
  } finally {
    if (dir) {
      try {
        rmSync(dir, { recursive: true, force: true });
      } catch {
        /* ignore */
      }
    }
  }
};
|