mcp-voice-hooks 1.0.8 → 1.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/hooks/pre-speak-hook.sh +1 -1
- package/.claude/hooks/pre-tool-hook.sh +1 -1
- package/.claude/hooks/pre-wait-hook.sh +1 -1
- package/.claude/hooks/stop-hook.sh +1 -1
- package/CLAUDE.local.md +3 -10
- package/README.md +48 -71
- package/dist/unified-server.js +147 -95
- package/dist/unified-server.js.map +1 -1
- package/package.json +1 -1
- package/public/app.js +451 -45
- package/public/index.html +255 -61
package/.claude/hooks/pre-speak-hook.sh
CHANGED

````diff
@@ -1,3 +1,3 @@
 #!/bin/bash
 PORT="${MCP_VOICE_HOOKS_PORT:-5111}"
-curl -s -X POST http://localhost:${PORT}/api/hooks/pre-speak || echo '{"decision": "approve"}'
+curl -s -X POST http://localhost:${PORT}/api/hooks/pre-speak || echo '{"decision": "approve", "reason": "voice-hooks unavailable"}'
````
package/.claude/hooks/pre-tool-hook.sh
CHANGED

````diff
@@ -1,3 +1,3 @@
 #!/bin/bash
 PORT="${MCP_VOICE_HOOKS_PORT:-5111}"
-curl -s -X POST http://localhost:${PORT}/api/hooks/pre-tool || echo '{"decision": "approve"}'
+curl -s -X POST http://localhost:${PORT}/api/hooks/pre-tool || echo '{"decision": "approve", "reason": "voice-hooks unavailable"}'
````
package/.claude/hooks/pre-wait-hook.sh
CHANGED

````diff
@@ -1,3 +1,3 @@
 #!/bin/bash
 PORT="${MCP_VOICE_HOOKS_PORT:-5111}"
-curl -s -X POST http://localhost:${PORT}/api/hooks/pre-wait || echo '{"decision": "approve", "reason": "voice-hooks unavailable"}'
+curl -s -X POST http://localhost:${PORT}/api/hooks/pre-wait || echo '{"decision": "approve", "reason": "voice-hooks unavailable"}'
````
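All three hook scripts above receive the same one-line change (the file list shows `stop-hook.sh` with an identical +1/-1, though its hunk is not rendered here): the fail-open fallback now carries an explanatory `reason`. If the voice-hooks server is not running, `curl` exits non-zero and the `echo` emits an approve decision, so Claude Code is never blocked by a dead hook. In Node terms, the contract each script implements looks roughly like this (a sketch; the function name is illustrative, not part of the package):

```javascript
// Fail-open hook call: a mirror of the bash one-liners above.
// PORT default and endpoint are taken from the scripts themselves.
const PORT = process.env.MCP_VOICE_HOOKS_PORT ?? "5111";

async function preSpeakHook() {
  try {
    const res = await fetch(`http://localhost:${PORT}/api/hooks/pre-speak`, { method: "POST" });
    return await res.json(); // e.g. { decision: "approve" } or { decision: "block", reason: "..." }
  } catch {
    // Server unreachable: approve rather than block, like the `|| echo` fallback.
    return { decision: "approve", reason: "voice-hooks unavailable" };
  }
}
```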
package/CLAUDE.local.md
CHANGED
````diff
@@ -4,17 +4,10 @@
 # 1. Build the project first
 npm run build
 
-# 2.
-
+# 2. Bump version (patch, minor, or major) - creates a commit and tag
+HUSKY=0 npm version patch --registry https://registry.npmjs.org/
 
-# 3.
-npm version patch --registry https://registry.npmjs.org/
-
-# Alternative: Bump version without creating a commit/tag
-# npm version patch --no-git-tag-version
-# Then manually commit the changes
-
-# 4. Publish to npm (this creates the .tgz file automatically)
+# 3. Publish to npm (this creates the .tgz file automatically)
 npm publish --registry https://registry.npmjs.org/
 
 # Note: It can take 1-5 minutes for the package to be available globally
````
package/README.md
CHANGED
````diff
@@ -2,6 +2,10 @@
 
 Real-time voice interaction for Claude Code. Speak naturally while Claude works - interrupt, redirect, or provide continuous feedback without stopping.
 
+Optionally enable text-to-speech to have Claude speak back to you.
+
+Mac only for now.
+
 ## Demo
 
 [](https://youtu.be/KpkxvJ65gbM)
````
````diff
@@ -15,11 +19,11 @@ mcp-voice-hooks enables continuous voice conversations with AI assistants by:
 - Using hooks to ensure Claude checks for voice input before tool use and before stopping
 - Allowing natural interruptions like "No, stop that" or "Wait, try something else"
 
-##
+## Browser Compatibility
 
--
--
--
+- ✅ **Chrome**: Full support for speech recognition and text-to-speech
+- ✅ **Safari**: Full support for speech recognition and text-to-speech
+- ❌ **Edge**: Speech recognition not working on Apple Silicon (language-not-supported error)
 
 ## Installation in Your Own Project
 
````
````diff
@@ -52,21 +56,41 @@ mcp-voice-hooks enables continuous voice conversations with AI assistants by:
 claude
 ```
 
-
+**Important**: After the first-time installation, you will need to restart Claude for the hooks to take effect. This is because the hooks are automatically installed when the MCP server starts for the first time.
 
-
+3. **Open the voice interface** at <http://localhost:5111> and start speaking!
 
-
+You need to send one text message to Claude to trigger the voice hooks.
 
-
+## Voice responses
 
-
-
-
-
-
-
-
+There are two options for voice responses:
+
+1. Browser Text-to-Speech (Cloud)
+2. Browser Text-to-Speech (Local)
+3. Mac System Voice
+
+### Selecting and downloading high quality System Voices (Mac only)
+
+When "Mac System Voice" is selected, the system uses macOS's built-in `say` command.
+
+Configure the system voice in `System Settings > Accessibility > Spoken Content > System Voice`
+
+I recommend using a Siri voice, as they are much higher quality.
+
+Click the info icon next to the system voice dropdown. Search for "Siri" to find the highest quality voices. You'll have to trigger a download of the voice.
+
+It may take a while to download.
+
+Once it's downloaded, you can select it in the system voice dropdown.
+
+Test it with the bash command:
+
+```bash
+say "Hi, this is your mac system voice"
+```
+
+You can also download other high quality voices in the same way. Other voices will show up in the browser voice dropdown, but for Siri voices you need to set the system voice and select Mac System Voice in the browser voice dropdown.
 
 ## Manual Hook Installation
 
````
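The "Mac System Voice" option added here is backed by a new `/api/speak-system` endpoint — see the `dist/unified-server.js` diff below — which shells out to macOS's `say`. A direct call might look like this (a sketch: the endpoint and body shape come from the server diff, and the port is the documented default):

```javascript
// Hypothetical direct call to the new system-voice endpoint.
// `rate` is forwarded to `say -r` (words per minute).
const res = await fetch("http://localhost:5111/api/speak-system", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ text: "Hi, this is your mac system voice", rate: 150 }),
});
console.log(await res.json());
// => { success: true, message: "Text spoken successfully via system voice" }
```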
|
````diff
@@ -100,6 +124,10 @@
 - Clean up voice hooks from your project's `.claude/settings.json`
 - Preserve any custom hooks you've added
 
+## Known Limitations
+
+- **Intermittent Stop Hook Execution**: Claude Code's Stop hooks are not triggered consistently. Sometimes the assistant can end responses without the Stop hook being executed. I believe this is an issue with Claude Code's hook system, not with mcp-voice-hooks. When working correctly, the Stop hook should prevent the assistant from stopping without first checking for voice input.
+
 ## Development Mode
 
 If you're developing mcp-voice-hooks itself:
````
````diff
@@ -152,65 +180,14 @@ and then configure claude to use the mcp proxy like so:
 }
 ```
 
-
+### Port Configuration
 
-
+The default port is 5111. To use a different port, add to your project's `.claude/settings.json`:
 
-```json
-{
+```json
 {
-  "hooks": {
-    "PostToolUse": [
-      {
-        "matcher": "^mcp__voice-hooks__",
-        "hooks": [
-          {
-            "type": "command",
-            "command": "./.claude/hooks/post-tool-voice-hook.sh"
-          }
-        ]
-      }
-    ]
-  },
   "env": {
-    "
+    "MCP_VOICE_HOOKS_PORT": "8080"
   }
 }
-
-```
-
-### Configuration
-
-Voice responses are disabled by default. To enable them:
-
-Add to your Claude Code settings JSON:
-
-```json
-{
-  "env": {
-    "VOICE_RESPONSES_ENABLED": "true"
-  }
-}
-```
-
-To disable voice responses, set the value to `false` or remove the setting entirely.
-
-### High quality voice responses
-
-These voice responses are spoken by your Mac's system voice.
-
-Configure in `System Settings > Accessibility > Spoken Content > System Voice`
-
-I recommend using a Siri voice, as they are much higher quality.
-
-Click the info icon next to the system voice dropdown. Search for "Siri" to find the highest quality voices. You'll have to trigger a download of the voice.
-
-It may take a while to download.
-
-Once it's downloaded, you can select it in the system voice dropdown.
-
-Test it with the bash command:
-
-```bash
-say "Hi, this is your mac system voice"
-```
+```
````
package/dist/unified-server.js
CHANGED
````diff
@@ -19,9 +19,7 @@ import {
 } from "@modelcontextprotocol/sdk/types.js";
 var __filename = fileURLToPath(import.meta.url);
 var __dirname = path.dirname(__filename);
-var
-var MIN_WAIT_TIMEOUT_SECONDS = 30;
-var MAX_WAIT_TIMEOUT_SECONDS = 60;
+var WAIT_TIMEOUT_SECONDS = 60;
 var execAsync = promisify(exec);
 async function playNotificationSound() {
   try {
@@ -62,9 +60,10 @@ var UtteranceQueue = class {
 };
 var IS_MCP_MANAGED = process.argv.includes("--mcp-managed");
 var queue = new UtteranceQueue();
-var
-
-
+var voicePreferences = {
+  voiceResponsesEnabled: false,
+  voiceInputActive: false
+};
 var app = express();
 app.use(cors());
 app.use(express.json());
````
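The removed module-level timestamp state gives way to a single `voicePreferences` object, kept in sync by the browser through two new endpoints added further down in this file (`/api/voice-preferences` and `/api/voice-input-state`). The client side lives in `public/app.js` (listed in this release but not shown here); a sketch of what those sync calls might look like, with body shapes taken from the handlers below:

```javascript
// Hypothetical browser-side state sync (illustrative, not the shipped app.js).
await fetch("/api/voice-input-state", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ active: true }), // microphone started listening
});
await fetch("/api/voice-preferences", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ voiceResponsesEnabled: true }), // TTS toggled on
});
```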
````diff
@@ -99,7 +98,7 @@ app.get("/api/utterances", (req, res) => {
     }))
   });
 });
-app.get("/api/utterances/status", (req, res) => {
+app.get("/api/utterances/status", (_req, res) => {
   const total = queue.utterances.length;
   const pending = queue.utterances.filter((u) => u.status === "pending").length;
   const delivered = queue.utterances.filter((u) => u.status === "delivered").length;
@@ -110,6 +109,13 @@ app.get("/api/utterances/status", (req, res) => {
   });
 });
 app.post("/api/dequeue-utterances", (req, res) => {
+  if (!voicePreferences.voiceInputActive) {
+    res.status(400).json({
+      success: false,
+      error: "Voice input is not active. Cannot dequeue utterances when voice input is disabled."
+    });
+    return;
+  }
   const { limit = 10 } = req.body;
   const pendingUtterances = queue.utterances.filter((u) => u.status === "pending").sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime()).slice(0, limit);
   pendingUtterances.forEach((u) => {
@@ -124,36 +130,23 @@ app.post("/api/dequeue-utterances", (req, res) => {
   });
 });
 app.post("/api/wait-for-utterances", async (req, res) => {
-
-
-
-
-
+  if (!voicePreferences.voiceInputActive) {
+    res.status(400).json({
+      success: false,
+      error: "Voice input is not active. Cannot wait for utterances when voice input is disabled."
+    });
+    return;
+  }
+  const secondsToWait = WAIT_TIMEOUT_SECONDS;
   const maxWaitMs = secondsToWait * 1e3;
   const startTime = Date.now();
   debugLog(`[Server] Starting wait_for_utterance (${secondsToWait}s)`);
-  if (lastTimeoutTimestamp) {
-    const hasNewUtterances = queue.utterances.some(
-      (u) => u.timestamp > lastTimeoutTimestamp
-    );
-    if (!hasNewUtterances) {
-      debugLog("[Server] No new utterances since last timeout, returning immediately");
-      res.json({
-        success: true,
-        utterances: [],
-        message: `No utterances found after waiting ${secondsToWait} seconds.`,
-        waitTime: 0
-      });
-      return;
-    }
-  }
   let firstTime = true;
   while (Date.now() - startTime < maxWaitMs) {
     const pendingUtterances = queue.utterances.filter(
-        (u) => u.status === "pending"
+      (u) => u.status === "pending"
     );
     if (pendingUtterances.length > 0) {
-      lastTimeoutTimestamp = null;
       const sortedUtterances = pendingUtterances.sort((a, b) => a.timestamp.getTime() - b.timestamp.getTime());
       sortedUtterances.forEach((u) => {
         queue.markDelivered(u.id);
@@ -178,7 +171,6 @@ app.post("/api/wait-for-utterances", async (req, res) => {
     }
     await new Promise((resolve) => setTimeout(resolve, 100));
   }
-  lastTimeoutTimestamp = /* @__PURE__ */ new Date();
  res.json({
     success: true,
     utterances: [],
@@ -186,11 +178,7 @@ app.post("/api/wait-for-utterances", async (req, res) => {
     waitTime: maxWaitMs
   });
 });
-app.get("/api/
-  const shouldWait = !lastTimeoutTimestamp || queue.utterances.some((u) => u.timestamp > lastTimeoutTimestamp);
-  res.json({ shouldWait });
-});
-app.get("/api/has-pending-utterances", (req, res) => {
+app.get("/api/has-pending-utterances", (_req, res) => {
   const pendingCount = queue.utterances.filter((u) => u.status === "pending").length;
   const hasPending = pendingCount > 0;
   res.json({
````
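Two behavioral changes land in the hunks above: the server no longer remembers a `lastTimeoutTimestamp` to short-circuit repeat waits (it now always waits the full fixed `WAIT_TIMEOUT_SECONDS`), and both dequeue and wait are rejected outright when the browser has not reported active voice input. A quick probe against a running server shows the new guard (a sketch; default port assumed):

```javascript
// With voice input inactive, the new guards return HTTP 400 immediately
// instead of blocking for the 60-second timeout.
const res = await fetch("http://localhost:5111/api/wait-for-utterances", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({}),
});
if (!res.ok) {
  const { error } = await res.json();
  console.log(res.status, error);
  // => 400 "Voice input is not active. Cannot wait for utterances when voice input is disabled."
}
```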
````diff
@@ -200,19 +188,21 @@ app.get("/api/has-pending-utterances", (req, res) => {
 });
 app.post("/api/validate-action", (req, res) => {
   const { action } = req.body;
-  const voiceResponsesEnabled =
+  const voiceResponsesEnabled = voicePreferences.voiceResponsesEnabled;
   if (!action || !["tool-use", "stop"].includes(action)) {
     res.status(400).json({ error: 'Invalid action. Must be "tool-use" or "stop"' });
     return;
   }
-
-
-
-
-
-
-
-
+  if (voicePreferences.voiceInputActive) {
+    const pendingUtterances = queue.utterances.filter((u) => u.status === "pending");
+    if (pendingUtterances.length > 0) {
+      res.json({
+        allowed: false,
+        requiredAction: "dequeue_utterances",
+        reason: `${pendingUtterances.length} pending utterance(s) must be dequeued first. Please use dequeue_utterances to process them.`
+      });
+      return;
+    }
   }
   if (voiceResponsesEnabled) {
     const deliveredUtterances = queue.utterances.filter((u) => u.status === "delivered");
@@ -225,9 +215,8 @@ app.post("/api/validate-action", (req, res) => {
       return;
     }
   }
-  if (action === "stop") {
-
-  if (shouldWait) {
+  if (action === "stop" && voicePreferences.voiceInputActive) {
+    if (queue.utterances.length > 0) {
       res.json({
         allowed: false,
         requiredAction: "wait_for_utterance",
@@ -241,13 +230,16 @@ app.post("/api/validate-action", (req, res) => {
   });
 });
 function handleHookRequest(attemptedAction) {
-  const voiceResponsesEnabled =
-  const
-  if (
-
-
-
-
+  const voiceResponsesEnabled = voicePreferences.voiceResponsesEnabled;
+  const voiceInputActive = voicePreferences.voiceInputActive;
+  if (voiceInputActive) {
+    const pendingUtterances = queue.utterances.filter((u) => u.status === "pending");
+    if (pendingUtterances.length > 0) {
+      return {
+        decision: "block",
+        reason: `${pendingUtterances.length} pending utterance(s) must be dequeued first. Please use dequeue_utterances to process them.`
+      };
+    }
   }
   if (voiceResponsesEnabled) {
     const deliveredUtterances = queue.utterances.filter((u) => u.status === "delivered");
@@ -262,33 +254,19 @@ function handleHookRequest(attemptedAction) {
     }
   }
   if (attemptedAction === "tool") {
-    lastToolUseTimestamp = /* @__PURE__ */ new Date();
     return { decision: "approve" };
   }
   if (attemptedAction === "wait") {
-    if (voiceResponsesEnabled && lastToolUseTimestamp && (!lastSpeakTimestamp || lastSpeakTimestamp < lastToolUseTimestamp)) {
-      return {
-        decision: "block",
-        reason: "Assistant must speak after using tools. Please use the speak tool to respond before waiting for utterances."
-      };
-    }
     return { decision: "approve" };
   }
   if (attemptedAction === "speak") {
     return { decision: "approve" };
   }
   if (attemptedAction === "stop") {
-    if (
-    return {
-      decision: "block",
-      reason: "Assistant must speak after using tools. Please use the speak tool to respond before proceeding."
-    };
-    }
-    const shouldWait = !lastTimeoutTimestamp || queue.utterances.some((u) => u.timestamp > lastTimeoutTimestamp);
-    if (shouldWait) {
+    if (voiceInputActive) {
       return {
         decision: "block",
-        reason: "Assistant tried to end its response. Stopping is not allowed without first checking for voice input. Assistant should now use wait_for_utterance to check for voice input"
+        reason: "Assistant tried to end its response, but voice input is active. Stopping is not allowed without first checking for voice input. Assistant should now use wait_for_utterance to check for voice input"
       };
     }
     return {
````
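Taken together, `handleHookRequest` now gates on browser-reported state rather than timestamps: the old "must speak after using tools" rule is gone, and stopping is blocked only while voice input is active. Condensed into one function (a sketch of the logic above, not the shipped code; the `reason` strings are abbreviated):

```javascript
// Condensed decision table for the pre-tool / pre-wait / pre-speak / stop hooks.
function decide(action, prefs, utterances) {
  const pending = utterances.filter((u) => u.status === "pending").length;
  const delivered = utterances.filter((u) => u.status === "delivered").length;
  if (prefs.voiceInputActive && pending > 0) {
    return { decision: "block", reason: "dequeue_utterances first" };
  }
  if (prefs.voiceResponsesEnabled && delivered > 0) {
    return { decision: "block", reason: "speak first" }; // delivered but unanswered input
  }
  if (action === "stop" && prefs.voiceInputActive) {
    return { decision: "block", reason: "wait_for_utterance first" };
  }
  return { decision: "approve" }; // tool, wait, speak, or stop with voice input off
}
```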
````diff
@@ -314,7 +292,7 @@ app.post("/api/hooks/pre-wait", (_req, res) => {
   const result = handleHookRequest("wait");
   res.json(result);
 });
-app.delete("/api/utterances", (req, res) => {
+app.delete("/api/utterances", (_req, res) => {
   const clearedCount = queue.utterances.length;
   queue.clear();
   res.json({
@@ -323,21 +301,67 @@ app.delete("/api/utterances", (req, res) => {
     clearedCount
   });
 });
+var ttsClients = /* @__PURE__ */ new Set();
+app.get("/api/tts-events", (_req, res) => {
+  res.writeHead(200, {
+    "Content-Type": "text/event-stream",
+    "Cache-Control": "no-cache",
+    "Connection": "keep-alive"
+  });
+  res.write('data: {"type":"connected"}\n\n');
+  ttsClients.add(res);
+  res.on("close", () => {
+    ttsClients.delete(res);
+  });
+});
+function notifyTTSClients(text) {
+  const message = JSON.stringify({ type: "speak", text });
+  ttsClients.forEach((client) => {
+    client.write(`data: ${message}
+
+`);
+  });
+}
+app.post("/api/voice-preferences", (req, res) => {
+  const { voiceResponsesEnabled } = req.body;
+  voicePreferences.voiceResponsesEnabled = !!voiceResponsesEnabled;
+  debugLog(`[Preferences] Updated: voiceResponses=${voicePreferences.voiceResponsesEnabled}`);
+  res.json({
+    success: true,
+    preferences: voicePreferences
+  });
+});
+app.post("/api/voice-input-state", (req, res) => {
+  const { active } = req.body;
+  voicePreferences.voiceInputActive = !!active;
+  debugLog(`[Voice Input] ${voicePreferences.voiceInputActive ? "Started" : "Stopped"} listening`);
+  res.json({
+    success: true,
+    voiceInputActive: voicePreferences.voiceInputActive
+  });
+});
 app.post("/api/speak", async (req, res) => {
   const { text } = req.body;
   if (!text || !text.trim()) {
     res.status(400).json({ error: "Text is required" });
     return;
   }
+  if (!voicePreferences.voiceResponsesEnabled) {
+    debugLog(`[Speak] Voice responses disabled, returning error`);
+    res.status(400).json({
+      error: "Voice responses are disabled",
+      message: "Cannot speak when voice responses are disabled"
+    });
+    return;
+  }
   try {
-
-    debugLog(`[Speak]
+    notifyTTSClients(text);
+    debugLog(`[Speak] Sent text to browser for TTS: "${text}"`);
     const deliveredUtterances = queue.utterances.filter((u) => u.status === "delivered");
     deliveredUtterances.forEach((u) => {
       u.status = "responded";
       debugLog(`[Queue] marked as responded: "${u.text}" [id: ${u.id}]`);
     });
-    lastSpeakTimestamp = /* @__PURE__ */ new Date();
     res.json({
       success: true,
       message: "Text spoken successfully",
````
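This is the mechanism behind the browser TTS options: `/api/speak` no longer speaks server-side but pushes the text to connected browsers over Server-Sent Events, and refuses outright when voice responses are disabled. A consumer sketch (the real client is `public/app.js`, listed in this release but not shown; message shapes match `notifyTTSClients` above):

```javascript
// Hypothetical browser-side consumer of the /api/tts-events SSE stream.
const events = new EventSource("/api/tts-events");
events.onmessage = (e) => {
  const msg = JSON.parse(e.data); // {"type":"connected"} or {"type":"speak","text":"..."}
  if (msg.type === "speak") {
    speechSynthesis.speak(new SpeechSynthesisUtterance(msg.text));
  }
};
```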
````diff
@@ -351,6 +375,27 @@ app.post("/api/speak", async (req, res) => {
     });
   }
 });
+app.post("/api/speak-system", async (req, res) => {
+  const { text, rate = 150 } = req.body;
+  if (!text || !text.trim()) {
+    res.status(400).json({ error: "Text is required" });
+    return;
+  }
+  try {
+    await execAsync(`say -r ${rate} "${text.replace(/"/g, '\\"')}"`);
+    debugLog(`[Speak System] Spoke text using macOS say: "${text}" (rate: ${rate})`);
+    res.json({
+      success: true,
+      message: "Text spoken successfully via system voice"
+    });
+  } catch (error) {
+    debugLog(`[Speak System] Failed to speak text: ${error}`);
+    res.status(500).json({
+      error: "Failed to speak text via system voice",
+      details: error instanceof Error ? error.message : String(error)
+    });
+  }
+});
 app.get("/", (_req, res) => {
   res.sendFile(path.join(__dirname, "..", "public", "index.html"));
 });
@@ -360,7 +405,7 @@ app.listen(HTTP_PORT, () => {
   console.log(`[Mode] Running in ${IS_MCP_MANAGED ? "MCP-managed" : "standalone"} mode`);
 });
 function getVoiceResponseReminder() {
-  const voiceResponsesEnabled =
+  const voiceResponsesEnabled = voicePreferences.voiceResponsesEnabled;
   return voiceResponsesEnabled ? "\n\nThe user has enabled voice responses, so use the 'speak' tool to respond to the user's voice input before proceeding." : "";
 }
 if (IS_MCP_MANAGED) {
@@ -395,18 +440,10 @@ if (IS_MCP_MANAGED) {
   },
   {
     name: "wait_for_utterance",
-    description: "Wait for an utterance to be available or until timeout
+    description: "Wait for an utterance to be available or until timeout",
     inputSchema: {
       type: "object",
-      properties: {
-        seconds_to_wait: {
-          type: "number",
-          description: `Maximum seconds to wait for an utterance (default: ${DEFAULT_WAIT_TIMEOUT_SECONDS}, min: ${MIN_WAIT_TIMEOUT_SECONDS}, max: ${MAX_WAIT_TIMEOUT_SECONDS})`,
-          default: DEFAULT_WAIT_TIMEOUT_SECONDS,
-          minimum: MIN_WAIT_TIMEOUT_SECONDS,
-          maximum: MAX_WAIT_TIMEOUT_SECONDS
-        }
-      }
+      properties: {}
     }
   },
   {
````
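With `seconds_to_wait` removed from the schema, `wait_for_utterance` now takes no arguments and the timeout is fixed server-side. An MCP client would invoke it with an empty arguments object (a sketch of a standard MCP `tools/call` request, not code from this package):

```javascript
// JSON-RPC payload an MCP client would send after this change.
const request = {
  jsonrpc: "2.0",
  id: 1,
  method: "tools/call",
  params: { name: "wait_for_utterance", arguments: {} },
};
```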
````diff
@@ -437,6 +474,16 @@ if (IS_MCP_MANAGED) {
       body: JSON.stringify({ limit })
     });
     const data = await response.json();
+    if (!response.ok) {
+      return {
+        content: [
+          {
+            type: "text",
+            text: `Error: ${data.error || "Failed to dequeue utterances"}`
+          }
+        ]
+      };
+    }
     if (data.utterances.length === 0) {
       return {
         content: [
@@ -459,18 +506,23 @@ ${data.utterances.reverse().map((u) => `"${u.text}" [time: ${new Date(u.timestam
       };
     }
     if (name === "wait_for_utterance") {
-
-      const secondsToWait = Math.max(
-        MIN_WAIT_TIMEOUT_SECONDS,
-        Math.min(MAX_WAIT_TIMEOUT_SECONDS, requestedSeconds)
-      );
-      debugLog(`[MCP] Calling wait_for_utterance with ${secondsToWait}s timeout`);
+      debugLog(`[MCP] Calling wait_for_utterance`);
       const response = await fetch(`http://localhost:${HTTP_PORT}/api/wait-for-utterances`, {
         method: "POST",
         headers: { "Content-Type": "application/json" },
-        body: JSON.stringify({
+        body: JSON.stringify({})
       });
       const data = await response.json();
+      if (!response.ok) {
+        return {
+          content: [
+            {
+              type: "text",
+              text: `Error: ${data.error || "Failed to wait for utterances"}`
+            }
+          ]
+        };
+      }
       if (data.utterances && data.utterances.length > 0) {
         const utteranceTexts = data.utterances.map((u) => `[${u.timestamp}] "${u.text}"`).join("\n");
         return {
@@ -488,7 +540,7 @@ ${utteranceTexts}${getVoiceResponseReminder()}`
       content: [
         {
           type: "text",
-          text: data.message || `No utterances found
+          text: data.message || `No utterances found. Timed out.`
         }
       ]
     };
@@ -518,8 +570,8 @@ ${utteranceTexts}${getVoiceResponseReminder()}`
     content: [
       {
         type: "text",
-        text:
-
+        text: ""
+        // Return empty string for success
       }
     ]
   };
````
|