nothumanallowed 4.0.2 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,845 @@
1
+ /**
2
+ * nha voice — Voice-powered chat interface.
3
+ *
4
+ * Opens a local web page with a microphone button. Uses the browser's native
5
+ * Web Speech API (SpeechRecognition) for speech-to-text — zero server-side
6
+ * transcription needed. If the user has an OpenAI key, optionally uses Whisper
7
+ * API for higher-accuracy transcription via MediaRecorder + server upload.
8
+ *
9
+ * Transcribed text is routed through the same /api/chat pipeline as nha chat.
10
+ * Responses are spoken back via Web Speech Synthesis API in the browser.
11
+ *
12
+ * Zero npm dependencies — Node.js 22 native http module only.
13
+ */
14
+
15
+ import http from 'http';
16
+ import { exec } from 'child_process';
17
+ import { loadConfig } from '../config.mjs';
18
+ import { callLLM } from '../services/llm.mjs';
19
+ import { getUnreadImportant, getMessage, listMessages, sendEmail, createDraft } from '../services/mail-router.mjs';
20
+ import { getTodayEvents, getUpcomingEvents, createEvent, updateEvent, getEventsForDate } from '../services/mail-router.mjs';
21
+ import { getTasks, addTask, completeTask } from '../services/task-store.mjs';
22
+ import { VERSION } from '../constants.mjs';
23
+ import { info, ok, fail, warn, C, G, D, NC, BOLD } from '../ui.mjs';
24
+
25
+ // ── Constants ──────────────────────────────────────────────────────────────
26
+
27
+ const DEFAULT_PORT = 3849; // Port the local voice server listens on unless a --port=N argument overrides it (see cmdVoice).
28
+
29
+ // ── Chat system prompt (same as chat.mjs / ui.mjs): tool catalogue + reply rules. {{TODAY}} and {{TIMEZONE}} are placeholders substituted in cmdVoice; tool names must match the cases in executeTool ──
30
+
31
+ const TOOL_DEFINITIONS = `
32
+ You have access to the following tools. When the user's message requires an action,
33
+ output EXACTLY ONE fenced JSON block per action:
34
+
35
+ \`\`\`json
36
+ {"action": "<tool_name>", "params": { ... }}
37
+ \`\`\`
38
+
39
+ You may include conversational text BEFORE or AFTER the JSON block. If no action
40
+ is needed, respond normally without any JSON block.
41
+
42
+ TOOLS:
43
+
44
+ 1. gmail_list(query: string, maxResults?: number)
45
+ Search emails. query uses Gmail search syntax.
46
+
47
+ 2. gmail_read(messageId: string)
48
+ Read the full body of an email by its ID.
49
+
50
+ 3. gmail_send(to: string, subject: string, body: string)
51
+ Send an email. ALWAYS confirm with the user before sending.
52
+
53
+ 4. gmail_draft(to: string, subject: string, body: string)
54
+ Create a draft email (safe — does not send).
55
+
56
+ 5. calendar_today()
57
+ List all events for today.
58
+
59
+ 6. calendar_upcoming(hours?: number)
60
+ List upcoming events in the next N hours (default 2).
61
+
62
+ 7. calendar_create(summary: string, start: string, end: string, attendees?: string[], description?: string)
63
+ Create a calendar event. start/end are ISO 8601 datetime strings.
64
+
65
+ 8. task_list()
66
+ List today's tasks.
67
+
68
+ 9. task_add(description: string, priority?: "low"|"medium"|"high"|"critical", due?: string)
69
+ Add a new task for today.
70
+
71
+ 10. task_done(id: number)
72
+ Mark a task as completed.
73
+
74
+ RULES:
75
+ - For search/read operations, execute immediately and present results conversationally.
76
+ - For write/send/delete operations, describe what you're about to do and include the JSON block.
77
+ - When presenting results, format them clearly in natural language. Never dump raw JSON to the user.
78
+ - If you need multiple actions in sequence, do them ONE AT A TIME.
79
+ - Dates: today is {{TODAY}}. The user's timezone is {{TIMEZONE}}.
80
+ - IMPORTANT: Keep responses SHORT and conversational since they will be spoken aloud.
81
+ Aim for 2-3 sentences maximum. Avoid markdown formatting, bullet points, or numbered lists.
82
+ Use natural spoken language.
83
+ `.trim();
84
+
85
+ // ── Tool execution (mirrors ui.mjs) ────────────────────────────────────────
86
+
87
/**
 * Split a model reply into plain-text segments and tool invocations.
 *
 * Every ```json fenced block is inspected:
 *   - JSON with a non-empty string `action` becomes an entry in `actions`
 *     (`params` defaults to `{}` when missing or falsy);
 *   - a block that cannot be parsed (or parses to `null`) is kept verbatim as text;
 *   - a block that parses but has no usable `action` is dropped.
 * Text between and around the fences is trimmed; empty segments are skipped.
 *
 * @param {string} text  Raw model output.
 * @returns {{ textParts: string[], actions: Array<{ action: string, params: object }> }}
 */
function parseActions(text) {
  const FENCE = /```json\s*\n?([\s\S]*?)```/g;
  const textParts = [];
  const actions = [];
  let cursor = 0;

  // Records a slice of surrounding prose, ignoring whitespace-only slices.
  const keepText = (chunk) => {
    const trimmed = chunk.trim();
    if (trimmed) textParts.push(trimmed);
  };

  for (const hit of text.matchAll(FENCE)) {
    const [block, payload] = hit;
    keepText(text.slice(cursor, hit.index));
    cursor = hit.index + block.length;

    try {
      // Destructuring `null` throws, which routes it to the catch branch below.
      const { action, params } = JSON.parse(payload.trim());
      if (action && typeof action === 'string') {
        actions.push({ action, params: params || {} });
      }
    } catch {
      // Not a tool call we understand: keep the fenced block as ordinary text.
      textParts.push(block);
    }
  }

  keepText(text.slice(cursor));
  return { textParts, actions };
}
112
+
113
/**
 * Run one tool action requested by the model and describe the outcome as
 * plain text (the text is later fed back to the model and/or spoken).
 *
 * Changes from the previous revision:
 *   - `gmail_list` no longer swallows fetch failures silently: it reports when
 *     some or all of the matched messages could not be loaded instead of
 *     returning an empty string.
 *   - `gmail_read` tolerates a message without a `body` field.
 *   - the duplicated calendar formatting lives in one local helper.
 *
 * @param {string} action  Tool name, one of the names listed in TOOL_DEFINITIONS.
 * @param {object} params  Tool arguments as produced by the model.
 * @param {object} config  Loaded CLI configuration, passed through to the services.
 * @returns {Promise<string>} Human-readable result, or `Unknown action: …`.
 * @throws Whatever the underlying mail/calendar/task service throws; the caller
 *         is expected to catch and report it.
 */
async function executeTool(action, params, config) {
  // Shared by calendar_today and calendar_upcoming.
  const describeEvents = (events) =>
    events
      .map((e, i) => {
        const time = e.isAllDay ? 'All day' : `${fmtTime(e.start)} to ${fmtTime(e.end)}`;
        return `${i + 1}. ${time}, ${e.summary}`;
      })
      .join('\n');

  switch (action) {
    case 'gmail_list': {
      const query = params.query || 'is:unread';
      const max = params.maxResults || 10;
      const refs = await listMessages(config, query, max);
      if (refs.length === 0) return 'No emails found matching that query.';

      const messages = [];
      let failed = 0;
      for (const ref of refs.slice(0, max)) {
        try {
          messages.push(await getMessage(config, ref.id));
        } catch {
          // Best effort: one unreadable message must not abort the listing,
          // but the failure is counted so it can be reported below.
          failed += 1;
        }
      }

      if (messages.length === 0) {
        return failed > 0
          ? `Found ${failed} matching email${failed === 1 ? '' : 's'} but could not load any of them.`
          : 'No emails found matching that query.';
      }

      const listing = messages
        .map((m, i) => `${i + 1}. From ${m.from}, Subject: ${m.subject}`)
        .join('\n');
      return failed > 0 ? `${listing}\n(${failed} more could not be loaded.)` : listing;
    }
    case 'gmail_read': {
      const msg = await getMessage(config, params.messageId);
      // Cap the body so a long email does not blow up the follow-up prompt.
      const bodyText = String(msg.body || '').slice(0, 2000);
      return `From: ${msg.from}\nSubject: ${msg.subject}\n${bodyText}`;
    }
    case 'gmail_send': {
      await sendEmail(config, params.to, params.subject, params.body);
      return `Email sent to ${params.to}.`;
    }
    case 'gmail_draft': {
      await createDraft(config, params.to, params.subject, params.body);
      return `Draft created for ${params.to}.`;
    }
    case 'calendar_today': {
      const events = await getTodayEvents(config);
      if (events.length === 0) return 'No events scheduled for today.';
      return describeEvents(events);
    }
    case 'calendar_upcoming': {
      const hours = params.hours || 2;
      const events = await getUpcomingEvents(config, hours);
      if (events.length === 0) return `No events in the next ${hours} hours.`;
      return describeEvents(events);
    }
    case 'calendar_create': {
      await createEvent(config, {
        summary: params.summary,
        start: params.start,
        end: params.end,
        description: params.description || '',
        attendees: params.attendees || [],
      });
      return `Event "${params.summary}" created.`;
    }
    case 'task_list': {
      const tasks = getTasks();
      if (tasks.length === 0) return 'No tasks for today.';
      return tasks
        .map(
          (t) =>
            `Task ${t.id}, ${t.priority} priority: ${t.description}${t.status === 'done' ? ' (done)' : ''}`
        )
        .join('\n');
    }
    case 'task_add': {
      const task = addTask({
        description: params.description,
        priority: params.priority || 'medium',
        due: params.due || null,
        source: 'voice',
      });
      return `Task added: "${task.description}"`;
    }
    case 'task_done': {
      const success = completeTask(params.id);
      return success ? `Task ${params.id} marked as done.` : `Task ${params.id} not found.`;
    }
    default:
      return `Unknown action: ${action}`;
  }
}
191
+
192
/**
 * Format an ISO 8601 timestamp as a 12-hour clock time (e.g. "03:30 PM") in
 * the process's local timezone.
 *
 * An unparseable value does not make `Date` throw — it yields an Invalid Date
 * whose `toLocaleTimeString()` returns the literal "Invalid Date" — so the
 * validity check is explicit; the try/catch only covers exotic inputs that
 * genuinely throw during conversion.
 *
 * @param {string} isoStr  Timestamp to format.
 * @returns {string} Formatted time, or `isoStr` unchanged when it cannot be parsed.
 */
function fmtTime(isoStr) {
  try {
    const parsed = new Date(isoStr);
    if (Number.isNaN(parsed.getTime())) return isoStr;
    return parsed.toLocaleTimeString('en-US', { hour: '2-digit', minute: '2-digit', hour12: true });
  } catch {
    return isoStr;
  }
}
197
+
198
+ // ── Whisper transcription (OpenAI API) ──────────────────────────────────────
199
+
200
/**
 * Transcribe a base64-encoded audio recording with OpenAI's Whisper endpoint.
 *
 * The multipart/form-data request body is assembled by hand so the module
 * stays dependency-free. The key used is `config.llm.openaiKey`, falling back
 * to `config.llm.apiKey` when the configured provider is `openai`.
 *
 * @param {string} audioBase64  Audio payload, base64 encoded (sent as audio/webm).
 * @param {object} config       Loaded CLI configuration (reads `config.llm`).
 * @returns {Promise<string>}   The transcript, or '' when the API returns none.
 * @throws {Error} When no OpenAI key is available or the API answers non-2xx.
 */
async function transcribeWithWhisper(audioBase64, config) {
  const { llm } = config;
  let openaiKey = llm.openaiKey;
  if (!openaiKey) {
    openaiKey = llm.provider === 'openai' ? llm.apiKey : '';
  }
  if (!openaiKey) {
    throw new Error('No OpenAI key for Whisper transcription');
  }

  const audioBytes = Buffer.from(audioBase64, 'base64');
  const boundary = '----NHAVoiceBoundary' + Date.now().toString(36);
  const CRLF = '\r\n';

  // Everything that precedes the raw audio bytes: the "model" field followed
  // by the headers of the "file" field (terminated by an empty line).
  const preamble = [
    `--${boundary}`,
    'Content-Disposition: form-data; name="model"',
    '',
    'whisper-1',
    `--${boundary}`,
    'Content-Disposition: form-data; name="file"; filename="recording.webm"',
    'Content-Type: audio/webm',
    '',
    '',
  ].join(CRLF);
  const closing = `${CRLF}--${boundary}--${CRLF}`;

  const reply = await fetch('https://api.openai.com/v1/audio/transcriptions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${openaiKey}`,
      'Content-Type': `multipart/form-data; boundary=${boundary}`,
    },
    body: Buffer.concat([Buffer.from(preamble), audioBytes, Buffer.from(closing)]),
  });

  if (!reply.ok) {
    const detail = await reply.text();
    throw new Error(`Whisper ${reply.status}: ${detail}`);
  }

  const payload = await reply.json();
  return payload.text || '';
}
246
+
247
+ // ── Voice HTML page ─────────────────────────────────────────────────────────
248
+
249
/**
 * Build the self-contained HTML page (inline CSS + JS) served at `/`.
 *
 * Fixes over the previous revision:
 *   - the mode button that is highlighted on load now matches the mode the
 *     page actually starts in (`useWhisper`), instead of always "Browser Speech";
 *   - the chat request no longer includes the current user message in
 *     `history` (the server appends it itself, so it used to be sent twice);
 *   - stopping while the microphone permission prompt is still open no longer
 *     leaves a recorder running, and a failing MediaRecorder constructor
 *     releases the stream;
 *   - switching mode while recording stops the active recorder first;
 *   - holding Space no longer toggles recording on every key auto-repeat;
 *   - `esc` also escapes double quotes because it is used inside an attribute.
 *
 * @param {number} port         Port the server listens on. Currently unused by the
 *                              page (requests are same-origin); kept for callers.
 * @param {boolean} useWhisper  Whether the page starts in Whisper mode.
 * @returns {string} Complete HTML document.
 */
function getVoiceHTML(port, useWhisper) {
  const CSS = `
    *,*::before,*::after{box-sizing:border-box;margin:0;padding:0}
    :root{
      --bg:#0a0a0a;--bg2:#111;--bg3:#1a1a1a;--bg4:#222;
      --green:#00ff41;--green2:#00cc33;--green3:#00aa28;--greendim:#0a3a12;
      --cyan:#00e5ff;--amber:#ffb300;--red:#ff1744;
      --text:#c8c8c8;--dim:#666;--bright:#fff;
      --border:#1e1e1e;--border2:#333;
      --font:'JetBrains Mono','Fira Code','SF Mono','Consolas',monospace;
      --r:6px;
    }
    html,body{height:100%;background:var(--bg);color:var(--text);font-family:var(--font);font-size:13px;line-height:1.5}

    .voice-app{display:flex;flex-direction:column;height:100vh;height:100dvh;max-width:640px;margin:0 auto;padding:16px}
    .voice-header{text-align:center;padding:24px 0 16px}
    .voice-header__title{font-size:20px;color:var(--green);font-weight:700;letter-spacing:2px}
    .voice-header__sub{font-size:11px;color:var(--dim);margin-top:4px}

    .voice-messages{flex:1;overflow-y:auto;padding:12px 0;-webkit-overflow-scrolling:touch}
    .voice-msg{margin-bottom:14px;animation:fadeIn .3s}
    @keyframes fadeIn{from{opacity:0;transform:translateY(8px)}to{opacity:1;transform:none}}
    .voice-msg--user .voice-msg__bubble{background:var(--bg3);border:1px solid var(--border2);border-radius:12px 12px 2px 12px;padding:12px 16px;max-width:85%;margin-left:auto;color:var(--bright)}
    .voice-msg--assistant .voice-msg__bubble{background:var(--greendim);border:1px solid var(--green3);border-radius:12px 12px 12px 2px;padding:12px 16px;max-width:85%;color:var(--text);white-space:pre-wrap;word-wrap:break-word}
    .voice-msg__label{font-size:10px;color:var(--dim);margin-bottom:3px}
    .voice-msg--thinking{color:var(--dim);font-style:italic}

    .voice-controls{display:flex;flex-direction:column;align-items:center;gap:16px;padding:20px 0;border-top:1px solid var(--border);flex-shrink:0}
    .voice-mic{width:80px;height:80px;border-radius:50%;border:3px solid var(--green3);background:var(--bg2);cursor:pointer;display:flex;align-items:center;justify-content:center;transition:all .2s;position:relative}
    .voice-mic:hover{border-color:var(--green);background:var(--greendim)}
    .voice-mic--recording{border-color:var(--red);background:rgba(255,23,68,0.15);animation:pulse 1.5s ease-in-out infinite}
    @keyframes pulse{0%,100%{box-shadow:0 0 0 0 rgba(255,23,68,0.4)}50%{box-shadow:0 0 0 20px rgba(255,23,68,0)}}
    .voice-mic__icon{font-size:32px;color:var(--green);transition:color .2s}
    .voice-mic--recording .voice-mic__icon{color:var(--red)}
    .voice-status{font-size:11px;color:var(--dim);min-height:16px}
    .voice-mode{font-size:10px;color:var(--dim)}
    .voice-mode__btn{background:none;border:1px solid var(--border2);color:var(--dim);padding:4px 10px;border-radius:12px;font-size:10px;font-family:var(--font);cursor:pointer;margin:0 4px}
    .voice-mode__btn--active{border-color:var(--green3);color:var(--green)}

    .voice-text-bar{display:flex;gap:8px;width:100%;max-width:500px}
    .voice-text-bar input{flex:1;background:var(--bg2);color:var(--text);border:1px solid var(--border);padding:10px 14px;border-radius:var(--r);font-size:13px;font-family:var(--font);outline:none}
    .voice-text-bar input:focus{border-color:var(--green3)}
    .voice-text-bar button{background:var(--green3);color:var(--bg);padding:10px 18px;border-radius:var(--r);font-weight:700;font-size:12px;font-family:var(--font);cursor:pointer;border:none}

    .toast{position:fixed;top:16px;right:16px;z-index:500;background:var(--bg3);border:1px solid var(--green);border-radius:8px;padding:12px 16px;max-width:300px;animation:slideIn .3s;font-size:12px;color:var(--text)}
    @keyframes slideIn{from{transform:translateX(100%);opacity:0}to{transform:none;opacity:1}}
  `;

  // NOTE: this is a template literal that is emitted verbatim as browser JS,
  // so every backslash meant for the browser is doubled here.
  const JS = `
    var API = '';
    var chatHistory = [];
    var isRecording = false;
    var recognition = null;
    var mediaRecorder = null;
    var audioChunks = [];
    var useWhisper = ${useWhisper ? 'true' : 'false'};
    var speechSynthEnabled = true;

    // ---- API ----
    function apiPost(p, b) {
      return fetch(API + p, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify(b) })
        .then(function(r) { return r.ok ? r.json() : null })
        .catch(function() { return null });
    }

    function esc(s) { return s ? String(s).replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;') : ''; }

    // ---- SPEECH RECOGNITION (Browser-native) ----
    function initWebSpeech() {
      var SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
      if (!SpeechRecognition) return null;

      var rec = new SpeechRecognition();
      rec.continuous = false;
      rec.interimResults = true;
      rec.lang = navigator.language || 'en-US';
      rec.maxAlternatives = 1;

      rec.onresult = function(event) {
        var transcript = '';
        for (var i = event.resultIndex; i < event.results.length; i++) {
          transcript += event.results[i][0].transcript;
        }
        setStatus('Heard: ' + transcript.slice(0, 60) + (transcript.length > 60 ? '...' : ''));

        // If final result
        if (event.results[event.results.length - 1].isFinal) {
          stopRecording();
          if (transcript.trim()) {
            processMessage(transcript.trim());
          }
        }
      };

      rec.onerror = function(e) {
        setStatus('Speech error: ' + e.error);
        stopRecording();
      };

      rec.onend = function() {
        if (isRecording) stopRecording();
      };

      return rec;
    }

    // ---- WHISPER MODE (MediaRecorder + server) ----
    function releaseStream(stream) {
      stream.getTracks().forEach(function(t) { t.stop(); });
    }

    function startWhisperRecording() {
      navigator.mediaDevices.getUserMedia({ audio: true }).then(function(stream) {
        // The user may have pressed stop while the permission prompt was open.
        if (!isRecording) {
          releaseStream(stream);
          return;
        }

        audioChunks = [];
        try {
          mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm;codecs=opus' });
        } catch (err) {
          // Unsupported mime type etc.: do not keep the microphone open.
          releaseStream(stream);
          throw err;
        }

        mediaRecorder.ondataavailable = function(e) {
          if (e.data.size > 0) audioChunks.push(e.data);
        };

        mediaRecorder.onstop = function() {
          releaseStream(stream);
          if (audioChunks.length === 0) return;

          var blob = new Blob(audioChunks, { type: 'audio/webm' });
          setStatus('Transcribing with Whisper...');

          var reader = new FileReader();
          reader.onloadend = function() {
            var base64 = reader.result.split(',')[1];
            apiPost('/api/voice/transcribe', { audio: base64 }).then(function(r) {
              if (r && r.text) {
                processMessage(r.text);
              } else {
                setStatus('Transcription failed: ' + (r && r.error || 'unknown error'));
              }
            });
          };
          reader.readAsDataURL(blob);
        };

        mediaRecorder.start();
      }).catch(function(err) {
        setStatus('Microphone error: ' + err.message);
        stopRecording();
      });
    }

    // ---- RECORDING CONTROL ----
    function toggleRecording() {
      if (isRecording) {
        stopRecording();
      } else {
        startRecording();
      }
    }

    function startRecording() {
      isRecording = true;
      var mic = document.getElementById('mic');
      mic.classList.add('voice-mic--recording');
      setStatus('Listening...');

      if (useWhisper) {
        startWhisperRecording();
      } else {
        if (!recognition) recognition = initWebSpeech();
        if (recognition) {
          recognition.start();
        } else {
          setStatus('Speech Recognition not supported in this browser. Use Chrome.');
          stopRecording();
        }
      }
    }

    function stopRecording() {
      isRecording = false;
      var mic = document.getElementById('mic');
      mic.classList.remove('voice-mic--recording');

      if (useWhisper && mediaRecorder && mediaRecorder.state === 'recording') {
        mediaRecorder.stop();
      } else if (recognition) {
        try { recognition.stop(); } catch(e) {}
      }
    }

    // ---- TEXT-TO-SPEECH ----
    function speak(text) {
      if (!speechSynthEnabled || !window.speechSynthesis) return;

      // Cancel any ongoing speech
      window.speechSynthesis.cancel();

      // Clean text for speech (remove markdown, code blocks, etc.)
      var codeBlockRe = new RegExp('\\x60\\x60\\x60[\\\\s\\\\S]*?\\x60\\x60\\x60', 'g');
      var clean = text
        .replace(codeBlockRe, '')
        .replace(/\\*\\*([^*]+)\\*\\*/g, '$1')
        .replace(/\\*([^*]+)\\*/g, '$1')
        .replace(/#+\\s/g, '')
        .replace(/\\[([^\\]]+)\\]\\([^)]+\\)/g, '$1')
        .replace(/\\n/g, '. ')
        .trim();

      if (!clean) return;

      var utterance = new SpeechSynthesisUtterance(clean);
      utterance.rate = 1.0;
      utterance.pitch = 1.0;

      // Try to find a good voice
      var voices = window.speechSynthesis.getVoices();
      var preferred = voices.find(function(v) { return v.name.includes('Samantha') || v.name.includes('Google US English') || v.name.includes('Microsoft Zira'); });
      if (preferred) utterance.voice = preferred;

      window.speechSynthesis.speak(utterance);
    }

    // ---- PROCESS MESSAGE ----
    function processMessage(text) {
      chatHistory.push({ role: 'user', content: text });
      renderMessages();

      chatHistory.push({ role: 'assistant', content: 'Thinking...' });
      renderMessages();

      setStatus('Processing...');

      // Drop the two entries just pushed (this message + the placeholder):
      // the server appends "message" to the history itself.
      apiPost('/api/chat', { message: text, history: chatHistory.slice(0, -2) }).then(function(r) {
        chatHistory.pop();
        var response = '';
        if (r && r.response) {
          response = r.response;
          chatHistory.push({ role: 'assistant', content: response });
        } else if (r && r.error) {
          response = 'Error: ' + r.error;
          chatHistory.push({ role: 'assistant', content: response });
        } else {
          response = 'No response from server.';
          chatHistory.push({ role: 'assistant', content: response });
        }
        renderMessages();
        setStatus('');

        // Speak the response
        speak(response);
      });
    }

    // ---- RENDER ----
    function renderMessages() {
      var el = document.getElementById('messages');
      if (!el) return;

      if (chatHistory.length === 0) {
        el.innerHTML = '<div style="text-align:center;padding:60px 16px;color:var(--dim)">' +
          '<div style="font-size:48px;margin-bottom:12px">&#127908;</div>' +
          '<div style="font-size:14px;color:var(--green);margin-bottom:8px">Voice Assistant Ready</div>' +
          '<div style="font-size:11px">Click the microphone or type below</div></div>';
        return;
      }

      var h = '';
      chatHistory.forEach(function(m) {
        var isThinking = m.content === 'Thinking...';
        h += '<div class="voice-msg voice-msg--' + esc(m.role) + (isThinking ? ' voice-msg--thinking' : '') + '">' +
          '<div class="voice-msg__label">' + esc(m.role === 'user' ? 'You' : 'NHA') + '</div>' +
          '<div class="voice-msg__bubble">' + esc(m.content) + '</div></div>';
      });

      el.innerHTML = h;
      el.scrollTop = el.scrollHeight;
    }

    function setStatus(text) {
      var el = document.getElementById('status');
      if (el) el.textContent = text;
    }

    // ---- TEXT INPUT ----
    function sendTextInput() {
      var inp = document.getElementById('textInput');
      if (!inp) return;
      var text = inp.value.trim();
      if (!text) return;
      inp.value = '';
      processMessage(text);
    }

    // ---- MODE TOGGLE ----
    function setMode(mode) {
      // Stop with the recorder of the mode that is still active; otherwise
      // stopRecording() would later look at the wrong recorder.
      if (isRecording) stopRecording();
      useWhisper = (mode === 'whisper');
      document.getElementById('modeWebSpeech').className = 'voice-mode__btn' + (!useWhisper ? ' voice-mode__btn--active' : '');
      document.getElementById('modeWhisper').className = 'voice-mode__btn' + (useWhisper ? ' voice-mode__btn--active' : '');
    }

    function toggleSpeech() {
      speechSynthEnabled = !speechSynthEnabled;
      var btn = document.getElementById('toggleSpeechBtn');
      if (btn) btn.textContent = speechSynthEnabled ? 'Speaker: ON' : 'Speaker: OFF';
    }

    // ---- KEYBOARD SHORTCUTS ----
    document.addEventListener('keydown', function(e) {
      // Space bar toggles recording when not focused on input.
      // Auto-repeat events (key held down) are ignored so one press = one toggle.
      if (e.code !== 'Space' || e.repeat) return;
      var active = document.activeElement;
      if (active && active.tagName === 'INPUT') return;
      e.preventDefault();
      toggleRecording();
    });

    // ---- INIT ----
    renderMessages();

    // Pre-load voices
    if (window.speechSynthesis) {
      window.speechSynthesis.getVoices();
      window.speechSynthesis.onvoiceschanged = function() { window.speechSynthesis.getVoices(); };
    }
  `;

  // Highlight the button of the mode the page really starts in.
  const activeSuffix = ' voice-mode__btn--active';
  const webSpeechClass = `voice-mode__btn${useWhisper ? '' : activeSuffix}`;
  const whisperClass = `voice-mode__btn${useWhisper ? activeSuffix : ''}`;

  return `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1,viewport-fit=cover">
<meta name="theme-color" content="#0a0a0a">
<title>NHA Voice</title>
<style>${CSS}</style>
</head>
<body>

<div class="voice-app">
  <div class="voice-header">
    <div class="voice-header__title">NHA VOICE</div>
    <div class="voice-header__sub">Speak to your agents. Press Space or click the mic.</div>
  </div>

  <div class="voice-messages" id="messages"></div>

  <div class="voice-controls">
    <div class="voice-mode">
      <button class="${webSpeechClass}" id="modeWebSpeech" onclick="setMode('web')">Browser Speech</button>
      <button class="${whisperClass}" id="modeWhisper" onclick="setMode('whisper')">Whisper API</button>
      <button class="voice-mode__btn" id="toggleSpeechBtn" onclick="toggleSpeech()">Speaker: ON</button>
    </div>

    <button class="voice-mic" id="mic" onclick="toggleRecording()">
      <span class="voice-mic__icon">&#127908;</span>
    </button>

    <div class="voice-status" id="status"></div>

    <div class="voice-text-bar">
      <input type="text" id="textInput" placeholder="Or type here..." onkeydown="if(event.key==='Enter')sendTextInput()">
      <button onclick="sendTextInput()">Send</button>
    </div>
  </div>
</div>

<script>${JS}</script>
</body>
</html>`;
}
611
+
612
+ // ── HTTP Helpers ──────────────────────────────────────────────────────────
613
+
614
/**
 * Serialize `data` as JSON and send it as the complete HTTP response.
 *
 * The payload is serialized before any header is written, so a value that
 * cannot be serialized throws without leaving a half-started response.
 *
 * @param {import('http').ServerResponse} res  Response to write to.
 * @param {number} statusCode                   HTTP status code.
 * @param {*} data                              JSON-serializable payload.
 */
function sendJSON(res, statusCode, data) {
  const payload = JSON.stringify(data);
  const headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Methods': 'GET,POST,OPTIONS',
    'Access-Control-Allow-Headers': 'Content-Type',
    'Cache-Control': 'no-cache',
  };
  res.writeHead(statusCode, headers);
  res.end(payload);
}
625
+
626
/**
 * Send an HTML document with status 200; the page is marked `no-store` so the
 * browser never caches it.
 *
 * @param {import('http').ServerResponse} res  Response to write to.
 * @param {string} html                         Document markup.
 */
function sendHTML(res, html) {
  const headers = {
    'Content-Type': 'text/html; charset=utf-8',
    'Cache-Control': 'no-store',
  };
  res.writeHead(200, headers);
  res.end(html);
}
633
+
634
/**
 * Collect a request body and parse it as JSON.
 *
 * @param {import('http').IncomingMessage} req  Incoming request stream.
 * @returns {Promise<*>} Parsed JSON value; `{}` when the body is empty.
 *   Rejects with `Error('Body too large')` (and destroys the request) once more
 *   than 10 MB has been received, with the `JSON.parse` error for malformed
 *   JSON, or with the stream's own error.
 */
function parseBody(req) {
  const LIMIT_BYTES = 10_485_760; // 10 MB (audio can be large)

  return new Promise((resolve, reject) => {
    const received = [];
    let total = 0;

    const onData = (piece) => {
      total += piece.length;
      if (total <= LIMIT_BYTES) {
        received.push(piece);
        return;
      }
      reject(new Error('Body too large'));
      req.destroy();
    };

    const onEnd = () => {
      let parsed;
      try {
        const text = Buffer.concat(received).toString('utf-8');
        parsed = text ? JSON.parse(text) : {};
      } catch (err) {
        reject(err);
        return;
      }
      resolve(parsed);
    };

    req.on('data', onData);
    req.on('end', onEnd);
    req.on('error', reject);
  });
}
653
+
654
/**
 * Ask the operating system to open `url` with its default handler.
 * Fire-and-forget: the outcome of the launcher command is ignored.
 *
 * @param {string} url  Address to open.
 */
function openBrowser(url) {
  let launcher;
  switch (process.platform) {
    case 'darwin':
      launcher = 'open';
      break;
    case 'win32':
      launcher = 'start';
      break;
    default:
      launcher = 'xdg-open';
  }
  exec(`${launcher} ${url}`, () => {});
}
659
+
660
+ // ── Server ───────────────────────────────────────────────────────────────
661
+
662
/**
 * `nha voice` entry point: start the local voice server and (optionally) open
 * the browser UI.
 *
 * Recognized arguments:
 *   --port=N       listen on port N (1-65535); anything else falls back to DEFAULT_PORT
 *   --no-browser   do not open the page automatically
 *
 * Routes:
 *   GET  /, /index.html           the voice UI
 *   POST /api/chat                { message, history? } -> { response, toolResults, actions }
 *   POST /api/voice/transcribe    { audio: base64 }     -> { text }
 *
 * Fixes over the previous revision:
 *   - Requests whose Host or Origin header does not point at this loopback
 *     server are rejected with 403. The server can send email on the user's
 *     behalf, so without this any web page could drive it cross-site, or via
 *     DNS rebinding.
 *   - URL parsing happens inside the try block; a malformed request target used
 *     to reject the async handler and could take the process down.
 *   - {{TODAY}} is the *local* calendar date (the prompt also states the local
 *     timezone) and the prompt is rebuilt per request, so a long-running server
 *     does not keep yesterday's date.
 *   - Out-of-range ports no longer reach `listen()`.
 *   - `history` is validated before it is iterated; malformed JSON yields 400.
 *   - The startup banner reports the mode the page really starts in.
 *
 * Exits the process when no API key is configured or the port is unavailable.
 *
 * @param {string[]} args  Command-line arguments following `voice`.
 * @returns {Promise<void>}
 */
export async function cmdVoice(args) {
  let port = DEFAULT_PORT;
  let noBrowser = false;
  for (const arg of args) {
    if (arg.startsWith('--port=')) {
      const requested = Number.parseInt(arg.split('=')[1], 10);
      if (Number.isInteger(requested) && requested >= 1 && requested <= 65535) {
        port = requested;
      } else {
        warn(`Invalid port in "${arg}"; using ${DEFAULT_PORT}.`);
        port = DEFAULT_PORT;
      }
    } else if (arg === '--no-browser') {
      noBrowser = true;
    }
  }

  const config = loadConfig();

  if (!config.llm.apiKey) {
    fail('No API key configured. Run: nha config set key YOUR_KEY');
    process.exit(1);
  }

  // Determine if Whisper mode is available
  const hasOpenAIKey = !!(config.llm.openaiKey || (config.llm.provider === 'openai' && config.llm.apiKey));

  // Built per request so the date never goes stale while the server runs.
  const buildSystemPrompt = () => {
    const now = new Date();
    const today = [
      now.getFullYear(),
      String(now.getMonth() + 1).padStart(2, '0'),
      String(now.getDate()).padStart(2, '0'),
    ].join('-');
    const tz = Intl.DateTimeFormat().resolvedOptions().timeZone;
    let prompt = TOOL_DEFINITIONS.replace('{{TODAY}}', today).replace('{{TIMEZONE}}', tz);
    prompt += `\n\nYou are NHA Voice, a voice-powered personal operations assistant inside the NotHumanAllowed CLI. ` +
      `You help the user manage their emails, calendar, and tasks through voice commands. ` +
      `Keep responses SHORT and SPOKEN-FRIENDLY — 2-3 sentences max. ` +
      `Avoid markdown formatting, bullet points, numbered lists. Use natural spoken language.`;
    return prompt;
  };

  const htmlPage = getVoiceHTML(port, hasOpenAIKey);

  // Only the page served by this very server may talk to the API.
  const allowedHosts = new Set([`127.0.0.1:${port}`, `localhost:${port}`]);
  if (port === 80) {
    // Browsers omit the default port from Host/Origin.
    allowedHosts.add('127.0.0.1');
    allowedHosts.add('localhost');
  }
  const allowedOrigins = new Set([...allowedHosts].map((h) => `http://${h}`));

  // A present-but-foreign Host (DNS rebinding) or Origin (cross-site request)
  // is refused. Absent headers are accepted so plain CLI clients keep working.
  const isLocalRequest = (req) => {
    const host = req.headers.host;
    if (host && !allowedHosts.has(String(host).toLowerCase())) return false;
    const origin = req.headers.origin;
    if (origin && !allowedOrigins.has(String(origin).toLowerCase())) return false;
    return true;
  };

  async function handleRequest(req, res) {
    try {
      if (!isLocalRequest(req)) {
        sendJSON(res, 403, { error: 'Forbidden' });
        return;
      }

      const url = new URL(req.url, `http://127.0.0.1:${port}`);
      const pathname = url.pathname;
      const method = req.method;

      // CORS preflight
      if (method === 'OPTIONS') {
        res.writeHead(204, {
          'Access-Control-Allow-Origin': '*',
          'Access-Control-Allow-Methods': 'GET,POST,OPTIONS',
          'Access-Control-Allow-Headers': 'Content-Type',
        });
        res.end();
        return;
      }

      // Serve HTML page
      if (method === 'GET' && (pathname === '/' || pathname === '/index.html')) {
        sendHTML(res, htmlPage);
        return;
      }

      // Chat endpoint (same as nha ui)
      if (method === 'POST' && pathname === '/api/chat') {
        const body = (await parseBody(req)) || {};
        if (!body.message) {
          sendJSON(res, 400, { error: 'message required' });
          return;
        }

        // Flatten the conversation into one "[User] … / [Assistant] …" transcript.
        const history = Array.isArray(body.history) ? body.history : [];
        const parts = [];
        for (const turn of history) {
          if (!turn || typeof turn !== 'object') continue;
          const prefix = turn.role === 'user' ? '[User]' : '[Assistant]';
          parts.push(`${prefix} ${turn.content}`);
        }
        parts.push(`[User] ${body.message}`);
        const userMessage = parts.join('\n\n');

        try {
          const chatSystemPrompt = buildSystemPrompt();
          const response = await callLLM(config, chatSystemPrompt, userMessage);
          const { textParts, actions } = parseActions(response);
          const textResponse = textParts.join('\n\n');

          const toolResults = [];
          for (const { action, params } of actions) {
            try {
              const result = await executeTool(action, params, config);
              toolResults.push({ action, result: String(result) });
            } catch (e) {
              toolResults.push({ action, result: `Error: ${e.message}` });
            }
          }

          let fullResponse;
          if (toolResults.length > 0) {
            // Second pass: let the model phrase the tool output for speech.
            const toolContext = toolResults.map(t => `[${t.action} result]: ${t.result}`).join('\n\n');
            const followUp = `The user asked: "${body.message}"\n\nTool results:\n\n${toolContext}\n\nRespond conversationally based on these results. Keep it SHORT (2-3 sentences) since this will be spoken aloud.`;
            try {
              fullResponse = await callLLM(config, chatSystemPrompt, followUp);
            } catch {
              // Fall back to the raw tool output rather than failing the request.
              fullResponse = toolResults.map(t => `${t.result}`).join('. ');
            }
          } else {
            fullResponse = textResponse;
          }

          sendJSON(res, 200, { response: fullResponse, toolResults, actions });
        } catch (e) {
          // Status 200 on purpose: the page only reads the body of successful
          // responses, and it needs `error` to show the failure.
          sendJSON(res, 200, { response: null, error: e.message });
        }
        return;
      }

      // Whisper transcription endpoint
      if (method === 'POST' && pathname === '/api/voice/transcribe') {
        const body = (await parseBody(req)) || {};
        if (!body.audio) {
          sendJSON(res, 400, { error: 'audio (base64) required' });
          return;
        }

        try {
          const text = await transcribeWithWhisper(body.audio, config);
          sendJSON(res, 200, { text });
        } catch (e) {
          sendJSON(res, 200, { text: null, error: e.message });
        }
        return;
      }

      // Favicon
      if (pathname === '/favicon.ico') {
        res.writeHead(204);
        res.end();
        return;
      }

      sendJSON(res, 404, { error: 'Not found' });
    } catch (err) {
      const malformed = err instanceof SyntaxError;
      try {
        sendJSON(res, malformed ? 400 : 500, { error: malformed ? 'Invalid JSON body' : 'Internal server error' });
      } catch {}
    }
  }

  const server = http.createServer(handleRequest);

  server.on('error', (err) => {
    if (err.code === 'EADDRINUSE') {
      fail(`Port ${port} is already in use. Try: nha voice --port=${port + 1}`);
      process.exit(1);
    }
    fail(`Server error: ${err.message}`);
    process.exit(1);
  });

  server.listen(port, '127.0.0.1', () => {
    const localUrl = `http://127.0.0.1:${port}`;
    const speechMode = hasOpenAIKey
      ? 'Whisper API (default, switchable in the page)'
      : 'Browser Web Speech API (default)';

    console.log('');
    console.log(` ${BOLD}${C}NHA Voice${NC}`);
    console.log(` ${D}Voice-powered personal operations assistant${NC}`);
    console.log('');
    console.log(` ${G}Local:${NC} ${localUrl}`);
    console.log(` ${D}Provider:${NC} ${config.llm.provider || 'not set'}`);
    console.log(` ${D}Whisper available:${NC} ${hasOpenAIKey ? G + 'yes' + NC : D + 'no (set openai-key for Whisper)' + NC}`);
    console.log(` ${D}Speech mode:${NC} ${speechMode}`);
    console.log('');
    console.log(` ${D}Press Space to toggle mic, or click the microphone button.${NC}`);
    console.log(` ${D}Responses are spoken aloud via Speech Synthesis.${NC}`);
    console.log(` ${D}Press Ctrl+C to stop${NC}`);
    console.log('');

    if (!noBrowser) {
      openBrowser(localUrl);
    }
  });

  const shutdown = () => {
    console.log(`\n ${D}Shutting down...${NC}`);
    server.close(() => {
      console.log(` ${D}Voice server stopped.${NC}\n`);
      process.exit(0);
    });
    // Open keep-alive connections can delay close(); do not hang on them.
    setTimeout(() => process.exit(0), 3000);
  };

  process.on('SIGINT', shutdown);
  process.on('SIGTERM', shutdown);
}