@luckydraw/cumulus 0.28.8 → 0.29.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. package/CHANGELOG.md +26 -2
  2. package/dist/cli/cumulus.js +1 -1
  3. package/dist/cli/cumulus.js.map +1 -1
  4. package/dist/gateway/adapters/webchat.d.ts.map +1 -1
  5. package/dist/gateway/adapters/webchat.js +92 -8
  6. package/dist/gateway/adapters/webchat.js.map +1 -1
  7. package/dist/gateway/config.d.ts +2 -0
  8. package/dist/gateway/config.d.ts.map +1 -1
  9. package/dist/gateway/config.js +1 -1
  10. package/dist/gateway/config.js.map +1 -1
  11. package/dist/gateway/daemon.js +2 -1
  12. package/dist/gateway/daemon.js.map +1 -1
  13. package/dist/gateway/logger.js +1 -1
  14. package/dist/gateway/logger.js.map +1 -1
  15. package/dist/gateway/push.js +1 -1
  16. package/dist/gateway/push.js.map +1 -1
  17. package/dist/gateway/scheduler.d.ts.map +1 -1
  18. package/dist/gateway/scheduler.js +1 -1
  19. package/dist/gateway/scheduler.js.map +1 -1
  20. package/dist/gateway/server.d.ts +9 -0
  21. package/dist/gateway/server.d.ts.map +1 -1
  22. package/dist/gateway/server.js +50 -7
  23. package/dist/gateway/server.js.map +1 -1
  24. package/dist/gateway/static/widget.js +785 -9
  25. package/dist/gateway/voice-tts.d.ts +64 -0
  26. package/dist/gateway/voice-tts.d.ts.map +1 -0
  27. package/dist/gateway/voice-tts.js +191 -0
  28. package/dist/gateway/voice-tts.js.map +1 -0
  29. package/dist/lib/config.d.ts.map +1 -1
  30. package/dist/lib/config.js +8 -4
  31. package/dist/lib/config.js.map +1 -1
  32. package/dist/lib/gateway.d.ts +6 -0
  33. package/dist/lib/gateway.d.ts.map +1 -1
  34. package/dist/lib/gateway.js +53 -27
  35. package/dist/lib/gateway.js.map +1 -1
  36. package/dist/lib/history.d.ts.map +1 -1
  37. package/dist/lib/history.js +3 -2
  38. package/dist/lib/history.js.map +1 -1
  39. package/dist/lib/huggingface-provider.d.ts.map +1 -1
  40. package/dist/lib/huggingface-provider.js +123 -12
  41. package/dist/lib/huggingface-provider.js.map +1 -1
  42. package/dist/lib/jsonl-lock.d.ts +34 -0
  43. package/dist/lib/jsonl-lock.d.ts.map +1 -0
  44. package/dist/lib/jsonl-lock.js +81 -0
  45. package/dist/lib/jsonl-lock.js.map +1 -0
  46. package/dist/lib/templates.js +1 -1
  47. package/dist/lib/templates.js.map +1 -1
  48. package/dist/lib/version-check.d.ts.map +1 -1
  49. package/dist/lib/version-check.js +1 -1
  50. package/dist/lib/version-check.js.map +1 -1
  51. package/dist/tui/components/App.js +1 -1
  52. package/dist/tui/components/App.js.map +1 -1
  53. package/package.json +1 -1
@@ -381,15 +381,13 @@
381
381
  '}',
382
382
  '.code-block-edit-btn:hover { border-color: #0066cc; color: #ddd; }',
383
383
  '.texitool-embed-container {',
384
- ' border: 1px solid #3a3a3a;',
385
- ' border-radius: 0.4em;',
386
384
  ' overflow: hidden;',
387
385
  ' margin: 0.6em 0;',
388
- ' background: #1e1e1e;',
389
386
  '}',
390
387
  '.texitool-embed-container iframe {',
391
388
  ' width: 100%; border: none;',
392
389
  ' min-height: 400px;',
390
+ ' max-height: calc(100vh - 200px);',
393
391
  ' display: block;',
394
392
  '}',
395
393
  '.texitool-embed-actions {',
@@ -857,6 +855,7 @@
857
855
  ' flex-shrink: 0;',
858
856
  '}',
859
857
  '.cumulus-topbar-title { font-weight: 600; font-size: 15px; color: #e0e0e0; }',
858
+ '.cumulus-topbar-version { font-weight: 400; font-size: 11px; color: #777; margin-left: 4px; }',
860
859
  '.cumulus-topbar-right { display: flex; align-items: center; gap: 10px; }',
861
860
 
862
861
  /* ── Update banner ── */
@@ -1215,6 +1214,101 @@
1215
1214
  ' .cumulus-standalone-empty-hint { display: none; }',
1216
1215
 
1217
1216
  '}',
1217
+
1218
+ /* ── Voice Mode ── */
1219
+ '.cumulus-voice-overlay {',
1220
+ ' position: absolute; top: 0; left: 0; right: 0; bottom: 0;',
1221
+ ' z-index: 500;',
1222
+ ' display: flex; flex-direction: column;',
1223
+ ' align-items: center; justify-content: center;',
1224
+ ' transition: background 0.4s ease;',
1225
+ ' user-select: none;',
1226
+ '}',
1227
+ '.cumulus-voice-overlay.listening { background: #0a1a2e; }',
1228
+ '.cumulus-voice-overlay.processing { background: #0a2e1a; }',
1229
+ '.cumulus-voice-overlay.speaking { background: #1e1e1e; }',
1230
+ '.cumulus-voice-overlay.idle { background: #1a1a1a; }',
1231
+
1232
+ '.cumulus-voice-indicator {',
1233
+ ' width: 120px; height: 120px;',
1234
+ ' border-radius: 50%;',
1235
+ ' display: flex; align-items: center; justify-content: center;',
1236
+ ' transition: all 0.3s ease;',
1237
+ ' margin-bottom: 24px;',
1238
+ '}',
1239
+ '.cumulus-voice-overlay.listening .cumulus-voice-indicator {',
1240
+ ' background: rgba(0, 102, 204, 0.2);',
1241
+ ' border: 2px solid #0066cc;',
1242
+ ' animation: cumulus-voice-pulse 1.5s ease-in-out infinite;',
1243
+ '}',
1244
+ '.cumulus-voice-overlay.processing .cumulus-voice-indicator {',
1245
+ ' background: rgba(34, 197, 94, 0.15);',
1246
+ ' border: 2px solid #22c55e;',
1247
+ ' animation: cumulus-voice-spin 1s linear infinite;',
1248
+ '}',
1249
+ '.cumulus-voice-overlay.speaking .cumulus-voice-indicator {',
1250
+ ' background: rgba(255, 255, 255, 0.08);',
1251
+ ' border: 2px solid #888;',
1252
+ '}',
1253
+ '.cumulus-voice-overlay.idle .cumulus-voice-indicator {',
1254
+ ' background: rgba(255, 255, 255, 0.05);',
1255
+ ' border: 2px solid #555;',
1256
+ '}',
1257
+
1258
+ '@keyframes cumulus-voice-pulse {',
1259
+ ' 0%, 100% { transform: scale(1); opacity: 1; }',
1260
+ ' 50% { transform: scale(1.08); opacity: 0.7; }',
1261
+ '}',
1262
+ '@keyframes cumulus-voice-spin {',
1263
+ ' from { transform: rotate(0deg); }',
1264
+ ' to { transform: rotate(360deg); }',
1265
+ '}',
1266
+
1267
+ '.cumulus-voice-indicator-icon {',
1268
+ ' font-size: 36px; color: #e0e0e0;',
1269
+ '}',
1270
+ '.cumulus-voice-overlay.listening .cumulus-voice-indicator-icon::after { content: "\\1F3A4"; }',
1271
+ '.cumulus-voice-overlay.processing .cumulus-voice-indicator-icon::after { content: "\\2699"; }',
1272
+ '.cumulus-voice-overlay.speaking .cumulus-voice-indicator-icon::after { content: "\\1F50A"; }',
1273
+ '.cumulus-voice-overlay.idle .cumulus-voice-indicator-icon::after { content: "\\23F8"; }',
1274
+
1275
+ '.cumulus-voice-label {',
1276
+ ' font-size: 16px; color: #aaa;',
1277
+ ' margin-bottom: 32px;',
1278
+ ' min-height: 24px;',
1279
+ '}',
1280
+
1281
+ '.cumulus-voice-transcript {',
1282
+ ' font-size: 18px; color: #e0e0e0;',
1283
+ ' max-width: 80%; text-align: center;',
1284
+ ' min-height: 28px;',
1285
+ ' margin-bottom: 24px;',
1286
+ ' font-style: italic;',
1287
+ '}',
1288
+
1289
+ '.cumulus-voice-stop-btn {',
1290
+ ' background: rgba(239, 68, 68, 0.15);',
1291
+ ' border: 1px solid #ef4444;',
1292
+ ' border-radius: 2em;',
1293
+ ' color: #ef4444;',
1294
+ ' font-size: 16px;',
1295
+ ' padding: 12px 32px;',
1296
+ ' cursor: pointer;',
1297
+ ' transition: background 0.2s;',
1298
+ '}',
1299
+ '.cumulus-voice-stop-btn:hover { background: rgba(239, 68, 68, 0.25); }',
1300
+
1301
+ '.cumulus-mic-btn {',
1302
+ ' width: 2.7em; height: 2.7em; flex-shrink: 0;',
1303
+ ' background: #3d3d3d; border: 1px solid #4a4a4a;',
1304
+ ' border-radius: 0.55em; color: #aaa;',
1305
+ ' font-size: 14px; line-height: 1; cursor: pointer;',
1306
+ ' display: flex; align-items: center; justify-content: center;',
1307
+ ' padding: 0;',
1308
+ ' transition: border-color 0.2s, color 0.2s;',
1309
+ '}',
1310
+ '.cumulus-mic-btn:hover { border-color: #0066cc; color: #ddd; }',
1311
+ '.cumulus-mic-btn.active { border-color: #ef4444; color: #ef4444; background: rgba(239,68,68,0.15); }',
1218
1312
  ].join('\n');
1219
1313
 
1220
1314
  // ─── HTML Escaping ───────────────────────────────────────────────────────────
@@ -1857,12 +1951,18 @@
1857
1951
  // Inline code (backtick)
1858
1952
  html = html.replace(/`([^`]+)`/g, '<code>$1</code>');
1859
1953
 
1860
- // Links
1954
+ // Links (markdown syntax)
1861
1955
  html = html.replace(
1862
1956
  /\[([^\]]+)\]\(([^)]+)\)/g,
1863
1957
  '<a href="$2" target="_blank" rel="noopener noreferrer">$1</a>'
1864
1958
  );
1865
1959
 
1960
+ // Auto-link bare URLs (not already inside an <a> tag)
1961
+ html = html.replace(
1962
+ /(?<!href="|">)(https?:\/\/[^\s<)\]]+)/g,
1963
+ '<a href="$1" target="_blank" rel="noopener noreferrer">$1</a>'
1964
+ );
1965
+
1866
1966
  // Phase 6: Convert remaining newlines to <br> (skip block-level elements)
1867
1967
  // Wrap sequences of plain text lines in <p> tags
1868
1968
  html = renderParagraphs(html);
@@ -1921,20 +2021,44 @@
1921
2021
  }
1922
2022
 
1923
2023
  // Texitool embed URL — change this if self-hosting
1924
- var TEXITOOL_EMBED_URL = 'https://texi.soapko.com';
2024
+ var TEXITOOL_EMBED_URL = 'https://app.texitool.com';
1925
2025
 
1926
2026
  function openTexitoolEditor(wrapper) {
1927
2027
  var codeEl = wrapper.querySelector('code');
1928
2028
  if (!codeEl) return;
1929
2029
  var rawText = codeEl.textContent || '';
1930
2030
 
2031
+ // Measure the code block height before replacing it
2032
+ var codeBlockHeight = wrapper.getBoundingClientRect().height;
2033
+ var iframeHeight = Math.max(codeBlockHeight, 400);
2034
+
2035
+ // Calculate rows/cols from content for Texitool canvas sizing
2036
+ var contentLines = rawText.split('\n');
2037
+ var rows = contentLines.length;
2038
+ var cols = 0;
2039
+ for (var li = 0; li < contentLines.length; li++) {
2040
+ if (contentLines[li].length > cols) cols = contentLines[li].length;
2041
+ }
2042
+
1931
2043
  // Build iframe container
1932
2044
  var container = document.createElement('div');
1933
2045
  container.className = 'texitool-embed-container';
1934
2046
 
1935
2047
  var iframe = document.createElement('iframe');
1936
- var encodedContent = btoa(unescape(encodeURIComponent(rawText)));
1937
- iframe.src = TEXITOOL_EMBED_URL + '?embed=true&content=' + encodedContent;
2048
+ var encodedContent = btoa(
2049
+ encodeURIComponent(rawText).replace(/%([0-9A-F]{2})/g, function (_, p1) {
2050
+ return String.fromCharCode(parseInt(p1, 16));
2051
+ })
2052
+ );
2053
+ iframe.src =
2054
+ TEXITOOL_EMBED_URL +
2055
+ '?embed=true&rows=' +
2056
+ (rows + 10) +
2057
+ '&cols=' +
2058
+ (cols + 10) +
2059
+ '&content=' +
2060
+ encodeURIComponent(encodedContent);
2061
+ iframe.style.height = iframeHeight + 'px';
1938
2062
  iframe.setAttribute('sandbox', 'allow-scripts allow-same-origin');
1939
2063
  iframe.setAttribute('data-testid', 'texitool-embed');
1940
2064
  container.appendChild(iframe);
@@ -1969,6 +2093,8 @@
1969
2093
 
1970
2094
  function finishEdit(newContent) {
1971
2095
  window.removeEventListener('message', onMessage);
2096
+ // Trim trailing blank lines
2097
+ newContent = newContent.replace(/\n+$/, '');
1972
2098
  // Rebuild the code block with updated content
1973
2099
  var newWrapper = document.createElement('div');
1974
2100
  newWrapper.innerHTML = buildCodeBlock(
@@ -2272,6 +2398,7 @@
2272
2398
  var apiKey = opts.apiKey;
2273
2399
  var sessionId = opts.sessionId;
2274
2400
  var onMessage = opts.onMessage;
2401
+ var onBinary = opts.onBinary || null;
2275
2402
  var onStatus = opts.onStatus;
2276
2403
  var skipHistory = opts.skipHistory || false;
2277
2404
 
@@ -2288,6 +2415,7 @@
2288
2415
  onStatus('connecting');
2289
2416
  currentStatus = 'connecting';
2290
2417
  ws = new WebSocket(wsUrl);
2418
+ ws.binaryType = 'arraybuffer';
2291
2419
 
2292
2420
  ws.onopen = function () {
2293
2421
  if (destroyed) {
@@ -2312,6 +2440,11 @@
2312
2440
  currentStatus = 'connected';
2313
2441
  onStatus('connected');
2314
2442
  }
2443
+ // Binary frames = voice audio PCM
2444
+ if (event.data instanceof ArrayBuffer) {
2445
+ if (onBinary) onBinary(event.data);
2446
+ return;
2447
+ }
2315
2448
  try {
2316
2449
  var data = JSON.parse(event.data);
2317
2450
  onMessage(data);
@@ -2690,6 +2823,9 @@
2690
2823
  })(msg.id);
2691
2824
  row.appendChild(cb);
2692
2825
  }
2826
+ // Suppress thinking-role entries
2827
+ if (msg.role === 'thinking') continue;
2828
+
2693
2829
  if (msg.role === 'user' && isSystemMessage(msg.content)) {
2694
2830
  row.appendChild(buildSystemMsgEl(msg.content));
2695
2831
  } else if (msg.role === 'user') {
@@ -3348,7 +3484,7 @@
3348
3484
  topbar.className = 'cumulus-topbar';
3349
3485
  topbar.innerHTML =
3350
3486
  '<button class="cumulus-sidebar-toggle" data-testid="webchat-sidebar-toggle" aria-label="Toggle sidebar">&#9776;</button>' +
3351
- '<span class="cumulus-topbar-title">Cumulus</span>' +
3487
+ '<span class="cumulus-topbar-title">Cumulus <span class="cumulus-topbar-version" data-testid="webchat-version"></span></span>' +
3352
3488
  '<div class="cumulus-topbar-right">' +
3353
3489
  '<span class="cumulus-header-status">' +
3354
3490
  '<span class="cumulus-status-dot" data-testid="webchat-status-dot-app"></span>' +
@@ -3956,6 +4092,557 @@
3956
4092
  }
3957
4093
 
3958
4094
  // ── Thread panel builder ──
4095
+ // ── Voice Mode ────────────────────────────────────────────────────────────
4096
+ var SpeechRecognitionApi = window.SpeechRecognition || window.webkitSpeechRecognition;
4097
+
4098
+ // ── Safari Detection ──
4099
+ function isSafari() {
4100
+ var ua = navigator.userAgent;
4101
+ var isIOS = /iP(ad|od|hone)/i.test(ua);
4102
+ var isSafariBrowser = /Safari/i.test(ua) && !/Chrome/i.test(ua) && !/CriOS/i.test(ua);
4103
+ return isIOS || isSafariBrowser;
4104
+ }
4105
+
4106
+ function createVoiceMode(panel, threadName, sendFn) {
4107
+ var state = {
4108
+ active: false,
4109
+ phase: 'idle',
4110
+ recognition: null,
4111
+ synth: window.speechSynthesis,
4112
+ speaking: false,
4113
+ wakeLock: null,
4114
+ watchdog: null,
4115
+ transcript: '',
4116
+ // Server-side TTS (Piper)
4117
+ serverTTS: false,
4118
+ serverSampleRate: 22050,
4119
+ audioCtx: null,
4120
+ audioQueue: [], // queue of Float32Array PCM chunks
4121
+ audioPlaying: false, // currently playing audio
4122
+ audioDone: false, // server signaled all audio sent
4123
+ // Voice loading state for Safari compatibility
4124
+ voicesLoaded: false,
4125
+ speakQueue: [],
4126
+ voiceLoadTimer: null,
4127
+ // Safari: prefer server TTS over browser TTS
4128
+ isSafariClient: isSafari(),
4129
+ waitingForServerTTS: false, // Safari waiting for voice_info
4130
+ voicesLoaded: false,
4131
+ speakQueue: [],
4132
+ voiceLoadTimer: null,
4133
+ };
4134
+
4135
+ // ── Overlay DOM ──
4136
+ var overlay = document.createElement('div');
4137
+ overlay.className = 'cumulus-voice-overlay idle';
4138
+ overlay.style.display = 'none';
4139
+
4140
+ var indicator = document.createElement('div');
4141
+ indicator.className = 'cumulus-voice-indicator';
4142
+ var indicatorIcon = document.createElement('span');
4143
+ indicatorIcon.className = 'cumulus-voice-indicator-icon';
4144
+ indicator.appendChild(indicatorIcon);
4145
+ overlay.appendChild(indicator);
4146
+
4147
+ var transcriptEl = document.createElement('div');
4148
+ transcriptEl.className = 'cumulus-voice-transcript';
4149
+ overlay.appendChild(transcriptEl);
4150
+
4151
+ var label = document.createElement('div');
4152
+ label.className = 'cumulus-voice-label';
4153
+ overlay.appendChild(label);
4154
+
4155
+ var stopBtn = document.createElement('button');
4156
+ stopBtn.className = 'cumulus-voice-stop-btn';
4157
+ stopBtn.setAttribute('data-testid', 'webchat-voice-stop');
4158
+ stopBtn.textContent = 'Exit Voice Mode';
4159
+ stopBtn.addEventListener('click', function () {
4160
+ deactivate();
4161
+ });
4162
+ overlay.appendChild(stopBtn);
4163
+
4164
+ panel.appendChild(overlay);
4165
+
4166
+ function setPhase(p) {
4167
+ state.phase = p;
4168
+ overlay.className = 'cumulus-voice-overlay ' + p;
4169
+ if (p === 'listening') label.textContent = 'Listening\u2026';
4170
+ else if (p === 'processing') label.textContent = 'Thinking\u2026';
4171
+ else if (p === 'speaking') label.textContent = '';
4172
+ else label.textContent = 'Tap mic to speak';
4173
+ }
4174
+
4175
+ // ── Wake Lock ──
4176
+ function requestWakeLock() {
4177
+ if (navigator.wakeLock) {
4178
+ navigator.wakeLock
4179
+ .request('screen')
4180
+ .then(function (lock) {
4181
+ state.wakeLock = lock;
4182
+ })
4183
+ .catch(function () {});
4184
+ }
4185
+ }
4186
+ function releaseWakeLock() {
4187
+ if (state.wakeLock) {
4188
+ state.wakeLock.release().catch(function () {});
4189
+ state.wakeLock = null;
4190
+ }
4191
+ }
4192
+
4193
+ // ── Speech Recognition ──
4194
+ function startRecognition() {
4195
+ if (!SpeechRecognitionApi) {
4196
+ label.textContent = 'Speech recognition not supported in this browser';
4197
+ return;
4198
+ }
4199
+ if (state.recognition) {
4200
+ try {
4201
+ state.recognition.abort();
4202
+ } catch (e) {}
4203
+ }
4204
+
4205
+ var rec = new SpeechRecognitionApi();
4206
+ rec.continuous = false;
4207
+ rec.interimResults = true;
4208
+ rec.lang = 'en-US';
4209
+ state.recognition = rec;
4210
+ state.transcript = '';
4211
+ transcriptEl.textContent = '';
4212
+
4213
+ rec.onresult = function (event) {
4214
+ var interim = '';
4215
+ var final_ = '';
4216
+ for (var i = event.resultIndex; i < event.results.length; i++) {
4217
+ var t = event.results[i][0].transcript;
4218
+ if (event.results[i].isFinal) {
4219
+ final_ += t;
4220
+ } else {
4221
+ interim += t;
4222
+ }
4223
+ }
4224
+ if (final_) {
4225
+ state.transcript += final_;
4226
+ transcriptEl.textContent = state.transcript;
4227
+ } else {
4228
+ transcriptEl.textContent = state.transcript + interim;
4229
+ }
4230
+
4231
+ // Barge-in: if user speaks while TTS is playing, stop it
4232
+ if (state.speaking) {
4233
+ stopSpeaking();
4234
+ }
4235
+ };
4236
+
4237
+ rec.onend = function () {
4238
+ clearWatchdog();
4239
+ if (!state.active) return;
4240
+
4241
+ // If we got a transcript, send it
4242
+ if (state.transcript.trim()) {
4243
+ var msg = state.transcript.trim();
4244
+ transcriptEl.textContent = msg;
4245
+ setPhase('processing');
4246
+ sendFn(msg);
4247
+ } else {
4248
+ // No speech detected — restart listening
4249
+ setPhase('idle');
4250
+ setTimeout(function () {
4251
+ if (state.active) startRecognition();
4252
+ }, 300);
4253
+ }
4254
+ };
4255
+
4256
+ rec.onerror = function (event) {
4257
+ clearWatchdog();
4258
+ if (event.error === 'no-speech' || event.error === 'aborted') {
4259
+ // Normal — just restart
4260
+ if (state.active) {
4261
+ setPhase('idle');
4262
+ setTimeout(function () {
4263
+ if (state.active) startRecognition();
4264
+ }, 500);
4265
+ }
4266
+ } else {
4267
+ label.textContent = 'Mic error: ' + event.error;
4268
+ setTimeout(function () {
4269
+ if (state.active) startRecognition();
4270
+ }, 1000);
4271
+ }
4272
+ };
4273
+
4274
+ setPhase('listening');
4275
+ rec.start();
4276
+
4277
+ // Watchdog: SpeechRecognition dies after ~60s on iOS
4278
+ startWatchdog();
4279
+ }
4280
+
4281
+ function startWatchdog() {
4282
+ clearWatchdog();
4283
+ state.watchdog = setTimeout(function () {
4284
+ if (state.active && state.phase === 'listening') {
4285
+ try {
4286
+ state.recognition.abort();
4287
+ } catch (e) {}
4288
+ startRecognition();
4289
+ }
4290
+ }, 55000); // restart before 60s iOS limit
4291
+ }
4292
+
4293
+ function clearWatchdog() {
4294
+ if (state.watchdog) {
4295
+ clearTimeout(state.watchdog);
4296
+ state.watchdog = null;
4297
+ }
4298
+ }
4299
+
4300
+ // ── Voice Loading (Safari fix) ──
4301
+ function initVoices() {
4302
+ // Safari: skip browser TTS init — prefer server TTS
4303
+ if (state.isSafariClient) {
4304
+ console.log('[Voice] Safari detected — skipping browser TTS, waiting for server TTS');
4305
+ state.waitingForServerTTS = true;
4306
+ state.voicesLoaded = true; // Mark as "ready" but don't use browser TTS
4307
+ return;
4308
+ }
4309
+
4310
+ if (!state.synth) return;
4311
+
4312
+ // Check if voices already loaded
4313
+ var voices = state.synth.getVoices();
4314
+ if (voices.length > 0) {
4315
+ state.voicesLoaded = true;
4316
+ console.log('[Voice] Voices loaded:', voices.length);
4317
+ processSpeakQueue();
4318
+ return;
4319
+ }
4320
+
4321
+ console.log('[Voice] Waiting for voices to load...');
4322
+
4323
+ // Listen for voiceschanged event
4324
+ state.synth.onvoiceschanged = function () {
4325
+ state.voicesLoaded = true;
4326
+ var loadedVoices = state.synth.getVoices();
4327
+ console.log('[Voice] Voices loaded:', loadedVoices.length);
4328
+ if (state.voiceLoadTimer) {
4329
+ clearTimeout(state.voiceLoadTimer);
4330
+ state.voiceLoadTimer = null;
4331
+ }
4332
+ processSpeakQueue();
4333
+ };
4334
+
4335
+ // Fallback timeout: proceed after 3 seconds even if no voices
4336
+ state.voiceLoadTimer = setTimeout(function () {
4337
+ if (!state.voicesLoaded) {
4338
+ console.log('[Voice] Voice loading timeout, proceeding anyway');
4339
+ state.voicesLoaded = true;
4340
+ processSpeakQueue();
4341
+ }
4342
+ }, 3000);
4343
+ }
4344
+
4345
+ function processSpeakQueue() {
4346
+ if (state.speakQueue.length === 0) return;
4347
+ console.log('[Voice] Processing speak queue:', state.speakQueue.length, 'items');
4348
+ while (state.speakQueue.length > 0) {
4349
+ var text = state.speakQueue.shift();
4350
+ speakNow(text);
4351
+ }
4352
+ }
4353
+
4354
+ // ── Speech Synthesis (Phase 1 — browser TTS) ──
4355
+ function speak(text) {
4356
+ if (!state.active) return;
4357
+
4358
+ // Safari: skip browser TTS entirely, wait for server TTS
4359
+ if (state.isSafariClient) {
4360
+ if (!state.serverTTS) {
4361
+ console.log('[Voice] Safari — skipping browser TTS, waiting for server TTS (Piper)');
4362
+ return;
4363
+ }
4364
+ // Server TTS will handle this via binary frames
4365
+ return;
4366
+ }
4367
+
4368
+ if (!state.synth) return;
4369
+
4370
+ // Strip markdown and blex fences
4371
+ var clean = text
4372
+ .replace(/~~~blex:[\s\S]*?~~~/g, '')
4373
+ .replace(/```[\s\S]*?```/g, '')
4374
+ .replace(/[*_~`#]/g, '')
4375
+ .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
4376
+ .replace(/https?:\/\/\S+/g, '')
4377
+ .trim();
4378
+
4379
+ if (!clean) {
4380
+ onSpeakDone();
4381
+ return;
4382
+ }
4383
+
4384
+ // Queue if voices not loaded yet
4385
+ if (!state.voicesLoaded) {
4386
+ console.log('[Voice] Voices not loaded, queueing speak()');
4387
+ state.speakQueue.push(clean);
4388
+ return;
4389
+ }
4390
+
4391
+ speakNow(clean);
4392
+ }
4393
+
4394
+ function speakNow(text) {
4395
+ console.log('[Voice] TTS started');
4396
+ state.speaking = true;
4397
+ setPhase('speaking');
4398
+
4399
+ var utterance = new SpeechSynthesisUtterance(text);
4400
+ utterance.rate = 1.0;
4401
+ utterance.pitch = 1.0;
4402
+
4403
+ // Try to pick a decent voice
4404
+ var voices = state.synth.getVoices();
4405
+ var preferred =
4406
+ voices.find(function (v) {
4407
+ return v.name.indexOf('Samantha') >= 0;
4408
+ }) ||
4409
+ voices.find(function (v) {
4410
+ return v.lang.startsWith('en') && v.localService;
4411
+ }) ||
4412
+ voices[0];
4413
+ if (preferred) {
4414
+ utterance.voice = preferred;
4415
+ console.log('[Voice] Using voice:', preferred.name);
4416
+ } else {
4417
+ console.log('[Voice] No preferred voice, using default');
4418
+ }
4419
+
4420
+ utterance.onend = function () {
4421
+ console.log('[Voice] TTS ended');
4422
+ onSpeakDone();
4423
+ };
4424
+ utterance.onerror = function (e) {
4425
+ console.log('[Voice] TTS error:', e.error);
4426
+ onSpeakDone();
4427
+ };
4428
+
4429
+ try {
4430
+ state.synth.speak(utterance);
4431
+ } catch (e) {
4432
+ console.log('[Voice] speechSynthesis.speak() failed:', e);
4433
+ onSpeakDone();
4434
+ }
4435
+ }
4436
+
4437
+ function stopSpeaking() {
4438
+ if (state.serverTTS) {
4439
+ stopServerAudio();
4440
+ }
4441
+ if (state.synth) state.synth.cancel();
4442
+ state.speaking = false;
4443
+ }
4444
+
4445
+ function onSpeakDone() {
4446
+ state.speaking = false;
4447
+ if (state.active) {
4448
+ setPhase('idle');
4449
+ setTimeout(function () {
4450
+ if (state.active) startRecognition();
4451
+ }, 300);
4452
+ }
4453
+ }
4454
+
4455
+ // ── Server-side TTS (Web Audio API playback) ──
4456
+ function ensureAudioCtx() {
4457
+ if (!state.audioCtx) {
4458
+ state.audioCtx = new (window.AudioContext || window.webkitAudioContext)({
4459
+ sampleRate: state.serverSampleRate,
4460
+ });
4461
+ }
4462
+ // Resume if suspended (iOS requires user gesture)
4463
+ if (state.audioCtx.state === 'suspended') {
4464
+ state.audioCtx.resume();
4465
+ }
4466
+ return state.audioCtx;
4467
+ }
4468
+
4469
+ function queuePCMAudio(arrayBuffer) {
4470
+ if (!state.active || state.phase === 'listening') return;
4471
+
4472
+ // Strip 4-byte "VPCM" header
4473
+ var pcmData = new Int16Array(arrayBuffer, 4);
4474
+ // Convert Int16 to Float32 for Web Audio API
4475
+ var float32 = new Float32Array(pcmData.length);
4476
+ for (var i = 0; i < pcmData.length; i++) {
4477
+ float32[i] = pcmData[i] / 32768.0;
4478
+ }
4479
+ state.audioQueue.push(float32);
4480
+
4481
+ if (!state.audioPlaying) {
4482
+ state.speaking = true;
4483
+ setPhase('speaking');
4484
+ playNextChunk();
4485
+ }
4486
+ }
4487
+
4488
+ function playNextChunk() {
4489
+ if (!state.active || state.audioQueue.length === 0) {
4490
+ state.audioPlaying = false;
4491
+ if (state.audioDone || state.audioQueue.length === 0) {
4492
+ onServerSpeakDone();
4493
+ }
4494
+ return;
4495
+ }
4496
+
4497
+ state.audioPlaying = true;
4498
+ var ctx = ensureAudioCtx();
4499
+ var samples = state.audioQueue.shift();
4500
+ var buffer = ctx.createBuffer(1, samples.length, state.serverSampleRate);
4501
+ buffer.getChannelData(0).set(samples);
4502
+
4503
+ var source = ctx.createBufferSource();
4504
+ source.buffer = buffer;
4505
+ source.connect(ctx.destination);
4506
+ source.onended = function () {
4507
+ playNextChunk();
4508
+ };
4509
+ source.start();
4510
+ state._currentSource = source;
4511
+ }
4512
+
4513
+ function stopServerAudio() {
4514
+ state.audioQueue.length = 0;
4515
+ state.audioPlaying = false;
4516
+ state.audioDone = false;
4517
+ if (state._currentSource) {
4518
+ try {
4519
+ state._currentSource.stop();
4520
+ } catch (e) {}
4521
+ state._currentSource = null;
4522
+ }
4523
+ }
4524
+
4525
+ function onServerSpeakDone() {
4526
+ state.speaking = false;
4527
+ state.audioDone = false;
4528
+ if (state.active) {
4529
+ setPhase('idle');
4530
+ setTimeout(function () {
4531
+ if (state.active) startRecognition();
4532
+ }, 300);
4533
+ }
4534
+ }
4535
+
4536
+ // ── Public API ──
4537
+ function activate() {
4538
+ if (!SpeechRecognitionApi) {
4539
+ alert('Speech recognition is not supported in this browser.');
4540
+ return;
4541
+ }
4542
+ state.active = true;
4543
+ overlay.style.display = 'flex';
4544
+ requestWakeLock();
4545
+
4546
+ console.log('[Voice] Activating...');
4547
+ console.log('[Voice] Safari detected:', state.isSafariClient);
4548
+
4549
+ // Initialize voice loading for Safari compatibility
4550
+ initVoices();
4551
+
4552
+ // Unlock Web Audio context on iOS (required for server TTS playback)
4553
+ ensureAudioCtx();
4554
+
4555
+ // For non-Safari: unlock browser TTS with silent utterance (requires user gesture)
4556
+ if (!state.isSafariClient && state.synth) {
4557
+ try {
4558
+ var unlockUtterance = new SpeechSynthesisUtterance('');
4559
+ unlockUtterance.volume = 0;
4560
+ state.synth.speak(unlockUtterance);
4561
+ console.log('[Voice] Sent silent utterance for browser TTS unlock');
4562
+ } catch (e) {
4563
+ console.log('[Voice] Silent unlock failed (not critical):', e);
4564
+ }
4565
+ }
4566
+
4567
+ setPhase('idle');
4568
+ startRecognition();
4569
+ }
4570
+
4571
+ function deactivate() {
4572
+ state.active = false;
4573
+ overlay.style.display = 'none';
4574
+ stopSpeaking();
4575
+ clearWatchdog();
4576
+ releaseWakeLock();
4577
+ if (state.recognition) {
4578
+ try {
4579
+ state.recognition.abort();
4580
+ } catch (e) {}
4581
+ state.recognition = null;
4582
+ }
4583
+ // Reset server TTS state
4584
+ state.serverTTS = false;
4585
+ if (state.audioCtx) {
4586
+ state.audioCtx.close().catch(function () {});
4587
+ state.audioCtx = null;
4588
+ }
4589
+ // Clear voice loading state
4590
+ if (state.voiceLoadTimer) {
4591
+ clearTimeout(state.voiceLoadTimer);
4592
+ state.voiceLoadTimer = null;
4593
+ }
4594
+ state.speakQueue.length = 0;
4595
+ state.voicesLoaded = false;
4596
+ state.waitingForServerTTS = false;
4597
+ console.log('[Voice] Deactivated');
4598
+ setPhase('idle');
4599
+ }
4600
+
4601
+ // Called by the message handler when assistant response arrives
4602
+ function onAssistantMessage(content) {
4603
+ if (!state.active) return;
4604
+ // Don't speak system or agent messages
4605
+ if (typeof content === 'string' && content.startsWith('[System]')) return;
4606
+ // If server TTS (Piper) is active, audio comes via binary WebSocket frames — skip browser TTS
4607
+ if (state.serverTTS) {
4608
+ console.log('[Voice] Using server TTS (Piper)');
4609
+ return;
4610
+ }
4611
+ // Safari without server TTS: no TTS available (browser TTS unreliable)
4612
+ if (state.isSafariClient) {
4613
+ console.log('[Voice] Safari — no server TTS available, skipping TTS');
4614
+ return;
4615
+ }
4616
+ speak(content);
4617
+ }
4618
+
4619
+ return {
4620
+ activate: activate,
4621
+ deactivate: deactivate,
4622
+ onAssistantMessage: onAssistantMessage,
4623
+ isActive: function () {
4624
+ return state.active;
4625
+ },
4626
+ // Server TTS methods
4627
+ setServerTTS: function (sampleRate) {
4628
+ console.log('[Voice] Using server TTS (Piper)');
4629
+ state.serverTTS = true;
4630
+ state.serverSampleRate = sampleRate || 22050;
4631
+ state.audioDone = false;
4632
+ state.waitingForServerTTS = false;
4633
+ },
4634
+ queuePCMAudio: queuePCMAudio,
4635
+ onVoiceAudioDone: function () {
4636
+ state.audioDone = true;
4637
+ // If not currently playing, trigger done immediately
4638
+ if (!state.audioPlaying && state.audioQueue.length === 0) {
4639
+ onServerSpeakDone();
4640
+ }
4641
+ },
4642
+ overlay: overlay,
4643
+ };
4644
+ }
4645
+
3959
4646
  function buildThreadPanel(threadName) {
3960
4647
  var state = getThreadState(threadName);
3961
4648
 
@@ -4222,9 +4909,49 @@
4222
4909
  inputRow.appendChild(attachBtn);
4223
4910
  inputRow.appendChild(inputEl);
4224
4911
  inputRow.appendChild(sendBtn);
4912
+
4913
+ // Mic button for voice mode
4914
+ var micBtn = document.createElement('button');
4915
+ micBtn.className = 'cumulus-mic-btn';
4916
+ micBtn.setAttribute('data-testid', 'webchat-mic-btn');
4917
+ micBtn.setAttribute('title', 'Voice mode');
4918
+ micBtn.innerHTML =
4919
+ '<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M12 2a3 3 0 0 0-3 3v7a3 3 0 0 0 6 0V5a3 3 0 0 0-3-3Z"/><path d="M19 10v2a7 7 0 0 1-14 0v-2"/><line x1="12" x2="12" y1="19" y2="22"/></svg>';
4920
+ if (!SpeechRecognitionApi) micBtn.style.display = 'none';
4921
+ inputRow.appendChild(micBtn);
4922
+
4225
4923
  inputArea.appendChild(inputRow);
4226
4924
  panel.appendChild(inputArea);
4227
4925
 
4926
+ // ── Voice mode setup ──
4927
+ var voiceMode = createVoiceMode(panel, threadName, function (spokenText) {
4928
+ // Send the spoken text with voiceMode flag — server injects voice prompt into system prompt
4929
+ if (!connection) return;
4930
+ state.messages.push({ role: 'user', content: spokenText });
4931
+ state.streaming = true;
4932
+ state.streamBuffer = '';
4933
+ renderPanelMessages();
4934
+ connection.send({
4935
+ type: 'message',
4936
+ threadName: threadName,
4937
+ message: spokenText,
4938
+ voiceMode: true,
4939
+ });
4940
+ });
4941
+
4942
+ // Store voice mode on thread state so the message handler can trigger TTS
4943
+ state._voiceMode = voiceMode;
4944
+
4945
+ micBtn.addEventListener('click', function () {
4946
+ if (voiceMode.isActive()) {
4947
+ voiceMode.deactivate();
4948
+ micBtn.classList.remove('active');
4949
+ } else {
4950
+ voiceMode.activate();
4951
+ micBtn.classList.add('active');
4952
+ }
4953
+ });
4954
+
4228
4955
  // ── Panel-local render functions ──
4229
4956
  function scrollToBottom() {
4230
4957
  requestAnimationFrame(function () {
@@ -4354,6 +5081,9 @@
4354
5081
  })(msg.id);
4355
5082
  row.appendChild(cb);
4356
5083
  }
5084
+ // Suppress thinking-role entries
5085
+ if (msg.role === 'thinking') continue;
5086
+
4357
5087
  if (msg.role === 'user' && isSystemMessage(msg.content)) {
4358
5088
  row.appendChild(buildSystemMsgEl(msg.content));
4359
5089
  } else if (msg.role === 'user') {
@@ -5174,6 +5904,16 @@
5174
5904
  });
5175
5905
  showAppView();
5176
5906
  registerPushNotifications();
5907
+ // Fetch and display version in topbar
5908
+ fetch(window.location.protocol + '//' + window.location.host + '/api/version')
5909
+ .then(function (r) {
5910
+ return r.json();
5911
+ })
5912
+ .then(function (v) {
5913
+ var vEl = document.querySelector('[data-testid="webchat-version"]');
5914
+ if (vEl && v.current) vEl.textContent = 'v' + v.current;
5915
+ })
5916
+ .catch(function () {});
5177
5917
  break;
5178
5918
 
5179
5919
  case 'auth_error':
@@ -5356,6 +6096,26 @@
5356
6096
  // Reserved for future verbose display
5357
6097
  break;
5358
6098
 
6099
+ case 'voice_info':
6100
+ // Server has Piper TTS available — switch to server-side audio
6101
+ if (data.threadName) {
6102
+ var state = getThreadState(data.threadName);
6103
+ if (state._voiceMode && state._voiceMode.isActive()) {
6104
+ state._voiceMode.setServerTTS(data.sampleRate);
6105
+ }
6106
+ }
6107
+ break;
6108
+
6109
+ case 'voice_audio_done':
6110
+ // Server finished sending all audio for this response
6111
+ if (data.threadName) {
6112
+ var state = getThreadState(data.threadName);
6113
+ if (state._voiceMode && state._voiceMode.isActive()) {
6114
+ state._voiceMode.onVoiceAudioDone();
6115
+ }
6116
+ }
6117
+ break;
6118
+
5359
6119
  case 'done':
5360
6120
  if (data.threadName) {
5361
6121
  clearStreamingTimeout(data.threadName);
@@ -5372,13 +6132,18 @@
5372
6132
  break;
5373
6133
  }
5374
6134
  state.streaming = false;
6135
+ var responseContent = data.response || state.streamBuffer;
5375
6136
  state.messages.push({
5376
6137
  role: 'assistant',
5377
- content: data.response || state.streamBuffer,
6138
+ content: responseContent,
5378
6139
  });
5379
6140
  state.streamBuffer = '';
5380
6141
  updateThreadActivity(data.threadName);
5381
6142
  refreshThreadPanel(data.threadName);
6143
+ // Voice mode: speak the assistant response
6144
+ if (state._voiceMode && state._voiceMode.isActive()) {
6145
+ state._voiceMode.onAssistantMessage(responseContent);
6146
+ }
5382
6147
  }
5383
6148
  break;
5384
6149
 
@@ -5605,6 +6370,17 @@
5605
6370
  apiKey: activeApiKey,
5606
6371
  sessionId: sessionId,
5607
6372
  onMessage: handleServerMessage,
6373
+ onBinary: function (arrayBuffer) {
6374
+ // Route binary PCM audio to the active voice mode session
6375
+ // Find which thread has active voice mode
6376
+ for (var tn in threadStates) {
6377
+ var ts = threadStates[tn];
6378
+ if (ts._voiceMode && ts._voiceMode.isActive()) {
6379
+ ts._voiceMode.queuePCMAudio(arrayBuffer);
6380
+ break;
6381
+ }
6382
+ }
6383
+ },
5608
6384
  onStatus: updateStatus,
5609
6385
  skipHistory: true, // standalone manages history per-thread
5610
6386
  });