npm - myaiforone - Versions diffs - 1.1.63 → 1.1.65 - Mend

myaiforone 1.1.63 → 1.1.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/package.json +1 -1
package/public/voice-mode.html +83 -10

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "myaiforone",
-  "version": "1.1.63",
+  "version": "1.1.65",
   "type": "module",
   "description": "Routes messages from phone channels to project-specific Claude Code agents",
   "bin": {

package/public/voice-mode.html CHANGED

@@ -426,7 +426,8 @@ let currentJobId = null;       // in-flight chat job id
 let lastUserText = '';         // last transcribed user input
 let lastAgentText = '';        // last agent response (full)
 let lastAudioBlob = null;      // cached TTS audio Blob for re-reading
-let audioElement = null;       // HTMLAudioElement currently playing TTS
+let audioElement = null;       // HTMLAudioElement currently playing TTS (reused — keeps iOS user-activation)
+let audioUnlocked = false;     // iOS Safari: have we primed audio in a user gesture yet?
 // ─── Voice config (decides server-side vs browser path) ──
 async function loadVoiceConfig(){
@@ -621,8 +622,41 @@ async function newSession(){
   }
 }
+// ─── iOS Safari audio unlock ──────────────────────────
+// Must be called SYNCHRONOUSLY from a user-gesture handler (e.g. the tap on the big button).
+// After this primes successfully once, later async-triggered audio.play() and
+// speechSynthesis.speak() calls are permitted by iOS for the lifetime of the page.
+function unlockAudio(){
+  if (audioUnlocked) return;
+  try {
+    // 1) Warm an HTMLAudioElement we'll reuse for blob playback.
+    //    Tiny silent WAV — plays instantly, satisfies the user-activation requirement.
+    if (!audioElement){
+      audioElement = new Audio();
+      audioElement.preload = 'auto';
+      audioElement.playsInline = true;
+      audioElement.setAttribute('playsinline', '');
+    }
+    audioElement.src = 'data:audio/wav;base64,UklGRiQAAABXQVZFZm10IBAAAAABAAEAESsAACJWAAACABAAZGF0YQAAAAA=';
+    const p = audioElement.play();
+    if (p && typeof p.then === 'function') p.catch(() => {});
+  } catch {}
+  try {
+    // 2) Warm speechSynthesis with an empty utterance.
+    if (window.speechSynthesis){
+      const u = new SpeechSynthesisUtterance('');
+      u.volume = 0;
+      window.speechSynthesis.speak(u);
+    }
+  } catch {}
+  audioUnlocked = true;
+}
 // ─── Big button dispatcher ────────────────────────────
 function onBigBtn(){
+  // Run audio-unlock FIRST and synchronously — iOS only counts this as a user-gesture
+  // call if it happens before any await.
+  unlockAudio();
   if (state === 'idle')         startListening();
   else if (state === 'listening')  stopListeningAndSend();
   else if (state === 'processing') abortJob();
@@ -909,27 +943,66 @@ async function playTtsForText(text){
 function playBlob(blob){
   return new Promise((resolve) => {
-    if (audioElement){
+    let done = false;
+    const url = URL.createObjectURL(blob);
+    const finish = () => { if (done) return; done = true; try { URL.revokeObjectURL(url); } catch {}; resolve(); };
+    // IMPORTANT: reuse the existing audioElement (warmed by unlockAudio on the tap),
+    // so iOS Safari still considers it user-activated. Creating `new Audio()` here
+    // produces an un-activated element that iOS silently refuses to play.
+    if (!audioElement){
+      audioElement = new Audio();
+      audioElement.preload = 'auto';
+      audioElement.playsInline = true;
+      audioElement.setAttribute('playsinline', '');
+    } else {
       try { audioElement.pause(); } catch {}
-      try { URL.revokeObjectURL(audioElement.src); } catch {}
     }
-    const url = URL.createObjectURL(blob);
-    audioElement = new Audio(url);
-    audioElement.onended = () => { try { URL.revokeObjectURL(url); } catch {}; resolve(); };
-    audioElement.onerror = () => { resolve(); };
-    audioElement.play().catch(() => resolve());
+    audioElement.onended = finish;
+    audioElement.onerror = finish;
+    audioElement.onpause = () => { // iOS Safari sometimes fires pause instead of ended at completion
+      if (audioElement && audioElement.currentTime >= (audioElement.duration || 0) - 0.25) finish();
+    };
+    // Hard ceiling: once we know the duration, force-resolve a bit after it would naturally end.
+    // Covers iOS Safari case where playback never actually starts or 'ended' is never fired.
+    let ceilingTimer = setTimeout(finish, 60000); // absolute fallback if metadata never loads
+    audioElement.onloadedmetadata = () => {
+      const dur = isFinite(audioElement.duration) ? audioElement.duration : 0;
+      if (dur > 0){
+        clearTimeout(ceilingTimer);
+        ceilingTimer = setTimeout(finish, Math.ceil(dur * 1000) + 2000);
+      }
+    };
+    audioElement.src = url;
+    try { audioElement.currentTime = 0; } catch {}
+    const p = audioElement.play();
+    if (p && typeof p.catch === 'function') p.catch(finish);
   });
 }
 function speakBrowser(text){
   return new Promise((resolve) => {
     if (!window.speechSynthesis){ resolve(); return; }
+    let done = false;
+    const finish = () => { if (done) return; done = true; clearInterval(poll); resolve(); };
     window.speechSynthesis.cancel();
     const u = new SpeechSynthesisUtterance(text);
     u.rate = 1.05;
-    u.onend = () => resolve();
-    u.onerror = () => resolve();
+    u.onend = finish;
+    u.onerror = finish;
     window.speechSynthesis.speak(u);
+    // iOS Safari frequently drops the 'end' event on long utterances — poll the speaking flag.
+    // Allow a small grace period for synthesis to actually start before treating !speaking as done.
+    let started = false;
+    const startedBy = Date.now() + 1500;
+    const poll = setInterval(() => {
+      const sp = window.speechSynthesis;
+      if (!sp){ finish(); return; }
+      if (sp.speaking) started = true;
+      if (started && !sp.speaking && !sp.pending) finish();
+      else if (!started && Date.now() > startedBy && !sp.speaking && !sp.pending) finish(); // never started
+    }, 250);
+    // Absolute hard ceiling so the UI can never get stuck.
+    setTimeout(finish, Math.max(15000, Math.min(180000, text.length * 90)));
   });
 }