myaiforone 1.1.63 → 1.1.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/public/voice-mode.html +83 -10
package/package.json
CHANGED
package/public/voice-mode.html
CHANGED
|
@@ -426,7 +426,8 @@ let currentJobId = null; // in-flight chat job id
|
|
|
426
426
|
let lastUserText = ''; // last transcribed user input
|
|
427
427
|
let lastAgentText = ''; // last agent response (full)
|
|
428
428
|
let lastAudioBlob = null; // cached TTS audio Blob for re-reading
|
|
429
|
-
let audioElement = null; // HTMLAudioElement currently playing TTS
|
|
429
|
+
let audioElement = null; // HTMLAudioElement currently playing TTS (reused — keeps iOS user-activation)
|
|
430
|
+
let audioUnlocked = false; // iOS Safari: have we primed audio in a user gesture yet?
|
|
430
431
|
|
|
431
432
|
// ─── Voice config (decides server-side vs browser path) ──
|
|
432
433
|
async function loadVoiceConfig(){
|
|
@@ -621,8 +622,41 @@ async function newSession(){
|
|
|
621
622
|
}
|
|
622
623
|
}
|
|
623
624
|
|
|
625
|
+
// ─── iOS Safari audio unlock ──────────────────────────
|
|
626
|
+
// Must be called SYNCHRONOUSLY from a user-gesture handler (e.g. the tap on the big button).
|
|
627
|
+
// After this primes successfully once, later async-triggered audio.play() and
|
|
628
|
+
// speechSynthesis.speak() calls are permitted by iOS for the lifetime of the page.
|
|
629
|
+
/**
 * Prime audio output from inside a user-gesture handler.
 *
 * Must run synchronously in the gesture (before any await). It warms two things:
 *   1) the shared `audioElement`, by playing a tiny silent WAV, so later
 *      async-triggered blob playback reuses an already-activated element;
 *   2) `speechSynthesis`, by speaking an empty zero-volume utterance.
 *
 * `audioUnlocked` is set optimistically so repeated taps do not re-prime, but it
 * is reset if the priming play() is rejected with NotAllowedError, so the next
 * gesture gets another chance instead of leaving audio permanently locked.
 * Both steps are deliberately best-effort: a failure here must never break the tap.
 */
function unlockAudio(){
  if (audioUnlocked) return;
  audioUnlocked = true;
  try {
    if (!audioElement){
      audioElement = new Audio();
      audioElement.preload = 'auto';
      audioElement.playsInline = true;
      audioElement.setAttribute('playsinline', '');
    }
    const el = audioElement;
    // Never swap the source out from under real playback: an element that is
    // currently playing has evidently been allowed to play already.
    if (el.paused !== false){
      // Tiny silent WAV — plays instantly, satisfies the user-activation requirement.
      el.src = 'data:audio/wav;base64,UklGRiQAAABXQVZFZm10IBAAAAABAAEAESsAACJWAAACABAAZGF0YQAAAAA=';
      const p = el.play();
      if (p && typeof p.then === 'function'){
        p.catch((err) => {
          // Only an autoplay-policy refusal means "not unlocked"; other rejections
          // (e.g. the source being replaced before play started) are ignored.
          if (err && err.name === 'NotAllowedError') audioUnlocked = false;
        });
      }
    }
  } catch {
    // Best-effort: priming the element is optional.
  }
  try {
    // Warm speechSynthesis with an empty, silent utterance.
    if (window.speechSynthesis){
      const u = new SpeechSynthesisUtterance('');
      u.volume = 0;
      window.speechSynthesis.speak(u);
    }
  } catch {
    // Best-effort: speech synthesis may be unavailable.
  }
}
|
|
654
|
+
|
|
624
655
|
// ─── Big button dispatcher ────────────────────────────
|
|
625
656
|
function onBigBtn(){
|
|
657
|
+
// Run audio-unlock FIRST and synchronously — iOS only counts this as a user-gesture
|
|
658
|
+
// call if it happens before any await.
|
|
659
|
+
unlockAudio();
|
|
626
660
|
if (state === 'idle') startListening();
|
|
627
661
|
else if (state === 'listening') stopListeningAndSend();
|
|
628
662
|
else if (state === 'processing') abortJob();
|
|
@@ -909,27 +943,66 @@ async function playTtsForText(text){
|
|
|
909
943
|
|
|
910
944
|
/**
 * Play an audio Blob through the shared `audioElement` and resolve when playback
 * is over (ended, errored, play() rejected, or a safety timeout elapsed).
 *
 * The element is reused rather than recreated so that an element already warmed
 * inside a user gesture keeps being used for async-triggered playback.
 *
 * @param {Blob} blob - audio data to play.
 * @returns {Promise<void>} always resolves, never rejects on playback failure.
 */
function playBlob(blob){
  return new Promise((resolve) => {
    const url = URL.createObjectURL(blob);
    if (!audioElement){
      audioElement = new Audio();
      audioElement.preload = 'auto';
      audioElement.playsInline = true;
      audioElement.setAttribute('playsinline', '');
    } else {
      try { audioElement.pause(); } catch {}
    }
    const el = audioElement;
    let done = false;
    let ceilingTimer = null;
    const finish = () => {
      if (done) return;
      done = true;
      clearTimeout(ceilingTimer); // do not leave a stale timer behind after settling
      // Detach our handlers, unless a newer playBlob() call already replaced them.
      if (el.onended === finish){
        el.onended = null;
        el.onerror = null;
        el.onpause = null;
        el.onloadedmetadata = null;
      }
      try { URL.revokeObjectURL(url); } catch {}
      resolve();
    };
    el.onended = finish;
    el.onerror = finish;
    // A pause event is treated as completion only when playback is within 0.25s of
    // the end. The duration must be known first: while it is still NaN, a pause
    // event (e.g. the one queued by the pause() call above when interrupting a
    // previous clip) must not be mistaken for the end of this clip.
    el.onpause = () => {
      const dur = el.duration;
      if (Number.isFinite(dur) && dur > 0 && el.currentTime >= dur - 0.25) finish();
    };
    // Absolute fallback if metadata never loads.
    ceilingTimer = setTimeout(finish, 60000);
    // Once the duration is known, tighten the ceiling to slightly after the natural end.
    el.onloadedmetadata = () => {
      if (done) return;
      const dur = Number.isFinite(el.duration) ? el.duration : 0;
      if (dur > 0){
        clearTimeout(ceilingTimer);
        ceilingTimer = setTimeout(finish, Math.ceil(dur * 1000) + 2000);
      }
    };
    el.src = url;
    try { el.currentTime = 0; } catch {}
    const p = el.play();
    if (p && typeof p.catch === 'function') p.catch(finish);
  });
}
|
|
923
981
|
|
|
924
982
|
/**
 * Speak `text` with the browser's speechSynthesis and resolve when it is done.
 *
 * Completion is detected three ways, whichever comes first:
 *   - the utterance's end/error event;
 *   - polling `speechSynthesis.speaking` / `.pending` every 250ms (the 'end' event
 *     is not always delivered), with a 1.5s grace period for speech to start;
 *   - a hard ceiling of 90ms per character, clamped to 15s..180s.
 * All timers are cleared as soon as the promise settles.
 *
 * @param {string} text - text to speak; null/undefined is treated as empty and
 *   other values are stringified.
 * @returns {Promise<void>} resolves when speaking is over or unavailable.
 */
function speakBrowser(text){
  return new Promise((resolve) => {
    if (!window.speechSynthesis){ resolve(); return; }
    // Normalise up front so the ceiling below is always a real number.
    const spoken = text == null ? '' : String(text);
    let done = false;
    let poll = null;
    let ceiling = null;
    const finish = () => {
      if (done) return;
      done = true;
      clearInterval(poll);
      clearTimeout(ceiling);
      resolve();
    };
    window.speechSynthesis.cancel();
    const u = new SpeechSynthesisUtterance(spoken);
    u.rate = 1.05;
    u.onend = finish;
    u.onerror = finish;
    window.speechSynthesis.speak(u);
    // Poll the speaking flag in case the 'end' event is dropped.
    // Allow a small grace period for synthesis to actually start before treating !speaking as done.
    let started = false;
    const startedBy = Date.now() + 1500;
    poll = setInterval(() => {
      const sp = window.speechSynthesis;
      if (!sp){ finish(); return; }
      if (sp.speaking) started = true;
      const idle = !sp.speaking && !sp.pending;
      if (started && idle) finish();
      else if (!started && Date.now() > startedBy && idle) finish(); // never started
    }, 250);
    // Absolute hard ceiling so the UI can never get stuck.
    ceiling = setTimeout(finish, Math.max(15000, Math.min(180000, spoken.length * 90)));
  });
}
|
|
935
1008
|
|