npm - fluxy-bot - Versions diffs - 0.8.4 → 0.8.6 - Mend

fluxy-bot 0.8.4 → 0.8.6

This diff shows the changes between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.

Files changed (5)

package/dist-fluxy/assets/{fluxy-V26b7AxV.js → fluxy-CY9PvJvb.js} +30 -30
package/dist-fluxy/fluxy.html +1 -1
package/package.json +1 -1
package/supervisor/chat/src/components/Chat/InputBar.tsx +85 -67
package/supervisor/chat/src/hooks/useSpeechRecognition.ts +36 -10

package/dist-fluxy/fluxy.html CHANGED Viewed

@@ -4,7 +4,7 @@
     <meta charset="UTF-8" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, interactive-widget=resizes-content" />
     <title>Fluxy Chat</title>
-    <script type="module" crossorigin src="/fluxy/assets/fluxy-V26b7AxV.js"></script>
+    <script type="module" crossorigin src="/fluxy/assets/fluxy-CY9PvJvb.js"></script>
     <link rel="modulepreload" crossorigin href="/fluxy/assets/globals-DYbVw4jJ.js">
     <link rel="stylesheet" crossorigin href="/fluxy/assets/globals-BrtAPuMR.css">
   </head>

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "fluxy-bot",
-  "version": "0.8.4",
+  "version": "0.8.6",
   "releaseNotes": [
     "Fixed some bugs to iOs ",
     "2. ",

package/supervisor/chat/src/components/Chat/InputBar.tsx CHANGED Viewed

@@ -58,6 +58,7 @@ const DRAFT_KEY = 'fluxy_draft';
 export default function InputBar({ onSend, onStop, streaming, whisperEnabled, onTranscribe }: Props) {
   const { start: startSpeech, stop: stopSpeech, abort: abortSpeech, isSupported: webSpeechSupported } = useSpeechRecognition();
   const voiceEnabled = whisperEnabled || webSpeechSupported;
+  console.log('[InputBar] render - whisperEnabled:', whisperEnabled, 'webSpeechSupported:', webSpeechSupported, 'voiceEnabled:', voiceEnabled);
   const [text, setText] = useState(() => {
     try { return localStorage.getItem(DRAFT_KEY) || ''; } catch { return ''; }
   });
@@ -107,7 +108,8 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
     return () => { if (intervalRef.current) clearInterval(intervalRef.current); };
   }, [isRecording]);
-  const stopRecording = useCallback((cancelled: boolean) => {
+  const stopRecording = useCallback(async (cancelled: boolean) => {
+    console.log('[InputBar] stopRecording called, cancelled:', cancelled, 'recorder:', !!mediaRecorderRef.current, 'whisper:', whisperEnabled);
     if (intervalRef.current) clearInterval(intervalRef.current);
     if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
     isHolding.current = false;
@@ -116,77 +118,76 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
     const stream = streamRef.current;
     if (cancelled) {
-      // Clean up mic + abort speech recognition
       stream?.getTracks().forEach((t) => t.stop());
       mediaRecorderRef.current = null;
       streamRef.current = null;
       audioChunksRef.current = [];
       abortSpeech();
-    } else if (!recorder || recorder.state === 'inactive') {
-      // No recorder active, clean up
-      stream?.getTracks().forEach((t) => t.stop());
-      mediaRecorderRef.current = null;
-      streamRef.current = null;
-      audioChunksRef.current = [];
-      abortSpeech();
-    } else {
-      // Stop recorder — ondataavailable + onstop will fire
+    } else if (recorder && recorder.state !== 'inactive') {
+      // Whisper path: stop MediaRecorder and use its audio
       recorder.onstop = async () => {
+        console.log('[InputBar] recorder.onstop fired, chunks:', audioChunksRef.current.length);
         stream?.getTracks().forEach((t) => t.stop());
         const blob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
         audioChunksRef.current = [];
         mediaRecorderRef.current = null;
         streamRef.current = null;
-        if (blob.size < 1000) {
-          abortSpeech();
-          return; // too small, skip
-        }
+        console.log('[InputBar] blob size:', blob.size);
+        if (blob.size < 1000) return;
-        // Convert to base64
-        const reader = new FileReader();
-        reader.onloadend = async () => {
-          const dataUrl = reader.result as string;
+        const fileReader = new FileReader();
+        fileReader.onloadend = async () => {
+          const dataUrl = fileReader.result as string;
           const base64 = dataUrl.split(',')[1];
           if (!base64) return;
+          console.log('[InputBar] Whisper path, base64 length:', base64.length);
           try {
-            // Whisper path: send audio to Whisper API for transcription
-            if (whisperEnabled && (onTranscribe || true)) {
-              let data: { transcript?: string };
-              if (onTranscribe) {
-                data = await onTranscribe(base64);
-              } else {
-                const res = await fetch('/api/whisper/transcribe', {
-                  method: 'POST',
-                  headers: { 'Content-Type': 'application/json' },
-                  body: JSON.stringify({ audio: base64 }),
-                });
-                data = await res.json();
-              }
-              abortSpeech(); // discard Web Speech results when using Whisper
-              if (data.transcript?.trim()) {
-                const pendingAtts = attachments.length > 0 ? attachments : undefined;
-                onSend(data.transcript.trim(), pendingAtts, dataUrl);
-                if (pendingAtts) setAttachments([]);
-              }
+            let data: { transcript?: string };
+            if (onTranscribe) {
+              data = await onTranscribe(base64);
             } else {
-              // Web Speech API path: use accumulated transcript
-              const transcript = await stopSpeech();
-              if (transcript.trim()) {
-                const pendingAtts = attachments.length > 0 ? attachments : undefined;
-                onSend(transcript.trim(), pendingAtts, dataUrl);
-                if (pendingAtts) setAttachments([]);
-              }
+              const res = await fetch('/api/whisper/transcribe', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ audio: base64 }),
+              });
+              data = await res.json();
+            }
+            if (data.transcript?.trim()) {
+              const pendingAtts = attachments.length > 0 ? attachments : undefined;
+              onSend(data.transcript.trim(), pendingAtts, dataUrl);
+              if (pendingAtts) setAttachments([]);
             }
-          } catch {
-            abortSpeech();
-            // Transcription failed silently
+          } catch (err) {
+            console.error('[InputBar] Whisper transcription error:', err);
           }
         };
-        reader.readAsDataURL(blob);
+        fileReader.readAsDataURL(blob);
       };
       recorder.stop();
+    } else {
+      // Web Speech API path (no MediaRecorder): get transcript directly
+      console.log('[InputBar] Web Speech path, stopping speech recognition...');
+      stream?.getTracks().forEach((t) => t.stop());
+      mediaRecorderRef.current = null;
+      streamRef.current = null;
+      audioChunksRef.current = [];
+      try {
+        const transcript = await stopSpeech();
+        console.log('[InputBar] Web Speech transcript:', JSON.stringify(transcript));
+        if (transcript.trim()) {
+          const pendingAtts = attachments.length > 0 ? attachments : undefined;
+          onSend(transcript.trim(), pendingAtts);
+          if (pendingAtts) setAttachments([]);
+        } else {
+          console.log('[InputBar] Web Speech transcript was empty');
+        }
+      } catch (err) {
+        console.error('[InputBar] Web Speech stop error:', err);
+      }
     }
     if (micRef.current) micRef.current.style.transform = '';
@@ -262,38 +263,50 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
   // ── Mic pointer handlers ──
   const handleMicDown = useCallback((e: RPointerEvent) => {
+    console.log('[InputBar] handleMicDown fired, voiceEnabled:', voiceEnabled, 'whisper:', whisperEnabled);
     e.preventDefault();
     startXRef.current = e.clientX;
     dragRef.current = 0;
     (e.currentTarget as HTMLElement).setPointerCapture(e.pointerId);
     holdTimerRef.current = setTimeout(async () => {
-      if (!voiceEnabled) return;
+      console.log('[InputBar] hold timer fired, voiceEnabled:', voiceEnabled, 'whisper:', whisperEnabled);
+      if (!voiceEnabled) {
+        console.log('[InputBar] voiceEnabled is false, returning');
+        return;
+      }
       try {
-        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-        streamRef.current = stream;
-        const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : 'audio/webm';
-        const recorder = new MediaRecorder(stream, { mimeType });
-        audioChunksRef.current = [];
-        recorder.ondataavailable = (e) => {
-          if (e.data.size > 0) audioChunksRef.current.push(e.data);
-        };
-        mediaRecorderRef.current = recorder;
-        recorder.start();
-        // Start Web Speech API alongside MediaRecorder (used as fallback or primary)
-        if (webSpeechSupported) {
+        if (whisperEnabled) {
+          // Whisper path: need getUserMedia + MediaRecorder for audio capture
+          console.log('[InputBar] Whisper path: requesting getUserMedia...');
+          const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
+          console.log('[InputBar] getUserMedia succeeded, tracks:', stream.getTracks().length);
+          streamRef.current = stream;
+          const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : 'audio/webm';
+          const recorder = new MediaRecorder(stream, { mimeType });
+          audioChunksRef.current = [];
+          recorder.ondataavailable = (ev) => {
+            if (ev.data.size > 0) audioChunksRef.current.push(ev.data);
+          };
+          mediaRecorderRef.current = recorder;
+          recorder.start();
+          console.log('[InputBar] MediaRecorder started');
+        } else {
+          // Web Speech path: only SpeechRecognition, no getUserMedia (avoids mic conflict on mobile)
+          console.log('[InputBar] Web Speech path: starting SpeechRecognition only...');
           startSpeech();
+          console.log('[InputBar] SpeechRecognition started');
         }
         isHolding.current = true;
         setIsRecording(true);
         setRecordingTime(0);
-      } catch {
-        // Mic permission denied or not available
+        console.log('[InputBar] recording started, isHolding=true');
+      } catch (err) {
+        console.error('[InputBar] recording setup failed:', err);
       }
     }, 200);
-  }, [voiceEnabled, webSpeechSupported, startSpeech]);
+  }, [voiceEnabled, whisperEnabled, startSpeech]);
   const handleMicMove = useCallback((e: RPointerEvent) => {
     if (!isHolding.current) return;
@@ -311,12 +324,17 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
   }, [stopRecording]);
   const handleMicUp = useCallback(() => {
+    console.log('[InputBar] handleMicUp, isHolding:', isHolding.current);
     if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
-    if (!isHolding.current) return;
+    if (!isHolding.current) {
+      console.log('[InputBar] handleMicUp - not holding, ignoring');
+      return;
+    }
     stopRecording(false);
   }, [stopRecording]);
   const handleMicCancel = useCallback(() => {
+    console.log('[InputBar] handleMicCancel fired, isHolding:', isHolding.current, 'holdTimer:', !!holdTimerRef.current);
     if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
     if (isHolding.current) stopRecording(true);
   }, [stopRecording]);

package/supervisor/chat/src/hooks/useSpeechRecognition.ts CHANGED Viewed

@@ -22,14 +22,20 @@ interface SpeechRecognitionInstance extends EventTarget {
 const getSpeechRecognitionCtor = (): SpeechRecognitionConstructor | null => {
   if (typeof window === 'undefined') return null;
-  return (
+  const ctor = (
     (window as unknown as Record<string, SpeechRecognitionConstructor>).SpeechRecognition ??
     (window as unknown as Record<string, SpeechRecognitionConstructor>).webkitSpeechRecognition ??
     null
   );
+  console.log('[SpeechRecognition] getCtor:', ctor ? 'found' : 'NOT found', {
+    hasSpeechRecognition: 'SpeechRecognition' in window,
+    hasWebkit: 'webkitSpeechRecognition' in window,
+  });
+  return ctor;
 };
 export const isWebSpeechSupported = getSpeechRecognitionCtor() !== null;
+console.log('[SpeechRecognition] isWebSpeechSupported:', isWebSpeechSupported);
 export function useSpeechRecognition() {
   const instanceRef = useRef<SpeechRecognitionInstance | null>(null);
@@ -40,11 +46,16 @@ export function useSpeechRecognition() {
   const isSupported = useMemo(() => isWebSpeechSupported, []);
   const start = useCallback(() => {
+    console.log('[SpeechRecognition] start() called');
     const Ctor = getSpeechRecognitionCtor();
-    if (!Ctor) return;
+    if (!Ctor) {
+      console.log('[SpeechRecognition] start() - no constructor available');
+      return;
+    }
     // Clean up any lingering instance
     if (instanceRef.current) {
+      console.log('[SpeechRecognition] start() - aborting previous instance');
       try { instanceRef.current.abort(); } catch {}
     }
@@ -52,6 +63,7 @@ export function useSpeechRecognition() {
     recognition.continuous = true;
     recognition.interimResults = true;
     recognition.lang = navigator.language || 'en-US';
+    console.log('[SpeechRecognition] start() - created instance, lang:', recognition.lang);
     transcriptRef.current = '';
     activeRef.current = true;
@@ -59,24 +71,30 @@ export function useSpeechRecognition() {
     recognition.onresult = (event: SpeechRecognitionEvent) => {
       let finalTranscript = '';
+      let interimTranscript = '';
       for (let i = 0; i < event.results.length; i++) {
+        const text = event.results[i][0].transcript;
         if (event.results[i].isFinal) {
-          finalTranscript += event.results[i][0].transcript;
+          finalTranscript += text;
+        } else {
+          interimTranscript += text;
         }
       }
-      if (finalTranscript) {
-        transcriptRef.current = finalTranscript;
-      }
+      console.log('[SpeechRecognition] onresult - final:', JSON.stringify(finalTranscript), 'interim:', JSON.stringify(interimTranscript));
+      // Always keep the best transcript available (final preferred, interim as fallback)
+      transcriptRef.current = finalTranscript || interimTranscript;
     };
     recognition.onend = () => {
+      console.log('[SpeechRecognition] onend - activeRef:', activeRef.current);
       // Auto-restart if we're still supposed to be listening (handles silence gaps)
       if (activeRef.current) {
-        try { recognition.start(); } catch {}
+        try { recognition.start(); } catch (e) { console.error('[SpeechRecognition] restart failed:', e); }
       }
     };
-    recognition.onerror = () => {
+    recognition.onerror = (event) => {
+      console.error('[SpeechRecognition] onerror:', event.error);
       // On error, stop gracefully
       if (activeRef.current) {
         activeRef.current = false;
@@ -85,14 +103,21 @@ export function useSpeechRecognition() {
       }
     };
-    try { recognition.start(); } catch {}
+    try {
+      recognition.start();
+      console.log('[SpeechRecognition] start() - recognition.start() succeeded');
+    } catch (e) {
+      console.error('[SpeechRecognition] start() - recognition.start() failed:', e);
+    }
   }, []);
   const stop = useCallback((): Promise<string> => {
+    console.log('[SpeechRecognition] stop() called, current transcript:', JSON.stringify(transcriptRef.current));
     return new Promise((resolve) => {
       activeRef.current = false;
       const instance = instanceRef.current;
       if (!instance) {
+        console.log('[SpeechRecognition] stop() - no instance, resolving with:', JSON.stringify(transcriptRef.current));
         resolve(transcriptRef.current);
         return;
       }
@@ -101,11 +126,12 @@ export function useSpeechRecognition() {
       // Give a brief moment for any final results, then resolve
       setTimeout(() => {
-        try { instance.stop(); } catch {}
+        try { instance.stop(); } catch (e) { console.error('[SpeechRecognition] stop() - instance.stop() failed:', e); }
         instanceRef.current = null;
         // Resolve with whatever we accumulated
         const text = transcriptRef.current;
+        console.log('[SpeechRecognition] stop() - resolving with:', JSON.stringify(text));
         if (resolveRef.current) {
           resolveRef.current(text);
           resolveRef.current = null;