fluxy-bot 0.8.4 → 0.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@
4
4
  <meta charset="UTF-8" />
5
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, interactive-widget=resizes-content" />
6
6
  <title>Fluxy Chat</title>
7
- <script type="module" crossorigin src="/fluxy/assets/fluxy-V26b7AxV.js"></script>
7
+ <script type="module" crossorigin src="/fluxy/assets/fluxy-CY9PvJvb.js"></script>
8
8
  <link rel="modulepreload" crossorigin href="/fluxy/assets/globals-DYbVw4jJ.js">
9
9
  <link rel="stylesheet" crossorigin href="/fluxy/assets/globals-BrtAPuMR.css">
10
10
  </head>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fluxy-bot",
3
- "version": "0.8.4",
3
+ "version": "0.8.6",
4
4
  "releaseNotes": [
5
5
  "Fixed some bugs to iOs ",
6
6
  "2. ",
@@ -58,6 +58,7 @@ const DRAFT_KEY = 'fluxy_draft';
58
58
  export default function InputBar({ onSend, onStop, streaming, whisperEnabled, onTranscribe }: Props) {
59
59
  const { start: startSpeech, stop: stopSpeech, abort: abortSpeech, isSupported: webSpeechSupported } = useSpeechRecognition();
60
60
  const voiceEnabled = whisperEnabled || webSpeechSupported;
61
+ console.log('[InputBar] render - whisperEnabled:', whisperEnabled, 'webSpeechSupported:', webSpeechSupported, 'voiceEnabled:', voiceEnabled);
61
62
  const [text, setText] = useState(() => {
62
63
  try { return localStorage.getItem(DRAFT_KEY) || ''; } catch { return ''; }
63
64
  });
@@ -107,7 +108,8 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
107
108
  return () => { if (intervalRef.current) clearInterval(intervalRef.current); };
108
109
  }, [isRecording]);
109
110
 
110
- const stopRecording = useCallback((cancelled: boolean) => {
111
+ const stopRecording = useCallback(async (cancelled: boolean) => {
112
+ console.log('[InputBar] stopRecording called, cancelled:', cancelled, 'recorder:', !!mediaRecorderRef.current, 'whisper:', whisperEnabled);
111
113
  if (intervalRef.current) clearInterval(intervalRef.current);
112
114
  if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
113
115
  isHolding.current = false;
@@ -116,77 +118,76 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
116
118
  const stream = streamRef.current;
117
119
 
118
120
  if (cancelled) {
119
- // Clean up mic + abort speech recognition
120
121
  stream?.getTracks().forEach((t) => t.stop());
121
122
  mediaRecorderRef.current = null;
122
123
  streamRef.current = null;
123
124
  audioChunksRef.current = [];
124
125
  abortSpeech();
125
- } else if (!recorder || recorder.state === 'inactive') {
126
- // No recorder active, clean up
127
- stream?.getTracks().forEach((t) => t.stop());
128
- mediaRecorderRef.current = null;
129
- streamRef.current = null;
130
- audioChunksRef.current = [];
131
- abortSpeech();
132
- } else {
133
- // Stop recorder — ondataavailable + onstop will fire
126
+ } else if (recorder && recorder.state !== 'inactive') {
127
+ // Whisper path: stop MediaRecorder and use its audio
134
128
  recorder.onstop = async () => {
129
+ console.log('[InputBar] recorder.onstop fired, chunks:', audioChunksRef.current.length);
135
130
  stream?.getTracks().forEach((t) => t.stop());
136
131
  const blob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
137
132
  audioChunksRef.current = [];
138
133
  mediaRecorderRef.current = null;
139
134
  streamRef.current = null;
140
135
 
141
- if (blob.size < 1000) {
142
- abortSpeech();
143
- return; // too small, skip
144
- }
136
+ console.log('[InputBar] blob size:', blob.size);
137
+ if (blob.size < 1000) return;
145
138
 
146
- // Convert to base64
147
- const reader = new FileReader();
148
- reader.onloadend = async () => {
149
- const dataUrl = reader.result as string;
139
+ const fileReader = new FileReader();
140
+ fileReader.onloadend = async () => {
141
+ const dataUrl = fileReader.result as string;
150
142
  const base64 = dataUrl.split(',')[1];
151
143
  if (!base64) return;
152
144
 
145
+ console.log('[InputBar] Whisper path, base64 length:', base64.length);
153
146
  try {
154
- // Whisper path: send audio to Whisper API for transcription
155
- if (whisperEnabled && (onTranscribe || true)) {
156
- let data: { transcript?: string };
157
- if (onTranscribe) {
158
- data = await onTranscribe(base64);
159
- } else {
160
- const res = await fetch('/api/whisper/transcribe', {
161
- method: 'POST',
162
- headers: { 'Content-Type': 'application/json' },
163
- body: JSON.stringify({ audio: base64 }),
164
- });
165
- data = await res.json();
166
- }
167
- abortSpeech(); // discard Web Speech results when using Whisper
168
- if (data.transcript?.trim()) {
169
- const pendingAtts = attachments.length > 0 ? attachments : undefined;
170
- onSend(data.transcript.trim(), pendingAtts, dataUrl);
171
- if (pendingAtts) setAttachments([]);
172
- }
147
+ let data: { transcript?: string };
148
+ if (onTranscribe) {
149
+ data = await onTranscribe(base64);
173
150
  } else {
174
- // Web Speech API path: use accumulated transcript
175
- const transcript = await stopSpeech();
176
- if (transcript.trim()) {
177
- const pendingAtts = attachments.length > 0 ? attachments : undefined;
178
- onSend(transcript.trim(), pendingAtts, dataUrl);
179
- if (pendingAtts) setAttachments([]);
180
- }
151
+ const res = await fetch('/api/whisper/transcribe', {
152
+ method: 'POST',
153
+ headers: { 'Content-Type': 'application/json' },
154
+ body: JSON.stringify({ audio: base64 }),
155
+ });
156
+ data = await res.json();
157
+ }
158
+ if (data.transcript?.trim()) {
159
+ const pendingAtts = attachments.length > 0 ? attachments : undefined;
160
+ onSend(data.transcript.trim(), pendingAtts, dataUrl);
161
+ if (pendingAtts) setAttachments([]);
181
162
  }
182
- } catch {
183
- abortSpeech();
184
- // Transcription failed silently
163
+ } catch (err) {
164
+ console.error('[InputBar] Whisper transcription error:', err);
185
165
  }
186
166
  };
187
- reader.readAsDataURL(blob);
167
+ fileReader.readAsDataURL(blob);
188
168
  };
189
169
  recorder.stop();
170
+ } else {
171
+ // Web Speech API path (no MediaRecorder): get transcript directly
172
+ console.log('[InputBar] Web Speech path, stopping speech recognition...');
173
+ stream?.getTracks().forEach((t) => t.stop());
174
+ mediaRecorderRef.current = null;
175
+ streamRef.current = null;
176
+ audioChunksRef.current = [];
177
+
178
+ try {
179
+ const transcript = await stopSpeech();
180
+ console.log('[InputBar] Web Speech transcript:', JSON.stringify(transcript));
181
+ if (transcript.trim()) {
182
+ const pendingAtts = attachments.length > 0 ? attachments : undefined;
183
+ onSend(transcript.trim(), pendingAtts);
184
+ if (pendingAtts) setAttachments([]);
185
+ } else {
186
+ console.log('[InputBar] Web Speech transcript was empty');
187
+ }
188
+ } catch (err) {
189
+ console.error('[InputBar] Web Speech stop error:', err);
190
+ }
190
191
  }
191
192
 
192
193
  if (micRef.current) micRef.current.style.transform = '';
@@ -262,38 +263,50 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
262
263
 
263
264
  // ── Mic pointer handlers ──
264
265
  const handleMicDown = useCallback((e: RPointerEvent) => {
266
+ console.log('[InputBar] handleMicDown fired, voiceEnabled:', voiceEnabled, 'whisper:', whisperEnabled);
265
267
  e.preventDefault();
266
268
  startXRef.current = e.clientX;
267
269
  dragRef.current = 0;
268
270
  (e.currentTarget as HTMLElement).setPointerCapture(e.pointerId);
269
271
 
270
272
  holdTimerRef.current = setTimeout(async () => {
271
- if (!voiceEnabled) return;
273
+ console.log('[InputBar] hold timer fired, voiceEnabled:', voiceEnabled, 'whisper:', whisperEnabled);
274
+ if (!voiceEnabled) {
275
+ console.log('[InputBar] voiceEnabled is false, returning');
276
+ return;
277
+ }
272
278
  try {
273
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
274
- streamRef.current = stream;
275
- const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : 'audio/webm';
276
- const recorder = new MediaRecorder(stream, { mimeType });
277
- audioChunksRef.current = [];
278
- recorder.ondataavailable = (e) => {
279
- if (e.data.size > 0) audioChunksRef.current.push(e.data);
280
- };
281
- mediaRecorderRef.current = recorder;
282
- recorder.start();
283
-
284
- // Start Web Speech API alongside MediaRecorder (used as fallback or primary)
285
- if (webSpeechSupported) {
279
+ if (whisperEnabled) {
280
+ // Whisper path: need getUserMedia + MediaRecorder for audio capture
281
+ console.log('[InputBar] Whisper path: requesting getUserMedia...');
282
+ const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
283
+ console.log('[InputBar] getUserMedia succeeded, tracks:', stream.getTracks().length);
284
+ streamRef.current = stream;
285
+ const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : 'audio/webm';
286
+ const recorder = new MediaRecorder(stream, { mimeType });
287
+ audioChunksRef.current = [];
288
+ recorder.ondataavailable = (ev) => {
289
+ if (ev.data.size > 0) audioChunksRef.current.push(ev.data);
290
+ };
291
+ mediaRecorderRef.current = recorder;
292
+ recorder.start();
293
+ console.log('[InputBar] MediaRecorder started');
294
+ } else {
295
+ // Web Speech path: only SpeechRecognition, no getUserMedia (avoids mic conflict on mobile)
296
+ console.log('[InputBar] Web Speech path: starting SpeechRecognition only...');
286
297
  startSpeech();
298
+ console.log('[InputBar] SpeechRecognition started');
287
299
  }
288
300
 
289
301
  isHolding.current = true;
290
302
  setIsRecording(true);
291
303
  setRecordingTime(0);
292
- } catch {
293
- // Mic permission denied or not available
304
+ console.log('[InputBar] recording started, isHolding=true');
305
+ } catch (err) {
306
+ console.error('[InputBar] recording setup failed:', err);
294
307
  }
295
308
  }, 200);
296
- }, [voiceEnabled, webSpeechSupported, startSpeech]);
309
+ }, [voiceEnabled, whisperEnabled, startSpeech]);
297
310
 
298
311
  const handleMicMove = useCallback((e: RPointerEvent) => {
299
312
  if (!isHolding.current) return;
@@ -311,12 +324,17 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
311
324
  }, [stopRecording]);
312
325
 
313
326
  const handleMicUp = useCallback(() => {
327
+ console.log('[InputBar] handleMicUp, isHolding:', isHolding.current);
314
328
  if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
315
- if (!isHolding.current) return;
329
+ if (!isHolding.current) {
330
+ console.log('[InputBar] handleMicUp - not holding, ignoring');
331
+ return;
332
+ }
316
333
  stopRecording(false);
317
334
  }, [stopRecording]);
318
335
 
319
336
  const handleMicCancel = useCallback(() => {
337
+ console.log('[InputBar] handleMicCancel fired, isHolding:', isHolding.current, 'holdTimer:', !!holdTimerRef.current);
320
338
  if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
321
339
  if (isHolding.current) stopRecording(true);
322
340
  }, [stopRecording]);
@@ -22,14 +22,20 @@ interface SpeechRecognitionInstance extends EventTarget {
22
22
 
23
23
  const getSpeechRecognitionCtor = (): SpeechRecognitionConstructor | null => {
24
24
  if (typeof window === 'undefined') return null;
25
- return (
25
+ const ctor = (
26
26
  (window as unknown as Record<string, SpeechRecognitionConstructor>).SpeechRecognition ??
27
27
  (window as unknown as Record<string, SpeechRecognitionConstructor>).webkitSpeechRecognition ??
28
28
  null
29
29
  );
30
+ console.log('[SpeechRecognition] getCtor:', ctor ? 'found' : 'NOT found', {
31
+ hasSpeechRecognition: 'SpeechRecognition' in window,
32
+ hasWebkit: 'webkitSpeechRecognition' in window,
33
+ });
34
+ return ctor;
30
35
  };
31
36
 
32
37
  export const isWebSpeechSupported = getSpeechRecognitionCtor() !== null;
38
+ console.log('[SpeechRecognition] isWebSpeechSupported:', isWebSpeechSupported);
33
39
 
34
40
  export function useSpeechRecognition() {
35
41
  const instanceRef = useRef<SpeechRecognitionInstance | null>(null);
@@ -40,11 +46,16 @@ export function useSpeechRecognition() {
40
46
  const isSupported = useMemo(() => isWebSpeechSupported, []);
41
47
 
42
48
  const start = useCallback(() => {
49
+ console.log('[SpeechRecognition] start() called');
43
50
  const Ctor = getSpeechRecognitionCtor();
44
- if (!Ctor) return;
51
+ if (!Ctor) {
52
+ console.log('[SpeechRecognition] start() - no constructor available');
53
+ return;
54
+ }
45
55
 
46
56
  // Clean up any lingering instance
47
57
  if (instanceRef.current) {
58
+ console.log('[SpeechRecognition] start() - aborting previous instance');
48
59
  try { instanceRef.current.abort(); } catch {}
49
60
  }
50
61
 
@@ -52,6 +63,7 @@ export function useSpeechRecognition() {
52
63
  recognition.continuous = true;
53
64
  recognition.interimResults = true;
54
65
  recognition.lang = navigator.language || 'en-US';
66
+ console.log('[SpeechRecognition] start() - created instance, lang:', recognition.lang);
55
67
 
56
68
  transcriptRef.current = '';
57
69
  activeRef.current = true;
@@ -59,24 +71,30 @@ export function useSpeechRecognition() {
59
71
 
60
72
  recognition.onresult = (event: SpeechRecognitionEvent) => {
61
73
  let finalTranscript = '';
74
+ let interimTranscript = '';
62
75
  for (let i = 0; i < event.results.length; i++) {
76
+ const text = event.results[i][0].transcript;
63
77
  if (event.results[i].isFinal) {
64
- finalTranscript += event.results[i][0].transcript;
78
+ finalTranscript += text;
79
+ } else {
80
+ interimTranscript += text;
65
81
  }
66
82
  }
67
- if (finalTranscript) {
68
- transcriptRef.current = finalTranscript;
69
- }
83
+ console.log('[SpeechRecognition] onresult - final:', JSON.stringify(finalTranscript), 'interim:', JSON.stringify(interimTranscript));
84
+ // Always keep the best transcript available (final preferred, interim as fallback)
85
+ transcriptRef.current = finalTranscript || interimTranscript;
70
86
  };
71
87
 
72
88
  recognition.onend = () => {
89
+ console.log('[SpeechRecognition] onend - activeRef:', activeRef.current);
73
90
  // Auto-restart if we're still supposed to be listening (handles silence gaps)
74
91
  if (activeRef.current) {
75
- try { recognition.start(); } catch {}
92
+ try { recognition.start(); } catch (e) { console.error('[SpeechRecognition] restart failed:', e); }
76
93
  }
77
94
  };
78
95
 
79
- recognition.onerror = () => {
96
+ recognition.onerror = (event) => {
97
+ console.error('[SpeechRecognition] onerror:', event.error);
80
98
  // On error, stop gracefully
81
99
  if (activeRef.current) {
82
100
  activeRef.current = false;
@@ -85,14 +103,21 @@ export function useSpeechRecognition() {
85
103
  }
86
104
  };
87
105
 
88
- try { recognition.start(); } catch {}
106
+ try {
107
+ recognition.start();
108
+ console.log('[SpeechRecognition] start() - recognition.start() succeeded');
109
+ } catch (e) {
110
+ console.error('[SpeechRecognition] start() - recognition.start() failed:', e);
111
+ }
89
112
  }, []);
90
113
 
91
114
  const stop = useCallback((): Promise<string> => {
115
+ console.log('[SpeechRecognition] stop() called, current transcript:', JSON.stringify(transcriptRef.current));
92
116
  return new Promise((resolve) => {
93
117
  activeRef.current = false;
94
118
  const instance = instanceRef.current;
95
119
  if (!instance) {
120
+ console.log('[SpeechRecognition] stop() - no instance, resolving with:', JSON.stringify(transcriptRef.current));
96
121
  resolve(transcriptRef.current);
97
122
  return;
98
123
  }
@@ -101,11 +126,12 @@ export function useSpeechRecognition() {
101
126
 
102
127
  // Give a brief moment for any final results, then resolve
103
128
  setTimeout(() => {
104
- try { instance.stop(); } catch {}
129
+ try { instance.stop(); } catch (e) { console.error('[SpeechRecognition] stop() - instance.stop() failed:', e); }
105
130
  instanceRef.current = null;
106
131
 
107
132
  // Resolve with whatever we accumulated
108
133
  const text = transcriptRef.current;
134
+ console.log('[SpeechRecognition] stop() - resolving with:', JSON.stringify(text));
109
135
  if (resolveRef.current) {
110
136
  resolveRef.current(text);
111
137
  resolveRef.current = null;