fluxy-bot 0.8.4 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist-fluxy/fluxy.html
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<meta charset="UTF-8" />
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no, interactive-widget=resizes-content" />
|
|
6
6
|
<title>Fluxy Chat</title>
|
|
7
|
-
<script type="module" crossorigin src="/fluxy/assets/fluxy-
|
|
7
|
+
<script type="module" crossorigin src="/fluxy/assets/fluxy-CY9PvJvb.js"></script>
|
|
8
8
|
<link rel="modulepreload" crossorigin href="/fluxy/assets/globals-DYbVw4jJ.js">
|
|
9
9
|
<link rel="stylesheet" crossorigin href="/fluxy/assets/globals-BrtAPuMR.css">
|
|
10
10
|
</head>
|
package/package.json
CHANGED
|
@@ -58,6 +58,7 @@ const DRAFT_KEY = 'fluxy_draft';
|
|
|
58
58
|
export default function InputBar({ onSend, onStop, streaming, whisperEnabled, onTranscribe }: Props) {
|
|
59
59
|
const { start: startSpeech, stop: stopSpeech, abort: abortSpeech, isSupported: webSpeechSupported } = useSpeechRecognition();
|
|
60
60
|
const voiceEnabled = whisperEnabled || webSpeechSupported;
|
|
61
|
+
console.log('[InputBar] render - whisperEnabled:', whisperEnabled, 'webSpeechSupported:', webSpeechSupported, 'voiceEnabled:', voiceEnabled);
|
|
61
62
|
const [text, setText] = useState(() => {
|
|
62
63
|
try { return localStorage.getItem(DRAFT_KEY) || ''; } catch { return ''; }
|
|
63
64
|
});
|
|
@@ -107,7 +108,8 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
|
|
|
107
108
|
return () => { if (intervalRef.current) clearInterval(intervalRef.current); };
|
|
108
109
|
}, [isRecording]);
|
|
109
110
|
|
|
110
|
-
const stopRecording = useCallback((cancelled: boolean) => {
|
|
111
|
+
const stopRecording = useCallback(async (cancelled: boolean) => {
|
|
112
|
+
console.log('[InputBar] stopRecording called, cancelled:', cancelled, 'recorder:', !!mediaRecorderRef.current, 'whisper:', whisperEnabled);
|
|
111
113
|
if (intervalRef.current) clearInterval(intervalRef.current);
|
|
112
114
|
if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
|
|
113
115
|
isHolding.current = false;
|
|
@@ -116,77 +118,76 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
|
|
|
116
118
|
const stream = streamRef.current;
|
|
117
119
|
|
|
118
120
|
if (cancelled) {
|
|
119
|
-
// Clean up mic + abort speech recognition
|
|
120
121
|
stream?.getTracks().forEach((t) => t.stop());
|
|
121
122
|
mediaRecorderRef.current = null;
|
|
122
123
|
streamRef.current = null;
|
|
123
124
|
audioChunksRef.current = [];
|
|
124
125
|
abortSpeech();
|
|
125
|
-
} else if (
|
|
126
|
-
//
|
|
127
|
-
stream?.getTracks().forEach((t) => t.stop());
|
|
128
|
-
mediaRecorderRef.current = null;
|
|
129
|
-
streamRef.current = null;
|
|
130
|
-
audioChunksRef.current = [];
|
|
131
|
-
abortSpeech();
|
|
132
|
-
} else {
|
|
133
|
-
// Stop recorder — ondataavailable + onstop will fire
|
|
126
|
+
} else if (recorder && recorder.state !== 'inactive') {
|
|
127
|
+
// Whisper path: stop MediaRecorder and use its audio
|
|
134
128
|
recorder.onstop = async () => {
|
|
129
|
+
console.log('[InputBar] recorder.onstop fired, chunks:', audioChunksRef.current.length);
|
|
135
130
|
stream?.getTracks().forEach((t) => t.stop());
|
|
136
131
|
const blob = new Blob(audioChunksRef.current, { type: 'audio/webm' });
|
|
137
132
|
audioChunksRef.current = [];
|
|
138
133
|
mediaRecorderRef.current = null;
|
|
139
134
|
streamRef.current = null;
|
|
140
135
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
return; // too small, skip
|
|
144
|
-
}
|
|
136
|
+
console.log('[InputBar] blob size:', blob.size);
|
|
137
|
+
if (blob.size < 1000) return;
|
|
145
138
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
const dataUrl = reader.result as string;
|
|
139
|
+
const fileReader = new FileReader();
|
|
140
|
+
fileReader.onloadend = async () => {
|
|
141
|
+
const dataUrl = fileReader.result as string;
|
|
150
142
|
const base64 = dataUrl.split(',')[1];
|
|
151
143
|
if (!base64) return;
|
|
152
144
|
|
|
145
|
+
console.log('[InputBar] Whisper path, base64 length:', base64.length);
|
|
153
146
|
try {
|
|
154
|
-
|
|
155
|
-
if (
|
|
156
|
-
|
|
157
|
-
if (onTranscribe) {
|
|
158
|
-
data = await onTranscribe(base64);
|
|
159
|
-
} else {
|
|
160
|
-
const res = await fetch('/api/whisper/transcribe', {
|
|
161
|
-
method: 'POST',
|
|
162
|
-
headers: { 'Content-Type': 'application/json' },
|
|
163
|
-
body: JSON.stringify({ audio: base64 }),
|
|
164
|
-
});
|
|
165
|
-
data = await res.json();
|
|
166
|
-
}
|
|
167
|
-
abortSpeech(); // discard Web Speech results when using Whisper
|
|
168
|
-
if (data.transcript?.trim()) {
|
|
169
|
-
const pendingAtts = attachments.length > 0 ? attachments : undefined;
|
|
170
|
-
onSend(data.transcript.trim(), pendingAtts, dataUrl);
|
|
171
|
-
if (pendingAtts) setAttachments([]);
|
|
172
|
-
}
|
|
147
|
+
let data: { transcript?: string };
|
|
148
|
+
if (onTranscribe) {
|
|
149
|
+
data = await onTranscribe(base64);
|
|
173
150
|
} else {
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
151
|
+
const res = await fetch('/api/whisper/transcribe', {
|
|
152
|
+
method: 'POST',
|
|
153
|
+
headers: { 'Content-Type': 'application/json' },
|
|
154
|
+
body: JSON.stringify({ audio: base64 }),
|
|
155
|
+
});
|
|
156
|
+
data = await res.json();
|
|
157
|
+
}
|
|
158
|
+
if (data.transcript?.trim()) {
|
|
159
|
+
const pendingAtts = attachments.length > 0 ? attachments : undefined;
|
|
160
|
+
onSend(data.transcript.trim(), pendingAtts, dataUrl);
|
|
161
|
+
if (pendingAtts) setAttachments([]);
|
|
181
162
|
}
|
|
182
|
-
} catch {
|
|
183
|
-
|
|
184
|
-
// Transcription failed silently
|
|
163
|
+
} catch (err) {
|
|
164
|
+
console.error('[InputBar] Whisper transcription error:', err);
|
|
185
165
|
}
|
|
186
166
|
};
|
|
187
|
-
|
|
167
|
+
fileReader.readAsDataURL(blob);
|
|
188
168
|
};
|
|
189
169
|
recorder.stop();
|
|
170
|
+
} else {
|
|
171
|
+
// Web Speech API path (no MediaRecorder): get transcript directly
|
|
172
|
+
console.log('[InputBar] Web Speech path, stopping speech recognition...');
|
|
173
|
+
stream?.getTracks().forEach((t) => t.stop());
|
|
174
|
+
mediaRecorderRef.current = null;
|
|
175
|
+
streamRef.current = null;
|
|
176
|
+
audioChunksRef.current = [];
|
|
177
|
+
|
|
178
|
+
try {
|
|
179
|
+
const transcript = await stopSpeech();
|
|
180
|
+
console.log('[InputBar] Web Speech transcript:', JSON.stringify(transcript));
|
|
181
|
+
if (transcript.trim()) {
|
|
182
|
+
const pendingAtts = attachments.length > 0 ? attachments : undefined;
|
|
183
|
+
onSend(transcript.trim(), pendingAtts);
|
|
184
|
+
if (pendingAtts) setAttachments([]);
|
|
185
|
+
} else {
|
|
186
|
+
console.log('[InputBar] Web Speech transcript was empty');
|
|
187
|
+
}
|
|
188
|
+
} catch (err) {
|
|
189
|
+
console.error('[InputBar] Web Speech stop error:', err);
|
|
190
|
+
}
|
|
190
191
|
}
|
|
191
192
|
|
|
192
193
|
if (micRef.current) micRef.current.style.transform = '';
|
|
@@ -262,38 +263,50 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
|
|
|
262
263
|
|
|
263
264
|
// ── Mic pointer handlers ──
|
|
264
265
|
const handleMicDown = useCallback((e: RPointerEvent) => {
|
|
266
|
+
console.log('[InputBar] handleMicDown fired, voiceEnabled:', voiceEnabled, 'whisper:', whisperEnabled);
|
|
265
267
|
e.preventDefault();
|
|
266
268
|
startXRef.current = e.clientX;
|
|
267
269
|
dragRef.current = 0;
|
|
268
270
|
(e.currentTarget as HTMLElement).setPointerCapture(e.pointerId);
|
|
269
271
|
|
|
270
272
|
holdTimerRef.current = setTimeout(async () => {
|
|
271
|
-
|
|
273
|
+
console.log('[InputBar] hold timer fired, voiceEnabled:', voiceEnabled, 'whisper:', whisperEnabled);
|
|
274
|
+
if (!voiceEnabled) {
|
|
275
|
+
console.log('[InputBar] voiceEnabled is false, returning');
|
|
276
|
+
return;
|
|
277
|
+
}
|
|
272
278
|
try {
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
279
|
+
if (whisperEnabled) {
|
|
280
|
+
// Whisper path: need getUserMedia + MediaRecorder for audio capture
|
|
281
|
+
console.log('[InputBar] Whisper path: requesting getUserMedia...');
|
|
282
|
+
const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
|
|
283
|
+
console.log('[InputBar] getUserMedia succeeded, tracks:', stream.getTracks().length);
|
|
284
|
+
streamRef.current = stream;
|
|
285
|
+
const mimeType = MediaRecorder.isTypeSupported('audio/webm;codecs=opus') ? 'audio/webm;codecs=opus' : 'audio/webm';
|
|
286
|
+
const recorder = new MediaRecorder(stream, { mimeType });
|
|
287
|
+
audioChunksRef.current = [];
|
|
288
|
+
recorder.ondataavailable = (ev) => {
|
|
289
|
+
if (ev.data.size > 0) audioChunksRef.current.push(ev.data);
|
|
290
|
+
};
|
|
291
|
+
mediaRecorderRef.current = recorder;
|
|
292
|
+
recorder.start();
|
|
293
|
+
console.log('[InputBar] MediaRecorder started');
|
|
294
|
+
} else {
|
|
295
|
+
// Web Speech path: only SpeechRecognition, no getUserMedia (avoids mic conflict on mobile)
|
|
296
|
+
console.log('[InputBar] Web Speech path: starting SpeechRecognition only...');
|
|
286
297
|
startSpeech();
|
|
298
|
+
console.log('[InputBar] SpeechRecognition started');
|
|
287
299
|
}
|
|
288
300
|
|
|
289
301
|
isHolding.current = true;
|
|
290
302
|
setIsRecording(true);
|
|
291
303
|
setRecordingTime(0);
|
|
292
|
-
|
|
293
|
-
|
|
304
|
+
console.log('[InputBar] recording started, isHolding=true');
|
|
305
|
+
} catch (err) {
|
|
306
|
+
console.error('[InputBar] recording setup failed:', err);
|
|
294
307
|
}
|
|
295
308
|
}, 200);
|
|
296
|
-
}, [voiceEnabled,
|
|
309
|
+
}, [voiceEnabled, whisperEnabled, startSpeech]);
|
|
297
310
|
|
|
298
311
|
const handleMicMove = useCallback((e: RPointerEvent) => {
|
|
299
312
|
if (!isHolding.current) return;
|
|
@@ -311,12 +324,17 @@ export default function InputBar({ onSend, onStop, streaming, whisperEnabled, on
|
|
|
311
324
|
}, [stopRecording]);
|
|
312
325
|
|
|
313
326
|
const handleMicUp = useCallback(() => {
|
|
327
|
+
console.log('[InputBar] handleMicUp, isHolding:', isHolding.current);
|
|
314
328
|
if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
|
|
315
|
-
if (!isHolding.current)
|
|
329
|
+
if (!isHolding.current) {
|
|
330
|
+
console.log('[InputBar] handleMicUp - not holding, ignoring');
|
|
331
|
+
return;
|
|
332
|
+
}
|
|
316
333
|
stopRecording(false);
|
|
317
334
|
}, [stopRecording]);
|
|
318
335
|
|
|
319
336
|
const handleMicCancel = useCallback(() => {
|
|
337
|
+
console.log('[InputBar] handleMicCancel fired, isHolding:', isHolding.current, 'holdTimer:', !!holdTimerRef.current);
|
|
320
338
|
if (holdTimerRef.current) { clearTimeout(holdTimerRef.current); holdTimerRef.current = null; }
|
|
321
339
|
if (isHolding.current) stopRecording(true);
|
|
322
340
|
}, [stopRecording]);
|
|
@@ -22,14 +22,20 @@ interface SpeechRecognitionInstance extends EventTarget {
|
|
|
22
22
|
|
|
23
23
|
const getSpeechRecognitionCtor = (): SpeechRecognitionConstructor | null => {
|
|
24
24
|
if (typeof window === 'undefined') return null;
|
|
25
|
-
|
|
25
|
+
const ctor = (
|
|
26
26
|
(window as unknown as Record<string, SpeechRecognitionConstructor>).SpeechRecognition ??
|
|
27
27
|
(window as unknown as Record<string, SpeechRecognitionConstructor>).webkitSpeechRecognition ??
|
|
28
28
|
null
|
|
29
29
|
);
|
|
30
|
+
console.log('[SpeechRecognition] getCtor:', ctor ? 'found' : 'NOT found', {
|
|
31
|
+
hasSpeechRecognition: 'SpeechRecognition' in window,
|
|
32
|
+
hasWebkit: 'webkitSpeechRecognition' in window,
|
|
33
|
+
});
|
|
34
|
+
return ctor;
|
|
30
35
|
};
|
|
31
36
|
|
|
32
37
|
export const isWebSpeechSupported = getSpeechRecognitionCtor() !== null;
|
|
38
|
+
console.log('[SpeechRecognition] isWebSpeechSupported:', isWebSpeechSupported);
|
|
33
39
|
|
|
34
40
|
export function useSpeechRecognition() {
|
|
35
41
|
const instanceRef = useRef<SpeechRecognitionInstance | null>(null);
|
|
@@ -40,11 +46,16 @@ export function useSpeechRecognition() {
|
|
|
40
46
|
const isSupported = useMemo(() => isWebSpeechSupported, []);
|
|
41
47
|
|
|
42
48
|
const start = useCallback(() => {
|
|
49
|
+
console.log('[SpeechRecognition] start() called');
|
|
43
50
|
const Ctor = getSpeechRecognitionCtor();
|
|
44
|
-
if (!Ctor)
|
|
51
|
+
if (!Ctor) {
|
|
52
|
+
console.log('[SpeechRecognition] start() - no constructor available');
|
|
53
|
+
return;
|
|
54
|
+
}
|
|
45
55
|
|
|
46
56
|
// Clean up any lingering instance
|
|
47
57
|
if (instanceRef.current) {
|
|
58
|
+
console.log('[SpeechRecognition] start() - aborting previous instance');
|
|
48
59
|
try { instanceRef.current.abort(); } catch {}
|
|
49
60
|
}
|
|
50
61
|
|
|
@@ -52,6 +63,7 @@ export function useSpeechRecognition() {
|
|
|
52
63
|
recognition.continuous = true;
|
|
53
64
|
recognition.interimResults = true;
|
|
54
65
|
recognition.lang = navigator.language || 'en-US';
|
|
66
|
+
console.log('[SpeechRecognition] start() - created instance, lang:', recognition.lang);
|
|
55
67
|
|
|
56
68
|
transcriptRef.current = '';
|
|
57
69
|
activeRef.current = true;
|
|
@@ -59,24 +71,30 @@ export function useSpeechRecognition() {
|
|
|
59
71
|
|
|
60
72
|
recognition.onresult = (event: SpeechRecognitionEvent) => {
|
|
61
73
|
let finalTranscript = '';
|
|
74
|
+
let interimTranscript = '';
|
|
62
75
|
for (let i = 0; i < event.results.length; i++) {
|
|
76
|
+
const text = event.results[i][0].transcript;
|
|
63
77
|
if (event.results[i].isFinal) {
|
|
64
|
-
finalTranscript +=
|
|
78
|
+
finalTranscript += text;
|
|
79
|
+
} else {
|
|
80
|
+
interimTranscript += text;
|
|
65
81
|
}
|
|
66
82
|
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
83
|
+
console.log('[SpeechRecognition] onresult - final:', JSON.stringify(finalTranscript), 'interim:', JSON.stringify(interimTranscript));
|
|
84
|
+
// Always keep the best transcript available (final preferred, interim as fallback)
|
|
85
|
+
transcriptRef.current = finalTranscript || interimTranscript;
|
|
70
86
|
};
|
|
71
87
|
|
|
72
88
|
recognition.onend = () => {
|
|
89
|
+
console.log('[SpeechRecognition] onend - activeRef:', activeRef.current);
|
|
73
90
|
// Auto-restart if we're still supposed to be listening (handles silence gaps)
|
|
74
91
|
if (activeRef.current) {
|
|
75
|
-
try { recognition.start(); } catch {}
|
|
92
|
+
try { recognition.start(); } catch (e) { console.error('[SpeechRecognition] restart failed:', e); }
|
|
76
93
|
}
|
|
77
94
|
};
|
|
78
95
|
|
|
79
|
-
recognition.onerror = () => {
|
|
96
|
+
recognition.onerror = (event) => {
|
|
97
|
+
console.error('[SpeechRecognition] onerror:', event.error);
|
|
80
98
|
// On error, stop gracefully
|
|
81
99
|
if (activeRef.current) {
|
|
82
100
|
activeRef.current = false;
|
|
@@ -85,14 +103,21 @@ export function useSpeechRecognition() {
|
|
|
85
103
|
}
|
|
86
104
|
};
|
|
87
105
|
|
|
88
|
-
try {
|
|
106
|
+
try {
|
|
107
|
+
recognition.start();
|
|
108
|
+
console.log('[SpeechRecognition] start() - recognition.start() succeeded');
|
|
109
|
+
} catch (e) {
|
|
110
|
+
console.error('[SpeechRecognition] start() - recognition.start() failed:', e);
|
|
111
|
+
}
|
|
89
112
|
}, []);
|
|
90
113
|
|
|
91
114
|
const stop = useCallback((): Promise<string> => {
|
|
115
|
+
console.log('[SpeechRecognition] stop() called, current transcript:', JSON.stringify(transcriptRef.current));
|
|
92
116
|
return new Promise((resolve) => {
|
|
93
117
|
activeRef.current = false;
|
|
94
118
|
const instance = instanceRef.current;
|
|
95
119
|
if (!instance) {
|
|
120
|
+
console.log('[SpeechRecognition] stop() - no instance, resolving with:', JSON.stringify(transcriptRef.current));
|
|
96
121
|
resolve(transcriptRef.current);
|
|
97
122
|
return;
|
|
98
123
|
}
|
|
@@ -101,11 +126,12 @@ export function useSpeechRecognition() {
|
|
|
101
126
|
|
|
102
127
|
// Give a brief moment for any final results, then resolve
|
|
103
128
|
setTimeout(() => {
|
|
104
|
-
try { instance.stop(); } catch {}
|
|
129
|
+
try { instance.stop(); } catch (e) { console.error('[SpeechRecognition] stop() - instance.stop() failed:', e); }
|
|
105
130
|
instanceRef.current = null;
|
|
106
131
|
|
|
107
132
|
// Resolve with whatever we accumulated
|
|
108
133
|
const text = transcriptRef.current;
|
|
134
|
+
console.log('[SpeechRecognition] stop() - resolving with:', JSON.stringify(text));
|
|
109
135
|
if (resolveRef.current) {
|
|
110
136
|
resolveRef.current(text);
|
|
111
137
|
resolveRef.current = null;
|