agentgui 1.0.522 → 1.0.524
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/VOICE_FIX_ANALYSIS.md +121 -0
- package/package.json +1 -1
- package/static/js/client.js +17 -17
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# Voice Transcription Fix - Root Cause Analysis & Verification
|
|
2
|
+
|
|
3
|
+
## PROBLEM STATEMENT
|
|
4
|
+
Chat tab voice transcription fails with "no recording" error, while voice tab works perfectly.
|
|
5
|
+
|
|
6
|
+
## ROOT CAUSE IDENTIFIED
|
|
7
|
+
Race condition in `setupChatMicButton()` in `/config/workspace/agentgui/static/js/client.js`
|
|
8
|
+
|
|
9
|
+
### The Issue (Lines 497-507 BEFORE Fix)
|
|
10
|
+
```javascript
|
|
11
|
+
const startRecording = async () => {
|
|
12
|
+
if (isRecording) return;
|
|
13
|
+
chatMicBtn.classList.add('recording');
|
|
14
|
+
const result = await window.STTHandler.startRecording(); // <-- AWAIT HERE
|
|
15
|
+
if (result.success) {
|
|
16
|
+
isRecording = true; // <-- SET isRecording HERE (AFTER ~50ms delay)
|
|
17
|
+
} else {
|
|
18
|
+
chatMicBtn.classList.remove('recording');
|
|
19
|
+
}
|
|
20
|
+
};
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
**Timing sequence that causes failure:**
|
|
24
|
+
1. User presses microphone button (mousedown event fires)
|
|
25
|
+
2. `startRecording()` called at T+0ms
|
|
26
|
+
3. Execution immediately hits `await window.STTHandler.startRecording()`
|
|
27
|
+
4. Control returns to event loop while waiting (~50ms delay for audio access)
|
|
28
|
+
5. **User releases button BEFORE await completes (mouseup fires at T+10ms)**
|
|
29
|
+
6. `stopRecording()` called, checks `if (!isRecording) return`
|
|
30
|
+
7. **`isRecording` is still FALSE** (not set yet!)
|
|
31
|
+
8. `stopRecording()` returns EARLY without calling `window.STTHandler.stopRecording()`
|
|
32
|
+
9. Server-side recording NEVER stops
|
|
33
|
+
10. Chunks not saved to database
|
|
34
|
+
11. Next recording attempt fails with "no recording" error
|
|
35
|
+
|
|
36
|
+
## SOLUTION APPLIED
|
|
37
|
+
Move the `isRecording = true` assignment BEFORE the `await` (Line 499):
|
|
38
|
+
|
|
39
|
+
### The Fix (AFTER)
|
|
40
|
+
```javascript
|
|
41
|
+
const startRecording = async () => {
|
|
42
|
+
if (isRecording) return;
|
|
43
|
+
isRecording = true; // <-- SET IMMEDIATELY (before await)
|
|
44
|
+
chatMicBtn.classList.add('recording');
|
|
45
|
+
const result = await window.STTHandler.startRecording();
|
|
46
|
+
if (!result.success) { // <-- Inverted logic (clearer)
|
|
47
|
+
isRecording = false; // <-- REVERT on failure
|
|
48
|
+
chatMicBtn.classList.remove('recording');
|
|
49
|
+
alert('Microphone access denied: ' + result.error);
|
|
50
|
+
}
|
|
51
|
+
};
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Timing sequence with fix:**
|
|
55
|
+
1. User presses microphone button (mousedown event fires)
|
|
56
|
+
2. `startRecording()` called at T+0ms
|
|
57
|
+
3. **`isRecording = true` set IMMEDIATELY at T+0ms**
|
|
58
|
+
4. Button visual state updated with `.classList.add('recording')`
|
|
59
|
+
5. Execution hits `await window.STTHandler.startRecording()`
|
|
60
|
+
6. Control returns to the event loop while waiting (~50ms)
|
|
61
|
+
7. User releases button (mouseup fires at T+10ms)
|
|
62
|
+
8. `stopRecording()` called, checks `if (!isRecording) return`
|
|
63
|
+
9. **`isRecording` is NOW TRUE**
|
|
64
|
+
10. Proceeds to call `window.STTHandler.stopRecording()`
|
|
65
|
+
11. Server-side recording properly stops and transcribes
|
|
66
|
+
12. Next recording attempt works perfectly
|
|
67
|
+
|
|
68
|
+
## KEY DIFFERENCES
|
|
69
|
+
| Aspect | Before (Broken) | After (Fixed) |
|
|
70
|
+
|--------|-----------------|---------------|
|
|
71
|
+
| When `isRecording = true` is set | After `await` (~50ms) | Before `await` (immediate) |
|
|
72
|
+
| Mouseup sees `isRecording` as | FALSE (`stopRecording()` returns early) | TRUE (proceeds to stop) |
|
|
73
|
+
| Server-side recording | NEVER STOPS | Properly stops |
|
|
74
|
+
| Next attempt | Fails: "no recording" | Works correctly |
|
|
75
|
+
| Race condition window | PRESENT (10-50ms gap) | ELIMINATED |
|
|
76
|
+
|
|
77
|
+
## FILES MODIFIED
|
|
78
|
+
- `/config/workspace/agentgui/static/js/client.js` (Line 499)
|
|
79
|
+
|
|
80
|
+
## CHANGES SUMMARY
|
|
81
|
+
1. Line 499: Added `isRecording = true;` immediately after duplicate check
|
|
82
|
+
2. Line 502: Changed `if (result.success)` to `if (!result.success)` (clearer logic)
|
|
83
|
+
3. Lines 503-504: Moved revert logic inside failure case
|
|
84
|
+
|
|
85
|
+
## VERIFICATION STEPS
|
|
86
|
+
1. Open browser to `http://localhost:3000/gm/`
|
|
87
|
+
2. Go to Chat tab
|
|
88
|
+
3. Click and hold microphone button (press for ~1 second)
|
|
89
|
+
4. Release button
|
|
90
|
+
5. **Expected:** Transcript appears in input box, no error
|
|
91
|
+
6. **Previous behavior:** "Mic access denied" or hung recording
|
|
92
|
+
7. Click again - should work without "no recording" error
|
|
93
|
+
|
|
94
|
+
## WHY VOICE TAB WORKS
|
|
95
|
+
The voice tab in `voice.js` uses essentially the same timing-sensitive pattern as the pre-fix chat code (it also sets `isRecording` only after the `await`):
|
|
96
|
+
```javascript
|
|
97
|
+
async function startRecording() {
|
|
98
|
+
if (isRecording) return;
|
|
99
|
+
var el = document.getElementById('voiceTranscript');
|
|
100
|
+
// ... UI updates ...
|
|
101
|
+
var result = await window.STTHandler.startRecording();
|
|
102
|
+
if (result.success) {
|
|
103
|
+
isRecording = true; // Also set AFTER await
|
|
104
|
+
// ... more UI updates ...
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
Voice tab still has the same potential race condition BUT:
|
|
110
|
+
- Less common because recording tends to take longer in voice-focused UI
|
|
111
|
+
- Users hold button longer, completing the await before release
|
|
112
|
+
- Simple global state is more reliable than closure-scoped state
|
|
113
|
+
|
|
114
|
+
The chat tab fix brings chat into full alignment with reliable patterns.
|
|
115
|
+
|
|
116
|
+
## IMPACT
|
|
117
|
+
- Fixes "no recording" error in chat tab voice transcription
|
|
118
|
+
- Eliminates race condition that could affect multiple recordings
|
|
119
|
+
- Makes chat tab voice recording as reliable as the voice tab (note: `voice.js` itself still sets `isRecording` after the `await`)
|
|
120
|
+
- No server-side changes needed
|
|
121
|
+
- No backwards compatibility issues
|
package/package.json
CHANGED
package/static/js/client.js
CHANGED
|
@@ -494,19 +494,19 @@ class AgentGUIClient {
|
|
|
494
494
|
|
|
495
495
|
let isRecording = false;
|
|
496
496
|
|
|
497
|
-
const
|
|
497
|
+
const startRecording = async () => {
|
|
498
498
|
if (isRecording) return;
|
|
499
|
+
isRecording = true;
|
|
499
500
|
chatMicBtn.classList.add('recording');
|
|
500
501
|
const result = await window.STTHandler.startRecording();
|
|
501
|
-
if (result.success) {
|
|
502
|
-
isRecording =
|
|
503
|
-
} else {
|
|
502
|
+
if (!result.success) {
|
|
503
|
+
isRecording = false;
|
|
504
504
|
chatMicBtn.classList.remove('recording');
|
|
505
505
|
alert('Microphone access denied: ' + result.error);
|
|
506
506
|
}
|
|
507
507
|
};
|
|
508
508
|
|
|
509
|
-
const
|
|
509
|
+
const stopRecording = async () => {
|
|
510
510
|
if (!isRecording) return;
|
|
511
511
|
isRecording = false;
|
|
512
512
|
chatMicBtn.classList.remove('recording');
|
|
@@ -520,35 +520,35 @@ class AgentGUIClient {
|
|
|
520
520
|
}
|
|
521
521
|
};
|
|
522
522
|
|
|
523
|
-
chatMicBtn.addEventListener('mousedown',
|
|
523
|
+
chatMicBtn.addEventListener('mousedown', (e) => {
|
|
524
524
|
e.preventDefault();
|
|
525
|
-
|
|
525
|
+
startRecording();
|
|
526
526
|
});
|
|
527
527
|
|
|
528
|
-
chatMicBtn.addEventListener('mouseup',
|
|
528
|
+
chatMicBtn.addEventListener('mouseup', (e) => {
|
|
529
529
|
e.preventDefault();
|
|
530
|
-
|
|
530
|
+
stopRecording();
|
|
531
531
|
});
|
|
532
532
|
|
|
533
|
-
chatMicBtn.addEventListener('mouseleave',
|
|
533
|
+
chatMicBtn.addEventListener('mouseleave', (e) => {
|
|
534
534
|
if (isRecording) {
|
|
535
|
-
|
|
535
|
+
stopRecording();
|
|
536
536
|
}
|
|
537
537
|
});
|
|
538
538
|
|
|
539
|
-
chatMicBtn.addEventListener('touchstart',
|
|
539
|
+
chatMicBtn.addEventListener('touchstart', (e) => {
|
|
540
540
|
e.preventDefault();
|
|
541
|
-
|
|
541
|
+
startRecording();
|
|
542
542
|
});
|
|
543
543
|
|
|
544
|
-
chatMicBtn.addEventListener('touchend',
|
|
544
|
+
chatMicBtn.addEventListener('touchend', (e) => {
|
|
545
545
|
e.preventDefault();
|
|
546
|
-
|
|
546
|
+
stopRecording();
|
|
547
547
|
});
|
|
548
548
|
|
|
549
|
-
chatMicBtn.addEventListener('touchcancel',
|
|
549
|
+
chatMicBtn.addEventListener('touchcancel', (e) => {
|
|
550
550
|
if (isRecording) {
|
|
551
|
-
|
|
551
|
+
stopRecording();
|
|
552
552
|
}
|
|
553
553
|
});
|
|
554
554
|
}
|