ai-or-die 0.1.71 → 0.1.73
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/ai-or-die.js +10 -12
- package/package.json +1 -1
- package/src/public/app.js +83 -14
- package/src/public/heartbeat-watchdog.js +25 -0
- package/src/public/index.html +1 -0
- package/src/public/voice-frame.js +73 -0
- package/src/public/voice-handler.js +4 -0
- package/src/server.js +225 -56
- package/src/sticky-note-engine.js +75 -32
- package/src/sticky-note-jsonl.js +9 -2
- package/src/sticky-note-worker.js +7 -0
- package/src/stt-engine.js +179 -14
- package/src/stt-worker.js +31 -4
- package/src/usage-reader.js +5 -2
- package/src/utils/pcm.js +22 -0
- package/src/utils/ws-voice-frame.js +73 -0
package/bin/ai-or-die.js
CHANGED
|
@@ -142,7 +142,7 @@ async function main() {
|
|
|
142
142
|
}
|
|
143
143
|
|
|
144
144
|
const app = new ClaudeCodeWebServer(serverOptions);
|
|
145
|
-
|
|
145
|
+
await app.start();
|
|
146
146
|
|
|
147
147
|
const protocol = options.https ? 'https' : 'http';
|
|
148
148
|
const baseUrl = `${protocol}://localhost:${port}`;
|
|
@@ -187,17 +187,15 @@ async function main() {
|
|
|
187
187
|
|
|
188
188
|
console.log('\nPress Ctrl+C to stop the server\n');
|
|
189
189
|
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
process.on('SIGINT', () => { shutdown(); });
|
|
200
|
-
process.on('SIGTERM', () => { shutdown(); });
|
|
190
|
+
// Shutdown is owned by the server's single SIGINT/SIGTERM handler
|
|
191
|
+
// (ClaudeCodeWebServer.handleShutdown), which performs the ordered graceful
|
|
192
|
+
// teardown: cooperative disposal of the local-LLM (sticky-note) and STT
|
|
193
|
+
// native worker threads, tunnel stop, session save, then server close.
|
|
194
|
+
// A second handler here used to race it — its httpServer.close() callback
|
|
195
|
+
// fires immediately when there are no open connections and called
|
|
196
|
+
// process.exit(0) before the worker threads could dispose their ggml-based
|
|
197
|
+
// native models, which aborted the process (SIGABRT / exit 134) on Ctrl+C.
|
|
198
|
+
// So we deliberately do NOT register a SIGINT/SIGTERM handler here.
|
|
201
199
|
|
|
202
200
|
} catch (error) {
|
|
203
201
|
console.error('Error starting server:', error.message);
|
package/package.json
CHANGED
package/src/public/app.js
CHANGED
|
@@ -1386,8 +1386,19 @@ class ClaudeCodeWebInterface {
|
|
|
1386
1386
|
|
|
1387
1387
|
this.voiceController = new window.VoiceHandler.VoiceInputController({
|
|
1388
1388
|
mode: this.voiceMode,
|
|
1389
|
+
// Refuse a new recording while a previous transcription is still
|
|
1390
|
+
// pending (single timeout slot + no correlation id — overlapping
|
|
1391
|
+
// uploads would clobber each other's spinner/timeout).
|
|
1392
|
+
canStart: function () {
|
|
1393
|
+
return !self._voiceTranscriptionTimeout;
|
|
1394
|
+
},
|
|
1389
1395
|
onRecordingStart: function () {
|
|
1390
1396
|
self._playMicChime('on');
|
|
1397
|
+
// Suspend the heartbeat pong-timeout while capturing: the main
|
|
1398
|
+
// thread can be busy enough (esp. the ScriptProcessor fallback)
|
|
1399
|
+
// to miss a pong, which would otherwise force a spurious reconnect.
|
|
1400
|
+
self._voiceRecordingActive = true;
|
|
1401
|
+
if (self._heartbeat) self._heartbeat.pause();
|
|
1391
1402
|
btn.classList.add('recording');
|
|
1392
1403
|
btn.classList.remove('processing');
|
|
1393
1404
|
btn.setAttribute('aria-pressed', 'true');
|
|
@@ -1415,6 +1426,8 @@ class ClaudeCodeWebInterface {
|
|
|
1415
1426
|
},
|
|
1416
1427
|
onRecordingStop: function (result) {
|
|
1417
1428
|
self._playMicChime('off');
|
|
1429
|
+
self._voiceRecordingActive = false;
|
|
1430
|
+
if (self._heartbeat) self._heartbeat.resume();
|
|
1418
1431
|
btn.classList.remove('recording');
|
|
1419
1432
|
btn.setAttribute('aria-pressed', 'false');
|
|
1420
1433
|
btn.title = 'Voice Input (Ctrl+Shift+M)';
|
|
@@ -1425,21 +1438,33 @@ class ClaudeCodeWebInterface {
|
|
|
1425
1438
|
}
|
|
1426
1439
|
|
|
1427
1440
|
if (self.voiceMode === 'local' && result && result.samples) {
|
|
1441
|
+
// Guard against a zero-sample recording (would send a
|
|
1442
|
+
// header-only frame the server rejects as "too short").
|
|
1443
|
+
if (!result.samples.byteLength || result.samples.byteLength < 2) {
|
|
1444
|
+
btn.classList.remove('processing');
|
|
1445
|
+
if (window.feedback) window.feedback.error('No audio captured');
|
|
1446
|
+
return;
|
|
1447
|
+
}
|
|
1448
|
+
|
|
1428
1449
|
btn.classList.add('processing');
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1450
|
+
|
|
1451
|
+
// Send raw Int16 PCM as a tagged binary WS frame (no base64 —
|
|
1452
|
+
// base64's 33% inflation is what pushed long clips past the
|
|
1453
|
+
// 1 MiB frame guard and crashed the page).
|
|
1454
|
+
var frame = window.VoiceFrame.buildVoiceFrame(result.samples);
|
|
1455
|
+
var sent = self.sendBinary(frame);
|
|
1456
|
+
|
|
1457
|
+
if (!sent) {
|
|
1458
|
+
// Socket not OPEN (e.g. mid-reconnect): fail fast instead of
|
|
1459
|
+
// silently dropping the frame and hanging the spinner 90 s.
|
|
1460
|
+
btn.classList.remove('processing');
|
|
1461
|
+
var notSentMsg = 'Connection not ready — recording not sent';
|
|
1462
|
+
if (window.feedback) window.feedback.error(notSentMsg);
|
|
1463
|
+
if (self.terminal) {
|
|
1464
|
+
self.terminal.write('\r\n\x1b[31m[Voice error] ' + notSentMsg + '\x1b[0m\r\n');
|
|
1465
|
+
}
|
|
1466
|
+
return;
|
|
1436
1467
|
}
|
|
1437
|
-
var base64Audio = btoa(parts.join(''));
|
|
1438
|
-
self.send({
|
|
1439
|
-
type: 'voice_upload',
|
|
1440
|
-
audio: base64Audio,
|
|
1441
|
-
durationMs: result.durationMs
|
|
1442
|
-
});
|
|
1443
1468
|
|
|
1444
1469
|
// Client-side timeout for transcription processing (90 seconds)
|
|
1445
1470
|
self._voiceTranscriptionTimeout = setTimeout(function () {
|
|
@@ -1467,6 +1492,8 @@ class ClaudeCodeWebInterface {
|
|
|
1467
1492
|
self._deliverVoiceTranscription(text);
|
|
1468
1493
|
},
|
|
1469
1494
|
onError: function (err) {
|
|
1495
|
+
self._voiceRecordingActive = false;
|
|
1496
|
+
if (self._heartbeat) self._heartbeat.resume();
|
|
1470
1497
|
btn.classList.remove('recording', 'processing');
|
|
1471
1498
|
btn.setAttribute('aria-pressed', 'false');
|
|
1472
1499
|
btn.title = 'Voice Input (Ctrl+Shift+M)';
|
|
@@ -1497,6 +1524,8 @@ class ClaudeCodeWebInterface {
|
|
|
1497
1524
|
}
|
|
1498
1525
|
},
|
|
1499
1526
|
onCancel: function () {
|
|
1527
|
+
self._voiceRecordingActive = false;
|
|
1528
|
+
if (self._heartbeat) self._heartbeat.resume();
|
|
1500
1529
|
btn.classList.remove('recording', 'processing');
|
|
1501
1530
|
btn.setAttribute('aria-pressed', 'false');
|
|
1502
1531
|
btn.title = 'Voice Input (Ctrl+Shift+M)';
|
|
@@ -2043,6 +2072,32 @@ class ClaudeCodeWebInterface {
|
|
|
2043
2072
|
if (this._heartbeat) { this._heartbeat.stop(); this._heartbeat = null; }
|
|
2044
2073
|
if (this._heartbeatTimer) { clearInterval(this._heartbeatTimer); this._heartbeatTimer = null; }
|
|
2045
2074
|
if (this._pongTimer) { clearTimeout(this._pongTimer); this._pongTimer = null; }
|
|
2075
|
+
|
|
2076
|
+
// A close mid-transcription must not leave the mic spinner + its
|
|
2077
|
+
// 90 s timeout hanging.
|
|
2078
|
+
if (this._voiceTranscriptionTimeout) {
|
|
2079
|
+
clearTimeout(this._voiceTranscriptionTimeout);
|
|
2080
|
+
this._voiceTranscriptionTimeout = null;
|
|
2081
|
+
}
|
|
2082
|
+
this._voiceRecordingActive = false;
|
|
2083
|
+
const voiceBtn = document.getElementById('voiceInputBtn');
|
|
2084
|
+
if (voiceBtn) voiceBtn.classList.remove('processing');
|
|
2085
|
+
|
|
2086
|
+
// Log the close code so field reports can tell a server frame
|
|
2087
|
+
// rejection (1009/1003, at stop) from a heartbeat pong-timeout
|
|
2088
|
+
// (4000, mid-recording).
|
|
2089
|
+
console.warn('[ws] closed', event.code, event.reason || '');
|
|
2090
|
+
|
|
2091
|
+
// 1009/1003 are server-initiated CLEAN closes (wasClean=true): the
|
|
2092
|
+
// server rejected our frame. Surface a specific message and still
|
|
2093
|
+
// reconnect below, instead of dead-ending on "refresh the page".
|
|
2094
|
+
const voiceClose = (window.VoiceFrame && window.VoiceFrame.classifyVoiceClose)
|
|
2095
|
+
? window.VoiceFrame.classifyVoiceClose(event.code)
|
|
2096
|
+
: { rejected: false, message: null };
|
|
2097
|
+
if (voiceClose.rejected && window.feedback) {
|
|
2098
|
+
window.feedback.error(voiceClose.message);
|
|
2099
|
+
}
|
|
2100
|
+
|
|
2046
2101
|
// During server restart, don't count failures against reconnect budget
|
|
2047
2102
|
// but still use backoff to avoid thundering herd
|
|
2048
2103
|
if (this._serverRestarting) {
|
|
@@ -2056,7 +2111,7 @@ class ClaudeCodeWebInterface {
|
|
|
2056
2111
|
if (restartGen !== this._socketGeneration) return;
|
|
2057
2112
|
this.reconnect();
|
|
2058
2113
|
}, restartBackoff);
|
|
2059
|
-
} else if (!event.wasClean && this.reconnectAttempts < this.maxReconnectAttempts) {
|
|
2114
|
+
} else if ((!event.wasClean || voiceClose.rejected) && this.reconnectAttempts < this.maxReconnectAttempts) {
|
|
2060
2115
|
this.updateStatus('Reconnecting (' + (this.reconnectAttempts + 1) + '/' + this.maxReconnectAttempts + ')...');
|
|
2061
2116
|
// First attempt is fast (250ms covers a server-process restart window);
|
|
2062
2117
|
// subsequent attempts use exponential backoff with jitter.
|
|
@@ -2174,6 +2229,17 @@ class ClaudeCodeWebInterface {
|
|
|
2174
2229
|
}
|
|
2175
2230
|
}
|
|
2176
2231
|
|
|
2232
|
+
// Send a binary WS frame (e.g. a voice PCM frame). Returns true if it was
|
|
2233
|
+
// handed to an OPEN socket, false otherwise so the caller can react to a
|
|
2234
|
+
// closed/closing socket instead of silently dropping the frame.
|
|
2235
|
+
sendBinary(view) {
|
|
2236
|
+
if (this.socket && this.socket.readyState === WebSocket.OPEN) {
|
|
2237
|
+
this.socket.send(view);
|
|
2238
|
+
return true;
|
|
2239
|
+
}
|
|
2240
|
+
return false;
|
|
2241
|
+
}
|
|
2242
|
+
|
|
2177
2243
|
_handleStickyNoteUpdate(message) {
|
|
2178
2244
|
if (!message || !message.sessionId) return;
|
|
2179
2245
|
const sm = this.sessionTabManager;
|
|
@@ -4368,6 +4434,9 @@ class ClaudeCodeWebInterface {
|
|
|
4368
4434
|
log: (m) => console.warn('[heartbeat]', m),
|
|
4369
4435
|
});
|
|
4370
4436
|
this._heartbeat.start();
|
|
4437
|
+
// If a recording is in progress (e.g. this heartbeat was re-created after
|
|
4438
|
+
// a reconnect mid-recording), keep pong-timeout enforcement suspended.
|
|
4439
|
+
if (this._voiceRecordingActive) this._heartbeat.pause();
|
|
4371
4440
|
// Keep _heartbeatTimer/_pongTimer references in sync for legacy code
|
|
4372
4441
|
// (disconnect() still nulls them defensively); the watchdog owns the
|
|
4373
4442
|
// real timer lifecycle via stop().
|
package/src/public/heartbeat-watchdog.js
CHANGED
|
@@ -59,6 +59,10 @@
|
|
|
59
59
|
this._clearTimeout = t.clearTimeout || ((id) => clearTimeout(id));
|
|
60
60
|
this._heartbeatTimer = null;
|
|
61
61
|
this._pongTimer = null;
|
|
62
|
+
// When paused (e.g. during mic recording), pings still go out but a
|
|
63
|
+
// missed pong does NOT force a reconnect — the client main thread can
|
|
64
|
+
// be busy capturing audio and briefly stop servicing the pong.
|
|
65
|
+
this._paused = false;
|
|
62
66
|
}
|
|
63
67
|
|
|
64
68
|
_isStale() {
|
|
@@ -75,6 +79,9 @@
|
|
|
75
79
|
} catch (_) {
|
|
76
80
|
return;
|
|
77
81
|
}
|
|
82
|
+
// Paused: keep liveness pings flowing but do NOT arm the pong-timeout
|
|
83
|
+
// (a missed pong while recording must not force-close the socket).
|
|
84
|
+
if (this._paused) return;
|
|
78
85
|
if (this._pongTimer) this._clearTimeout(this._pongTimer);
|
|
79
86
|
this._pongTimer = this._setTimeout(() => {
|
|
80
87
|
if (this._isStale()) return;
|
|
@@ -118,6 +125,24 @@
|
|
|
118
125
|
this._pongTimer = null;
|
|
119
126
|
}
|
|
120
127
|
}
|
|
128
|
+
|
|
129
|
+
/**
|
|
130
|
+
* Suspend pong-timeout enforcement (pings continue). Use while the client
|
|
131
|
+
* main thread may be busy enough to miss a pong — e.g. mic recording —
|
|
132
|
+
* so a transient stall doesn't trigger a spurious reconnect.
|
|
133
|
+
*/
|
|
134
|
+
pause() {
|
|
135
|
+
this._paused = true;
|
|
136
|
+
if (this._pongTimer) {
|
|
137
|
+
this._clearTimeout(this._pongTimer);
|
|
138
|
+
this._pongTimer = null;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
/** Resume normal pong-timeout enforcement (next ping re-arms it). */
|
|
143
|
+
resume() {
|
|
144
|
+
this._paused = false;
|
|
145
|
+
}
|
|
121
146
|
}
|
|
122
147
|
|
|
123
148
|
return HeartbeatWatchdog;
|
package/src/public/index.html
CHANGED
|
@@ -801,6 +801,7 @@
|
|
|
801
801
|
<script src="vscode-tunnel.js"></script>
|
|
802
802
|
<script src="app-tunnel.js"></script>
|
|
803
803
|
<script src="voice-handler.js"></script>
|
|
804
|
+
<script src="voice-frame.js"></script>
|
|
804
805
|
<script src="command-palette.js"></script>
|
|
805
806
|
<script src="extra-keys.js"></script>
|
|
806
807
|
<script src="input-overlay.js"></script>
|
package/src/public/voice-frame.js
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VoiceFrame
|
|
3
|
+
*
|
|
4
|
+
* Pure helpers for the client->server binary voice path, factored out of app.js
|
|
5
|
+
* so they can be unit-tested in Node. Mirrors the UMD shape of
|
|
6
|
+
* heartbeat-watchdog.js (CommonJS in tests, `window.VoiceFrame` in the browser).
|
|
7
|
+
*/
|
|
8
|
+
(function (global, factory) {
|
|
9
|
+
if (typeof module === 'object' && module.exports) {
|
|
10
|
+
module.exports = factory();
|
|
11
|
+
} else {
|
|
12
|
+
global.VoiceFrame = factory();
|
|
13
|
+
}
|
|
14
|
+
}(typeof self !== 'undefined' ? self : this, function () {
|
|
15
|
+
|
|
16
|
+
// Wire header: [ "VUP1" (4) ][ version (1) ][ type (1) ] then raw 16-bit PCM.
|
|
17
|
+
var MAGIC_V = 0x56; // 'V'
|
|
18
|
+
var MAGIC_U = 0x55; // 'U'
|
|
19
|
+
var MAGIC_P = 0x50; // 'P'
|
|
20
|
+
var MAGIC_1 = 0x31; // '1'
|
|
21
|
+
var PROTO_VERSION = 0x01;
|
|
22
|
+
var FRAME_TYPE_PCM = 0x01;
|
|
23
|
+
var HEADER_BYTES = 6;
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* Build a binary voice frame: the 6-byte header followed by the PCM bytes of
|
|
27
|
+
* `samples`. Uses byteOffset/byteLength so a subarray-backed Int16Array is
|
|
28
|
+
* copied correctly (not the whole underlying buffer).
|
|
29
|
+
*
|
|
30
|
+
* @param {Int16Array} samples
|
|
31
|
+
* @returns {Uint8Array}
|
|
32
|
+
*/
|
|
33
|
+
function buildVoiceFrame(samples) {
|
|
34
|
+
var pcm = new Uint8Array(samples.buffer, samples.byteOffset, samples.byteLength);
|
|
35
|
+
var frame = new Uint8Array(HEADER_BYTES + pcm.length);
|
|
36
|
+
frame[0] = MAGIC_V;
|
|
37
|
+
frame[1] = MAGIC_U;
|
|
38
|
+
frame[2] = MAGIC_P;
|
|
39
|
+
frame[3] = MAGIC_1;
|
|
40
|
+
frame[4] = PROTO_VERSION;
|
|
41
|
+
frame[5] = FRAME_TYPE_PCM;
|
|
42
|
+
frame.set(pcm, HEADER_BYTES);
|
|
43
|
+
return frame;
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
/**
|
|
47
|
+
* Classify a WebSocket close code for the voice path.
|
|
48
|
+
*
|
|
49
|
+
* 1009 (server rejected an oversized frame) and 1003 (unsupported/garbage
|
|
50
|
+
* binary) are server-initiated CLEAN closes, so `event.wasClean` is true and
|
|
51
|
+
* the default onclose path would SKIP reconnect and dead-end on
|
|
52
|
+
* "refresh the page". Treat them as recoverable: show a specific message and
|
|
53
|
+
* still reconnect (bounded by the normal attempt budget).
|
|
54
|
+
*
|
|
55
|
+
* @param {number} code
|
|
56
|
+
* @returns {{rejected: boolean, message: (string|null)}}
|
|
57
|
+
*/
|
|
58
|
+
function classifyVoiceClose(code) {
|
|
59
|
+
if (code === 1009 || code === 1003) {
|
|
60
|
+
return {
|
|
61
|
+
rejected: true,
|
|
62
|
+
message: 'A voice message was rejected by the server. Reconnecting…'
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
return { rejected: false, message: null };
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
return {
|
|
69
|
+
HEADER_BYTES: HEADER_BYTES,
|
|
70
|
+
buildVoiceFrame: buildVoiceFrame,
|
|
71
|
+
classifyVoiceClose: classifyVoiceClose
|
|
72
|
+
};
|
|
73
|
+
}));
|
package/src/public/voice-handler.js
CHANGED
|
@@ -625,6 +625,9 @@ function VoiceInputController(options) {
|
|
|
625
625
|
this._onTranscription = options.onTranscription || null;
|
|
626
626
|
this._onError = options.onError || null;
|
|
627
627
|
this._onCancel = options.onCancel || null;
|
|
628
|
+
// Optional predicate: if it returns false, a start request is ignored (e.g.
|
|
629
|
+
// a previous transcription is still pending). Gates both button + keyboard.
|
|
630
|
+
this._canStart = options.canStart || null;
|
|
628
631
|
|
|
629
632
|
this._recorder = null;
|
|
630
633
|
this._starting = false;
|
|
@@ -670,6 +673,7 @@ VoiceInputController.prototype.startRecording = function () {
|
|
|
670
673
|
var self = this;
|
|
671
674
|
if (self._starting) return;
|
|
672
675
|
if (self._recorder && self._recorder.isRecording) return;
|
|
676
|
+
if (self._canStart && !self._canStart()) return;
|
|
673
677
|
|
|
674
678
|
self._starting = true;
|
|
675
679
|
self._recorder = self._createRecorder();
|