shmakk 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent.js +0 -0
- package/src/services/voice.js +10 -7
- package/src/session.js +44 -18
- package/src/system-prompt.js +2 -0
package/package.json
CHANGED
package/src/agent.js
CHANGED
|
Binary file
|
package/src/services/voice.js
CHANGED
|
@@ -17,7 +17,7 @@ const PAD_START_SEC = parseFloat(process.env.SHMAKK_VOICE_PAD_START_SEC || '0.3'
|
|
|
17
17
|
// as noise/silence and never sent to Whisper. Tunable for noisy rooms.
|
|
18
18
|
const MIN_RMS = parseFloat(process.env.SHMAKK_VOICE_MIN_RMS || '0.003');
|
|
19
19
|
// Minimum captured speech duration in seconds (anything shorter is noise).
|
|
20
|
-
const MIN_SPEECH_SEC = parseFloat(process.env.SHMAKK_VOICE_MIN_SPEECH_SEC || '0.
|
|
20
|
+
const MIN_SPEECH_SEC = parseFloat(process.env.SHMAKK_VOICE_MIN_SPEECH_SEC || '0.5');
|
|
21
21
|
|
|
22
22
|
// Track active TTS playback process so we can kill it on interrupt
|
|
23
23
|
let _ttsProc = null;
|
|
@@ -206,7 +206,7 @@ function filterHallucination(text) {
|
|
|
206
206
|
return text;
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
-
async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop } = {}) {
|
|
209
|
+
async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop, suppressKillTts = false } = {}) {
|
|
210
210
|
ensureAudioDir();
|
|
211
211
|
const recorder = detectRecorder();
|
|
212
212
|
if (!recorder) {
|
|
@@ -215,8 +215,9 @@ async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop }
|
|
|
215
215
|
);
|
|
216
216
|
}
|
|
217
217
|
|
|
218
|
-
// Kill TTS so the AI stops talking when user starts speaking
|
|
219
|
-
|
|
218
|
+
// Kill TTS so the AI stops talking when user starts speaking.
|
|
219
|
+
// Suppressed in the interrupt-listener path where TTS is intentionally running.
|
|
220
|
+
if (!suppressKillTts) _killTts();
|
|
220
221
|
|
|
221
222
|
const outFile = path.join(AUDIO_DIR, `voice-${Date.now()}.wav`);
|
|
222
223
|
if (onStart) onStart();
|
|
@@ -235,9 +236,11 @@ async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop }
|
|
|
235
236
|
try {
|
|
236
237
|
const { rms, durationSec } = audioStats(outFile);
|
|
237
238
|
if (rms < MIN_RMS || durationSec < MIN_SPEECH_SEC) {
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
239
|
+
if (rms > 0.0001) { // only log if there was actual audio — skip pure silence
|
|
240
|
+
process.stderr.write(
|
|
241
|
+
`\r\x1b[90m[voice] skip (too quiet): rms=${rms.toFixed(4)} dur=${durationSec.toFixed(2)}s — tune with SHMAKK_VOICE_MIN_RMS\x1b[0m\n`,
|
|
242
|
+
);
|
|
243
|
+
}
|
|
241
244
|
cleanupFile(outFile);
|
|
242
245
|
return '';
|
|
243
246
|
}
|
package/src/session.js
CHANGED
|
@@ -35,6 +35,17 @@ const FLUSH_AFTER_BYTES = 8 * 1024;
|
|
|
35
35
|
// limit are dropped from the front, preserving the most recent context).
|
|
36
36
|
const HISTORY_MAX_ENTRIES = 30;
|
|
37
37
|
|
|
38
|
+
// Kitty terminal sends \x1b[99;5u instead of \x03 for Ctrl+C.
|
|
39
|
+
// Returns the byte index of the first Ctrl+C (either form), or -1.
|
|
40
|
+
const KITTY_CTRL_C = Buffer.from([0x1b, 0x5b, 0x39, 0x39, 0x3b, 0x35, 0x75]); // \x1b[99;5u
|
|
41
|
+
function findCtrlC(data) {
|
|
42
|
+
for (let i = 0; i < data.length; i++) {
|
|
43
|
+
if (data[i] === 0x03) return i;
|
|
44
|
+
if (data[i] === 0x1b && data.slice(i, i + KITTY_CTRL_C.length).equals(KITTY_CTRL_C)) return i;
|
|
45
|
+
}
|
|
46
|
+
return -1;
|
|
47
|
+
}
|
|
48
|
+
|
|
38
49
|
function isAbortError(e) {
|
|
39
50
|
return e && (e.name === 'AbortError' || /aborted/i.test(String(e.message || '')));
|
|
40
51
|
}
|
|
@@ -117,20 +128,14 @@ async function runOneSession(opts, registerSession) {
|
|
|
117
128
|
audit.append({ kind: 'session-start', workspace: cwd, pinnedWorkspace, review: !!opts.review, pid: process.pid });
|
|
118
129
|
|
|
119
130
|
// ── Global Ctrl+C handler (persistent bottom-of-stack) ──
|
|
120
|
-
//
|
|
121
|
-
//
|
|
122
|
-
// 1. Stop TTS playback if active
|
|
123
|
-
// 2. Kill voice recording if in progress
|
|
124
|
-
// 3. On a second Ctrl+C within 2s while --sts is active, exit the
|
|
125
|
-
// always-on voice loop so the user is left at a normal shell prompt.
|
|
126
|
-
// If none apply, pass through to the child shell.
|
|
127
|
-
// Ctrl+C = shut up. Kills TTS, recorder, and voice loop. Always.
|
|
128
|
-
// Ctrl+D exits the shell as normal (we never touch it).
|
|
131
|
+
// Ctrl+C = shut up. Kills TTS, recorder, and voice loop immediately.
|
|
132
|
+
// Ctrl+D exits the shell as normal (we never intercept it).
|
|
129
133
|
session.captureStdin((data) => {
|
|
130
|
-
|
|
131
|
-
|
|
134
|
+
if (opts.tts || opts.stt || opts.sts) {
|
|
135
|
+
const cut = findCtrlC(data);
|
|
136
|
+
if (cut !== -1) {
|
|
132
137
|
try { fullVoiceTeardown(); } catch {}
|
|
133
|
-
if (
|
|
138
|
+
if (cut > 0) session.childWrite(data.slice(0, cut));
|
|
134
139
|
session.childWrite('\r');
|
|
135
140
|
return;
|
|
136
141
|
}
|
|
@@ -175,10 +180,7 @@ async function runOneSession(opts, registerSession) {
|
|
|
175
180
|
const ctrl = new AbortController();
|
|
176
181
|
setMaxListeners(0, ctrl.signal);
|
|
177
182
|
const release = session.captureStdin((data) => {
|
|
178
|
-
|
|
179
|
-
for (let i = 0; i < data.length; i++) {
|
|
180
|
-
if (data[i] === 0x03) { cut = i; break; }
|
|
181
|
-
}
|
|
183
|
+
const cut = findCtrlC(data);
|
|
182
184
|
if (cut === -1) {
|
|
183
185
|
session.childWrite(data);
|
|
184
186
|
return;
|
|
@@ -250,6 +252,7 @@ async function runOneSession(opts, registerSession) {
|
|
|
250
252
|
history,
|
|
251
253
|
profile: opts.profile,
|
|
252
254
|
colors: colorsEnabled,
|
|
255
|
+
voiceMode: true,
|
|
253
256
|
});
|
|
254
257
|
history = trimHistory(updated || history);
|
|
255
258
|
if ((opts.tts || opts.sts) && updated && updated.length) {
|
|
@@ -272,7 +275,30 @@ async function runOneSession(opts, registerSession) {
|
|
|
272
275
|
if (session._stsFlags) session._stsFlags.setTtsSpeaking(false);
|
|
273
276
|
if (err && opts.debug) process.stderr.write(`[shmakk] tts: ${err.message}\n`);
|
|
274
277
|
};
|
|
275
|
-
|
|
278
|
+
// Parallel interrupt listener — lets user say "stop" to cut TTS.
|
|
279
|
+
// suppressKillTts=true so recording alongside TTS doesn't immediately kill it.
|
|
280
|
+
// Loop is gated on myGen so it stops the moment settle() fires.
|
|
281
|
+
const STOP_WORDS = new Set(['stop', 'quiet', 'shut up', 'silence', 'enough', 'cancel']);
|
|
282
|
+
let interruptListening = true;
|
|
283
|
+
const listenForInterrupt = async () => {
|
|
284
|
+
const vs = getVoiceService();
|
|
285
|
+
while (interruptListening && session._ttsGen === myGen) {
|
|
286
|
+
try {
|
|
287
|
+
const heard = await vs.recordAndTranscribe({ maxDurationSec: 2, suppressKillTts: true });
|
|
288
|
+
if (!heard) continue;
|
|
289
|
+
if (STOP_WORDS.has(heard.toLowerCase().trim().replace(/[.!?]$/, ''))) {
|
|
290
|
+
try { fullVoiceTeardown(); } catch {}
|
|
291
|
+
break;
|
|
292
|
+
}
|
|
293
|
+
} catch { break; }
|
|
294
|
+
}
|
|
295
|
+
};
|
|
296
|
+
listenForInterrupt().catch(() => {});
|
|
297
|
+
const settleAndStop = (err) => {
|
|
298
|
+
interruptListening = false; // stop interrupt loop before unpausing voice loop
|
|
299
|
+
settle(err);
|
|
300
|
+
};
|
|
301
|
+
tts.speak(reply, { voice: ttsVoice }).then(() => settleAndStop()).catch(settleAndStop);
|
|
276
302
|
}
|
|
277
303
|
}
|
|
278
304
|
}
|
|
@@ -372,7 +398,7 @@ async function runOneSession(opts, registerSession) {
|
|
|
372
398
|
let recordingDone = false;
|
|
373
399
|
const release = session.captureStdin((data) => {
|
|
374
400
|
for (let i = 0; i < data.length; i++) {
|
|
375
|
-
if (data[i] === 0x03 || data[i] === 0x0f) {
|
|
401
|
+
if (data[i] === 0x03 || data[i] === 0x0f || findCtrlC(data) !== -1) {
|
|
376
402
|
recordingDone = true;
|
|
377
403
|
// Kill the recorder process immediately
|
|
378
404
|
try { vs._killRecorder(); } catch {}
|
package/src/system-prompt.js
CHANGED
|
@@ -10,6 +10,7 @@ function buildSystemPrompt({
|
|
|
10
10
|
activeSkillText,
|
|
11
11
|
maxDiscoveryCallsPerRound,
|
|
12
12
|
runtimeProfile,
|
|
13
|
+
voiceMode = false,
|
|
13
14
|
}) {
|
|
14
15
|
return `You are an expert AI coding assistant running inside shmakk.
|
|
15
16
|
|
|
@@ -255,6 +256,7 @@ Otherwise output only:
|
|
|
255
256
|
{"shmakk_actions":[{"tool":"tool_name","args":{...}}]}
|
|
256
257
|
${indexHint}
|
|
257
258
|
${activeSkillText ? `\n\n${activeSkillText}` : ''}
|
|
259
|
+
${voiceMode ? `\n\nVOICE MODE: The user is speaking to you. Keep every reply under 2 sentences. No bullet points, no markdown, no code blocks unless explicitly asked. Speak like a human, not a document.` : ''}
|
|
258
260
|
`;
|
|
259
261
|
}
|
|
260
262
|
|