shmakk 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "shmakk",
3
- "version": "1.1.0",
3
+ "version": "1.1.1",
4
4
  "description": "AI-supervised terminal wrapper — command correction, tool-driven tasks, safety controls",
5
5
  "license": "MIT",
6
6
  "keywords": [
package/src/agent.js CHANGED
Binary file
@@ -17,7 +17,7 @@ const PAD_START_SEC = parseFloat(process.env.SHMAKK_VOICE_PAD_START_SEC || '0.3'
17
17
  // as noise/silence and never sent to Whisper. Tunable for noisy rooms.
18
18
  const MIN_RMS = parseFloat(process.env.SHMAKK_VOICE_MIN_RMS || '0.003');
19
19
  // Minimum captured speech duration in seconds (anything shorter is noise).
20
- const MIN_SPEECH_SEC = parseFloat(process.env.SHMAKK_VOICE_MIN_SPEECH_SEC || '0.8');
20
+ const MIN_SPEECH_SEC = parseFloat(process.env.SHMAKK_VOICE_MIN_SPEECH_SEC || '0.5');
21
21
 
22
22
  // Track active TTS playback process so we can kill it on interrupt
23
23
  let _ttsProc = null;
@@ -206,7 +206,7 @@ function filterHallucination(text) {
206
206
  return text;
207
207
  }
208
208
 
209
- async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop } = {}) {
209
+ async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop, suppressKillTts = false } = {}) {
210
210
  ensureAudioDir();
211
211
  const recorder = detectRecorder();
212
212
  if (!recorder) {
@@ -215,8 +215,9 @@ async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop }
215
215
  );
216
216
  }
217
217
 
218
- // Kill TTS so the AI stops talking when user starts speaking
219
- _killTts();
218
+ // Kill TTS so the AI stops talking when user starts speaking.
219
+ // Suppressed in the interrupt-listener path where TTS is intentionally running.
220
+ if (!suppressKillTts) _killTts();
220
221
 
221
222
  const outFile = path.join(AUDIO_DIR, `voice-${Date.now()}.wav`);
222
223
  if (onStart) onStart();
@@ -235,9 +236,11 @@ async function recordAndTranscribe({ language, maxDurationSec, onStart, onStop }
235
236
  try {
236
237
  const { rms, durationSec } = audioStats(outFile);
237
238
  if (rms < MIN_RMS || durationSec < MIN_SPEECH_SEC) {
238
- process.stderr.write(
239
- `\r\x1b[90m[voice] skip (too quiet): rms=${rms.toFixed(4)} dur=${durationSec.toFixed(2)}s — tune with SHMAKK_VOICE_MIN_RMS\x1b[0m\n`,
240
- );
239
+ if (rms > 0.0001) { // only log if there was actual audio — skip pure silence
240
+ process.stderr.write(
241
+ `\r\x1b[90m[voice] skip (too quiet): rms=${rms.toFixed(4)} dur=${durationSec.toFixed(2)}s — tune with SHMAKK_VOICE_MIN_RMS\x1b[0m\n`,
242
+ );
243
+ }
241
244
  cleanupFile(outFile);
242
245
  return '';
243
246
  }
package/src/session.js CHANGED
@@ -35,6 +35,17 @@ const FLUSH_AFTER_BYTES = 8 * 1024;
35
35
  // limit are dropped from the front, preserving the most recent context).
36
36
  const HISTORY_MAX_ENTRIES = 30;
37
37
 
38
+ // Kitty terminal sends \x1b[99;5u instead of \x03 for Ctrl+C.
39
+ // Returns the byte index of the first Ctrl+C (either form), or -1.
40
+ const KITTY_CTRL_C = Buffer.from([0x1b, 0x5b, 0x39, 0x39, 0x3b, 0x35, 0x75]); // \x1b[99;5u
41
+ function findCtrlC(data) {
42
+ for (let i = 0; i < data.length; i++) {
43
+ if (data[i] === 0x03) return i;
44
+ if (data[i] === 0x1b && data.slice(i, i + KITTY_CTRL_C.length).equals(KITTY_CTRL_C)) return i;
45
+ }
46
+ return -1;
47
+ }
48
+
38
49
  function isAbortError(e) {
39
50
  return e && (e.name === 'AbortError' || /aborted/i.test(String(e.message || '')));
40
51
  }
@@ -117,20 +128,14 @@ async function runOneSession(opts, registerSession) {
117
128
  audit.append({ kind: 'session-start', workspace: cwd, pinnedWorkspace, review: !!opts.review, pid: process.pid });
118
129
 
119
130
  // ── Global Ctrl+C handler (persistent bottom-of-stack) ──
120
- // Sits below any AI-task handler. When no AI is running, this is the
121
- // top handler and intercepts Ctrl+C to:
122
- // 1. Stop TTS playback if active
123
- // 2. Kill voice recording if in progress
124
- // 3. On a second Ctrl+C within 2s while --sts is active, exit the
125
- // always-on voice loop so the user is left at a normal shell prompt.
126
- // If none apply, pass through to the child shell.
127
- // Ctrl+C = shut up. Kills TTS, recorder, and voice loop. Always.
128
- // Ctrl+D exits the shell as normal (we never touch it).
131
+ // Ctrl+C = shut up. Kills TTS, recorder, and voice loop immediately.
132
+ // Ctrl+D exits the shell as normal (we never intercept it).
129
133
  session.captureStdin((data) => {
130
- for (let i = 0; i < data.length; i++) {
131
- if (data[i] === 0x03 && (opts.tts || opts.stt || opts.sts)) {
134
+ if (opts.tts || opts.stt || opts.sts) {
135
+ const cut = findCtrlC(data);
136
+ if (cut !== -1) {
132
137
  try { fullVoiceTeardown(); } catch {}
133
- if (i > 0) session.childWrite(data.slice(0, i));
138
+ if (cut > 0) session.childWrite(data.slice(0, cut));
134
139
  session.childWrite('\r');
135
140
  return;
136
141
  }
@@ -175,10 +180,7 @@ async function runOneSession(opts, registerSession) {
175
180
  const ctrl = new AbortController();
176
181
  setMaxListeners(0, ctrl.signal);
177
182
  const release = session.captureStdin((data) => {
178
- let cut = -1;
179
- for (let i = 0; i < data.length; i++) {
180
- if (data[i] === 0x03) { cut = i; break; }
181
- }
183
+ const cut = findCtrlC(data);
182
184
  if (cut === -1) {
183
185
  session.childWrite(data);
184
186
  return;
@@ -250,6 +252,7 @@ async function runOneSession(opts, registerSession) {
250
252
  history,
251
253
  profile: opts.profile,
252
254
  colors: colorsEnabled,
255
+ voiceMode: true,
253
256
  });
254
257
  history = trimHistory(updated || history);
255
258
  if ((opts.tts || opts.sts) && updated && updated.length) {
@@ -272,7 +275,30 @@ async function runOneSession(opts, registerSession) {
272
275
  if (session._stsFlags) session._stsFlags.setTtsSpeaking(false);
273
276
  if (err && opts.debug) process.stderr.write(`[shmakk] tts: ${err.message}\n`);
274
277
  };
275
- tts.speak(reply, { voice: ttsVoice }).then(() => settle()).catch(settle);
278
+ // Parallel interrupt listener lets user say "stop" to cut TTS.
279
+ // suppressKillTts=true so recording alongside TTS doesn't immediately kill it.
280
+ // Loop is gated on myGen so it stops the moment settle() fires.
281
+ const STOP_WORDS = new Set(['stop', 'quiet', 'shut up', 'silence', 'enough', 'cancel']);
282
+ let interruptListening = true;
283
+ const listenForInterrupt = async () => {
284
+ const vs = getVoiceService();
285
+ while (interruptListening && session._ttsGen === myGen) {
286
+ try {
287
+ const heard = await vs.recordAndTranscribe({ maxDurationSec: 2, suppressKillTts: true });
288
+ if (!heard) continue;
289
+ if (STOP_WORDS.has(heard.toLowerCase().trim().replace(/[.!?]$/, ''))) {
290
+ try { fullVoiceTeardown(); } catch {}
291
+ break;
292
+ }
293
+ } catch { break; }
294
+ }
295
+ };
296
+ listenForInterrupt().catch(() => {});
297
+ const settleAndStop = (err) => {
298
+ interruptListening = false; // stop interrupt loop before unpausing voice loop
299
+ settle(err);
300
+ };
301
+ tts.speak(reply, { voice: ttsVoice }).then(() => settleAndStop()).catch(settleAndStop);
276
302
  }
277
303
  }
278
304
  }
@@ -372,7 +398,7 @@ async function runOneSession(opts, registerSession) {
372
398
  let recordingDone = false;
373
399
  const release = session.captureStdin((data) => {
374
400
  for (let i = 0; i < data.length; i++) {
375
- if (data[i] === 0x03 || data[i] === 0x0f) {
401
+ if (data[i] === 0x03 || data[i] === 0x0f || findCtrlC(data) !== -1) {
376
402
  recordingDone = true;
377
403
  // Kill the recorder process immediately
378
404
  try { vs._killRecorder(); } catch {}
@@ -10,6 +10,7 @@ function buildSystemPrompt({
10
10
  activeSkillText,
11
11
  maxDiscoveryCallsPerRound,
12
12
  runtimeProfile,
13
+ voiceMode = false,
13
14
  }) {
14
15
  return `You are an expert AI coding assistant running inside shmakk.
15
16
 
@@ -255,6 +256,7 @@ Otherwise output only:
255
256
  {"shmakk_actions":[{"tool":"tool_name","args":{...}}]}
256
257
  ${indexHint}
257
258
  ${activeSkillText ? `\n\n${activeSkillText}` : ''}
259
+ ${voiceMode ? `\n\nVOICE MODE: The user is speaking to you. Keep every reply under 2 sentences. No bullet points, no markdown, no code blocks unless explicitly asked. Speak like a human, not a document.` : ''}
258
260
  `;
259
261
  }
260
262