shmakk 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/safety.js ADDED
@@ -0,0 +1,77 @@
1
+ const path = require('path');
2
+ const os = require('os');
3
+
4
// run-tool / shell-command danger patterns. Matched against the literal cmd
// string the agent passes to `run`. If matched → 'unsafe' (always prompts).
// The alternatives are OR-ed into one case-insensitive regex with no `g`
// flag, so `.test()` is stateless and safe to call repeatedly.
const DANGER_RE = new RegExp([
  // privilege escalation
  '\\bsudo\\b', '\\bsu\\s', '\\bdoas\\b', '\\bpkexec\\b',
  // any deletion (recursive or not — user wants confirmation on every delete)
  '\\brm\\b(?!\\s+--?(version|help))', '\\brmdir\\b', '\\bunlink\\b',
  '\\btrash(-put)?\\b', '\\bgio\\s+trash\\b',
  '\\bchmod\\s+-R\\b', '\\bchown\\s+-R\\b',
  '\\bfind\\b[^\\n]*-delete\\b',
  // disk / fs
  '\\bmkfs\\b', '\\bdd\\s+if=', '\\bshred\\b', '\\bwipe\\b',
  // pipe-to-shell
  '\\|\\s*(sh|bash|zsh|fish)\\b',
  // redirect into system paths. The allow-list (/tmp, /home, /var/tmp,
  // /dev/null) must match a whole path component: each entry has to be
  // followed by `/` or by a non-filename character, otherwise root-level
  // targets such as `/tmpfoo` or `/homework` would slip through as "safe".
  '>\\s*/(?!(?:tmp|home|var/tmp)(?:/|(?![\\w.-]))|dev/null(?![\\w./-]))',
  // personal config / display state mutation (lessons learned)
  '\\bsetxkbmap\\b', '\\blocalectl\\s+set\\b', '\\bgsettings\\s+set\\b',
  '\\bxset\\b', '\\bxrandr\\b', '\\bchsh\\b', '\\bcrontab\\s+-r\\b',
  '\\bsystemctl\\b(?!\\s+(status|cat|show|is-))', // status-y reads ok
  '\\bgit\\s+config\\s+--global\\b',
  // package manager: global / system installs run arbitrary code
  '\\bnpm\\s+(i|install|add)\\s+(-g|--global)\\b',
  '\\bpip\\d?\\s+install\\b', '\\bpipx\\s+install\\b',
  '\\bcargo\\s+install\\b', '\\bgo\\s+install\\b', '\\bgem\\s+install\\b',
  '\\bbrew\\s+install\\b',
  '\\bapt(-get)?\\s+(install|remove|purge|upgrade|dist-upgrade)\\b',
  '\\bdnf\\s+(install|remove|upgrade)\\b',
  '\\bpacman\\s+-[A-Z]*[SR]\\b', '\\byay\\s+-S\\b', '\\bparu\\s+-S\\b',
  '\\bzypper\\s+(in|install|rm|remove)\\b',
].join('|'), 'i');
35
+
36
/**
 * Classify a shell command string for the `run` tool.
 *
 * @param {string} cmd - literal command line the agent wants to execute
 * @returns {'safe'|'unsafe'} 'unsafe' when the command matches DANGER_RE;
 *   'safe' otherwise, including for an empty or missing command
 */
function classifyRunCommand(cmd) {
  const isDangerous = Boolean(cmd) && DANGER_RE.test(cmd);
  return isDangerous ? 'unsafe' : 'safe';
}
40
+
41
// Paths whose read OR write should always prompt (even in auto mode).
// Match against the *resolved relative path* we present to the agent, plus
// the absolute path so e.g. ~/.ssh works when workspace is ~.
// Every pattern is case-insensitive: on case-insensitive filesystems a path
// such as `.SSH/id_rsa` or `.Env` opens the same file as its lowercase form,
// so a case-sensitive match would let it bypass the prompt.
const SECRET_RE = [
  /(^|\/)\.env(\..*)?$/i,
  /(^|\/)\.netrc$/i,
  /(^|\/)\.npmrc$/i,
  /(^|\/)\.pypirc$/i,
  /(^|\/)\.gem\/credentials$/i,
  /(^|\/)\.ssh(\/|$)/i,
  /(^|\/)\.gnupg(\/|$)/i,
  /(^|\/)\.aws(\/|$)/i,
  /(^|\/)\.kube\/config(\/|$)/i,
  /(^|\/)\.docker\/config\.json$/i,
  /(^|\/)\.config\/gh(\/|$)/i,
  /\.pem$/i,
  /\.key$/i,
  /(^|\/)id_(rsa|ed25519|dsa|ecdsa)(\.pub)?$/i,
  /(^|\/)credentials(\.json|\.yaml|\.yml)?$/i,
];

/**
 * Report whether a path looks like a credential or secret location.
 *
 * @param {string} p - relative or absolute path using `/` separators
 * @returns {boolean} true when any SECRET_RE pattern matches; false for an
 *   empty or missing path
 */
function isSecretPath(p) {
  if (!p) return false;
  return SECRET_RE.some((pattern) => pattern.test(p));
}
66
+
67
/**
 * Produce a warning when the workspace is an overly broad directory.
 *
 * @param {string} workspace - workspace directory; resolved to an absolute path
 * @returns {string|null} a warning message when the resolved workspace is
 *   `/`, `/etc` or the user's home directory; otherwise null (also null
 *   when no workspace is given)
 */
function workspaceWarning(workspace) {
  if (!workspace) return null;

  const resolved = path.resolve(workspace);
  const broadRoots = ['/', '/etc', os.homedir()];
  if (!broadRoots.includes(resolved)) return null;

  return `workspace is ${resolved} — that's broad. Consider \`shmakk --workspace <project-dir>\` to keep AI scope smaller.`;
}
76
+
77
+ module.exports = { classifyRunCommand, isSecretPath, workspaceWarning, DANGER_RE };
@@ -0,0 +1,131 @@
1
+ // Speech-to-text via Whisper ONNX using @huggingface/transformers.
2
+ // No Python, no server — 100% JS, runs locally in-process.
3
+ // Model auto-downloads on first use (Xenova/whisper-base; the ~45MB figure quoted here was for quantized whisper-tiny).
4
+
5
+ const path = require('path');
6
+ const fs = require('fs');
7
+ const { WaveFile } = require('wavefile');
8
+
9
// Lazily-initialised module state for the speech-recognition pipeline.
let _pipeline = null;    // `pipeline` factory exported by @huggingface/transformers
let _instance = null;    // ready-to-use ASR pipeline, once loaded
let _loadPromise = null; // in-flight load, shared by concurrent callers
let _env = null;         // `env` settings object exported by @huggingface/transformers

/**
 * Load the Whisper speech-recognition pipeline once and reuse it.
 *
 * Concurrent callers share a single in-flight load. A failed load is NOT
 * cached: the in-flight promise is cleared on rejection so that a later call
 * can retry (for example after the optional voice dependencies have been
 * installed, or after a transient download failure).
 *
 * @returns {Promise<Function>} the ASR pipeline for 'Xenova/whisper-base'
 * @throws {Error} 'Voice deps not installed…' when @huggingface/transformers
 *   cannot be imported (the underlying error is attached as `cause`)
 */
async function _ensureModel() {
  if (_instance) return _instance;

  // Prevent concurrent loads
  if (_loadPromise) return _loadPromise;

  const attempt = (async () => {
    let mod;
    try {
      mod = await import('@huggingface/transformers');
    } catch (err) {
      throw new Error(
        'Voice deps not installed. Run: npm run setup:voice\n' +
        'Or: npm install --include=optional',
        { cause: err },
      );
    }
    _pipeline = mod.pipeline;
    _env = mod.env;

    // Allow cache dir override
    if (process.env.SHMAKK_HF_CACHE) {
      _env.cacheDir = process.env.SHMAKK_HF_CACHE;
    }

    // Turn off transformers.js local-model lookup.
    // NOTE(review): the previous comment here said this avoids progress spam
    // on stdout; this flag does not look related to progress output —
    // confirm the intent.
    _env.allowLocalModels = false;

    _instance = await _pipeline(
      'automatic-speech-recognition',
      'Xenova/whisper-base',
    );
    return _instance;
  })();

  _loadPromise = attempt;
  // Forget a failed attempt so the next call starts a fresh load. Callers
  // still observe the rejection through the promise returned below.
  attempt.catch(() => {
    if (_loadPromise === attempt) _loadPromise = null;
  });

  return attempt;
}
50
+
51
/**
 * Read a WAV file and return its samples as a Float32Array after converting
 * to 32-bit float at 16 kHz — the input format handed to the Whisper
 * pipeline by `transcribe`.
 *
 * When the decoder returns one array per channel, the first two channels are
 * averaged and scaled by sqrt(2) into a single channel.
 *
 * @param {string} audioPath - path to the WAV file
 * @returns {Float32Array} decoded samples
 */
function _wavToFloat32(audioPath) {
  const wav = new WaveFile(fs.readFileSync(audioPath));
  wav.toBitDepth('32f');
  wav.toSampleRate(16000);

  let samples = wav.getSamples();

  // A per-channel array means multi-channel audio: fold it down to mono.
  if (Array.isArray(samples)) {
    const [left, right] = samples;
    if (samples.length > 1) {
      const gain = Math.sqrt(2);
      left.forEach((value, i) => {
        left[i] = (gain * (value + right[i])) / 2;
      });
    }
    samples = left;
  }

  // getSamples may hand back something other than a Float32Array.
  return samples instanceof Float32Array ? samples : new Float32Array(samples);
}
83
+
84
/**
 * Transcribe a WAV file to text with the locally loaded Whisper pipeline.
 *
 * @param {string} audioPath - path to a WAV audio file
 * @param {object} [opts]
 * @param {string} [opts.language] - language hint passed to the pipeline;
 *   falls back to 'english' when omitted
 * @returns {Promise<string>} trimmed transcription ('' when the pipeline
 *   returns no text)
 * @throws {Error} when `audioPath` does not exist
 */
async function transcribe(audioPath, opts = {}) {
  const exists = fs.existsSync(audioPath);
  if (!exists) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  const transcriber = await _ensureModel();

  // The pipeline can't use AudioContext in Node.js, so decode the WAV to a
  // Float32Array ourselves.
  // See https://huggingface.co/docs/transformers.js/guides/node-audio-processing
  const samples = _wavToFloat32(audioPath);

  const { text } = await transcriber(samples, {
    language: opts.language || 'english',
  });
  return (text || '').trim();
}
107
+
108
/**
 * Check if the STT model files (Xenova/whisper-base) already appear to be
 * cached locally. Used for pre-flight warnings; this is a best-effort
 * directory-existence probe, not a completeness check.
 *
 * Locations probed:
 *  - `$SHMAKK_HF_CACHE/Xenova/whisper-base` when SHMAKK_HF_CACHE is set,
 *    because the model loader points the library's cache dir at that
 *    variable. (Assumes the library stores files under
 *    `<cacheDir>/<repo id>/` — TODO confirm against the installed version.)
 *  - `<hf home>/transformers/models--Xenova--whisper-base`, where <hf home>
 *    is `$HF_HOME`, else `$XDG_CACHE_HOME/huggingface`, else
 *    `~/.cache/huggingface`.
 *
 * @returns {boolean} true when any probed directory exists; false otherwise
 *   or on any error
 */
function isCached() {
  try {
    const candidates = [];

    const override = process.env.SHMAKK_HF_CACHE;
    if (override) {
      candidates.push(path.join(override, 'Xenova', 'whisper-base'));
    }

    // XDG_CACHE_HOME is the generic cache root (the `~/.cache` part), so the
    // `huggingface` component must be appended to it, mirroring the default.
    const hfHome = process.env.HF_HOME
      || (process.env.XDG_CACHE_HOME
        ? path.join(process.env.XDG_CACHE_HOME, 'huggingface')
        : path.join(require('os').homedir(), '.cache', 'huggingface'));

    candidates.push(
      path.join(hfHome, 'transformers', 'models--Xenova--whisper-base'),
    );

    return candidates.some((dir) => fs.existsSync(dir));
  } catch {
    return false;
  }
}
130
+
131
+ module.exports = { transcribe, isCached, _ensureModel };
@@ -0,0 +1,307 @@
1
+ // Text-to-speech via Kokoro ONNX using kokoro-js.
2
+ // No Python, no server — 100% JS, runs locally in-process.
3
+ // Model auto-downloads on first use (~334MB quantized Kokoro-82M).
4
+
5
+ const path = require('path');
6
+ const fs = require('fs');
7
+ const os = require('os');
8
+ const { spawn, spawnSync } = require('child_process');
9
+ const { WaveFile } = require('wavefile');
10
+
11
+ let _tts = null;
12
+ let _loadPromise = null;
13
// Interrupt bookkeeping for the speak operation currently in flight.
let _currentSpeakCtrl = null;     // AbortController of the active speak call
let _currentSpeakAborted = false; // set to true by stopSpeaking()

/**
 * Stop any active TTS playback.
 *
 * Raises the aborted flag, aborts and forgets the active controller (if
 * any), then asks ./voice to clear its tracked player process. Failures in
 * either step are ignored.
 */
function stopSpeaking() {
  _currentSpeakAborted = true;

  const ctrl = _currentSpeakCtrl;
  if (ctrl) {
    try {
      ctrl.abort();
    } catch {}
    _currentSpeakCtrl = null;
  }

  // Also reset the raw audio player process tracked by ./voice.
  try {
    const voice = require('./voice');
    voice._setTtsProc(null);
  } catch {}
}
30
+
31
/**
 * Load the Kokoro TTS model once and reuse it.
 *
 * Concurrent callers share a single in-flight load. A failed load is NOT
 * cached: the in-flight promise is cleared on rejection so a later call can
 * retry (for example after installing the optional voice dependencies).
 *
 * The model precision comes from SHMAKK_TTS_DTYPE and defaults to 'fp16'.
 *
 * @returns {Promise<object>} the loaded KokoroTTS instance
 * @throws {Error} 'Voice deps not installed…' when kokoro-js cannot be
 *   required (the underlying error is attached as `cause`)
 */
async function _ensureModel() {
  if (_tts) return _tts;

  // Prevent concurrent loads
  if (_loadPromise) return _loadPromise;

  const attempt = (async () => {
    let KokoroTTS;
    try {
      ({ KokoroTTS } = require('kokoro-js'));
    } catch (err) {
      throw new Error(
        'Voice deps not installed. Run: npm run setup:voice\n' +
        'Or: npm install --include=optional',
        { cause: err },
      );
    }
    _tts = await KokoroTTS.from_pretrained(
      'onnx-community/Kokoro-82M-v1.0-ONNX',
      { dtype: process.env.SHMAKK_TTS_DTYPE || 'fp16' },
    );
    return _tts;
  })();

  _loadPromise = attempt;
  // Forget a failed attempt so the next call starts a fresh load. Callers
  // still observe the rejection through the promise returned below.
  attempt.catch(() => {
    if (_loadPromise === attempt) _loadPromise = null;
  });

  return attempt;
}
55
+
56
/**
 * Choose a voice id as a pure function of the local wall-clock time.
 *
 * The calendar day seeds a hash; the day is then cut into consecutive
 * buckets of 120–299 minutes each (sizes derived from that seed), and the
 * bucket containing the current minute decides the voice. No state is
 * kept, so calls within the same bucket return the same id.
 *
 * @param {Array<{id: string}>} voices - candidate voices (must be non-empty)
 * @returns {string} the `id` of the selected voice
 */
function _scheduleVoice(voices) {
  const now = new Date();
  const dayStamp =
    now.getFullYear() * 10000 + (now.getMonth() + 1) * 100 + now.getDate();
  const minuteOfDay = now.getHours() * 60 + now.getMinutes();

  // Multiplicative hash of the day, truncated to an unsigned 32-bit value.
  const daySeed = (dayStamp * 2654435761) >>> 0;

  // Walk the day's buckets until reaching the one holding `minuteOfDay`.
  let elapsed = 0;
  let bucketIndex = 0;
  let lcg = daySeed;
  while (elapsed < minuteOfDay) {
    lcg = (lcg * 1664525 + 1013904223) >>> 0;
    const span = 120 + (lcg % 180);
    if (elapsed + span > minuteOfDay) {
      break;
    }
    elapsed += span;
    bucketIndex += 1;
  }

  // Mix the bucket number into the day seed and index into the voice list.
  const pick = (daySeed ^ (bucketIndex * 2246822519)) >>> 0;
  const chosen = voices[pick % voices.length];
  return chosen.id;
}
87
+
88
/**
 * List the voices exposed by the loaded Kokoro model.
 *
 * Missing metadata is defaulted: `name` falls back to the voice id,
 * `language` to 'unknown', and `gender` to '' (always lower-cased).
 *
 * @returns {Promise<Array<{id: string, name: string, language: string, gender: string}>>}
 */
async function listVoices() {
  const { voices } = await _ensureModel();
  return Object.entries(voices).map(([id, meta]) => ({
    id,
    name: meta.name || id,
    language: meta.language || 'unknown',
    gender: (meta.gender || '').toLowerCase(),
  }));
}
105
+
106
/**
 * Encode samples as a single-channel 32-bit float WAV and write it to disk.
 *
 * @param {Float32Array} floatData - audio samples
 * @param {number} sampleRate - sample rate in Hz
 * @param {string} outputPath - destination file path (overwritten if present)
 */
function _writeWav(floatData, sampleRate, outputPath) {
  const encoder = new WaveFile();
  encoder.fromScratch(1, sampleRate, '32f', floatData);
  const bytes = encoder.toBuffer();
  fs.writeFileSync(outputPath, bytes);
}
114
+
115
/**
 * Generate speech from text and write it to a WAV file.
 *
 * @param {string} text - text to speak
 * @param {object} [opts] - options; never modified by this function
 * @param {string} [opts.voice] - voice id; falls back to SHMAKK_TTS_VOICE,
 *   then "af_heart". Matched case-insensitively against the model's voices.
 * @param {number|string} [opts.speed] - speed multiplier; falls back to
 *   SHMAKK_TTS_SPEED, then 1.5. Values that do not parse to a positive
 *   finite number also fall back to 1.5.
 * @param {string} [opts.outputPath] - WAV output path (default: temp file)
 * @returns {Promise<{audioPath: string, voice: string}>} the written file
 *   and the voice id actually used
 * @throws {Error} when `text` is empty/blank or the voice is unknown
 */
async function generate(text, opts = {}) {
  if (!text || !text.trim()) {
    throw new Error('Empty text for TTS');
  }

  const DEFAULT_SPEED = 1.5;
  const requested = opts.voice || process.env.SHMAKK_TTS_VOICE || 'af_heart';
  const tts = await _ensureModel();

  // Resolve the voice into a local variable instead of writing the match
  // back onto the caller's `opts` object.
  let voice = requested;
  if (!tts.voices[voice]) {
    // Try to find a matching voice (case-insensitive)
    const lower = requested.toLowerCase();
    const match = Object.keys(tts.voices).find(
      (v) => v.toLowerCase() === lower,
    );
    if (!match) {
      const available = Object.keys(tts.voices).slice(0, 8).join(', ');
      throw new Error(
        `Unknown voice: ${requested}. Available: ${available}...`,
      );
    }
    voice = match;
  }

  // Guard against an unparsable speed (e.g. SHMAKK_TTS_SPEED=fast) reaching
  // the model as NaN.
  const parsedSpeed = Number.parseFloat(
    opts.speed || process.env.SHMAKK_TTS_SPEED || String(DEFAULT_SPEED),
  );
  const speed =
    Number.isFinite(parsedSpeed) && parsedSpeed > 0 ? parsedSpeed : DEFAULT_SPEED;

  const result = await tts.generate(text, { voice, speed });

  // result is a RawAudio with .audio (Float32Array) and .sampling_rate
  const audioData = result.audio;
  const sampleRate = result.sampling_rate || 24000;

  const outPath =
    opts.outputPath ||
    path.join(os.tmpdir(), `shmakk-tts-${Date.now()}.wav`);

  _writeWav(audioData, sampleRate, outPath);
  return { audioPath: outPath, voice };
}
163
+
164
/**
 * Probe for an executable by running `which <name>` (2s timeout).
 *
 * @param {string} name - executable name to look up
 * @returns {boolean} truthy when `which` exits 0 and prints something;
 *   falsy otherwise, including when the probe itself throws
 */
function playerAvailable(name) {
  let probe;
  try {
    probe = spawnSync('which', [name], {
      stdio: ['ignore', 'pipe', 'ignore'],
      timeout: 2000,
    });
  } catch {
    return false;
  }
  const { status, stdout } = probe;
  return status === 0 && stdout && stdout.length > 0;
}
178
+
179
/**
 * Play a WAV file through the first available system player, probing in
 * this order: afplay (macOS), paplay (PulseAudio), aplay (ALSA, quiet mode).
 *
 * The spawned process is registered with ./voice while it runs and
 * unregistered when it exits or fails to start.
 *
 * @param {string} audioPath - path to the WAV file
 * @returns {Promise<boolean>} resolves true once the player exits, false
 *   when no player is installed or the player emits an error
 * @throws {Error} synchronously, when `audioPath` does not exist
 */
function playAudio(audioPath) {
  if (!fs.existsSync(audioPath)) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  const candidates = [
    { bin: 'afplay', argv: [audioPath] },       // macOS
    { bin: 'paplay', argv: [audioPath] },       // Linux: PulseAudio
    { bin: 'aplay', argv: ['-q', audioPath] },  // Linux: ALSA
  ];
  const player = candidates.find(({ bin }) => playerAvailable(bin));
  if (!player) {
    return Promise.resolve(false);
  }

  // Tell voice.js which process is playing, for Ctrl+C interrupt kill.
  const track = (proc) => {
    try { require('./voice')._setTtsProc(proc); } catch {}
  };

  return new Promise((resolve) => {
    const child = spawn(player.bin, player.argv, { stdio: 'ignore' });
    track(child);
    child.on('exit', () => {
      track(null);
      resolve(true);
    });
    child.on('error', () => {
      track(null);
      resolve(false);
    });
  });
}
229
+
230
/**
 * Split text into sentence-sized chunks for TTS.
 *
 * Breaks after `.`, `!` or `?` when followed by whitespace, and at any run
 * of newlines. Chunks are trimmed and empty ones dropped.
 *
 * @param {string} text - text to split
 * @returns {string[]} non-empty, trimmed chunks in original order
 */
function splitSentences(text) {
  const sentences = [];
  for (const piece of text.split(/(?<=[.!?])\s+|\n+/)) {
    const trimmed = piece.trim();
    if (trimmed.length > 0) {
      sentences.push(trimmed);
    }
  }
  return sentences;
}
239
+
240
/**
 * Speak text one sentence at a time.
 *
 * Each sentence is synthesised and then played before the next one is
 * generated, so audio starts after the first sentence is ready rather than
 * after the whole text. Any speak operation already in progress is stopped
 * first. A sentence that fails to synthesise or play is skipped
 * (best-effort).
 *
 * The voice is `opts.voice`, else SHMAKK_TTS_VOICE, else the time-scheduled
 * voice.
 *
 * @param {string} text - text to speak
 * @param {object} [opts]
 * @param {string} [opts.voice] - explicit voice id
 * @returns {Promise<void>} resolves when playback finished or was interrupted
 */
async function speakStreaming(text, opts = {}) {
  // Cancel any previous speak operation still in progress
  stopSpeaking();
  _currentSpeakAborted = false;
  const ctrl = new AbortController();
  _currentSpeakCtrl = ctrl;
  const { signal } = ctrl;
  const interrupted = () => signal.aborted || _currentSpeakAborted;

  try {
    const tts = await _ensureModel();
    // Use scheduled voice unless explicitly overridden via env or opts
    const voice = opts.voice
      || process.env.SHMAKK_TTS_VOICE
      || _scheduleVoice(Object.keys(tts.voices).map((id) => ({ id })));

    for (const sentence of splitSentences(text)) {
      // Check if interrupted before each sentence
      if (interrupted()) break;
      const { _isTtsKilled } = require('./voice');
      if (_isTtsKilled && _isTtsKilled()) break;

      let audioPath;
      try {
        ({ audioPath } = await generate(sentence, { voice }));
        if (interrupted()) break;
        await playAudio(audioPath);
      } catch {
        // Best-effort: a sentence that fails is skipped.
      } finally {
        // playAudio only settles once the player has exited, so the temp
        // file can be removed right away — no delayed timer that would keep
        // the process alive after speaking.
        if (audioPath) {
          try { fs.unlinkSync(audioPath); } catch {}
        }
      }
    }
  } finally {
    // Only clear the controller if it is still ours: a newer speak call may
    // have replaced it while this one was awaiting.
    if (_currentSpeakCtrl === ctrl) {
      _currentSpeakCtrl = null;
    }
  }
}
281
+
282
/**
 * Speak text aloud. Delegates to `speakStreaming` with the same arguments.
 *
 * @param {string} text - text to speak
 * @param {object} [opts] - forwarded unchanged to `speakStreaming`
 * @returns {Promise<void>} settles when `speakStreaming` settles
 */
async function speak(text, opts = {}) {
  const outcome = await speakStreaming(text, opts);
  return outcome;
}
285
+
286
/**
 * Check whether the Kokoro model directory already exists in the local
 * Hugging Face cache. This is a best-effort directory-existence probe.
 *
 * The cache root is `$HF_HOME`, else `$XDG_CACHE_HOME/huggingface`, else
 * `~/.cache/huggingface`.
 *
 * @returns {boolean} true when the model directory exists; false otherwise
 *   or on any error
 */
function isCached() {
  try {
    // XDG_CACHE_HOME is the generic cache root (the `~/.cache` part), so the
    // `huggingface` component must be appended to it, mirroring the default.
    const cacheRoot = process.env.XDG_CACHE_HOME
      || path.join(os.homedir(), '.cache');
    const hfHome = process.env.HF_HOME || path.join(cacheRoot, 'huggingface');

    const modelDir = path.join(
      hfHome,
      'transformers',
      'models--onnx-community--Kokoro-82M-v1.0-ONNX',
    );
    return fs.existsSync(modelDir);
  } catch {
    return false;
  }
}
306
+
307
+ module.exports = { generate, speak, speakStreaming, playAudio, listVoices, isCached, stopSpeaking };