shmakk 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +23 -0
- package/LICENSE +21 -0
- package/README.md +138 -0
- package/bin/shmakk.js +2 -0
- package/docs/index.html +581 -0
- package/docs/voice.md +181 -0
- package/package.json +58 -0
- package/scripts/patch-onnxruntime.js +82 -0
- package/src/agent.js +0 -0
- package/src/audit.js +18 -0
- package/src/cli.js +177 -0
- package/src/completions.js +167 -0
- package/src/control.js +250 -0
- package/src/correction.js +159 -0
- package/src/endpoints.js +52 -0
- package/src/global-doctor.js +33 -0
- package/src/global-setup.js +62 -0
- package/src/glossary.js +235 -0
- package/src/history-parser.js +166 -0
- package/src/hooks/bash.js +43 -0
- package/src/hooks/fish.js +25 -0
- package/src/hooks/index.js +14 -0
- package/src/hooks/zsh.js +42 -0
- package/src/index.js +166 -0
- package/src/llm.js +45 -0
- package/src/markers.js +113 -0
- package/src/orchestrator.js +61 -0
- package/src/profiles.js +19 -0
- package/src/prompt-cache.js +83 -0
- package/src/pty.js +107 -0
- package/src/review.js +75 -0
- package/src/safety.js +77 -0
- package/src/services/stt.js +131 -0
- package/src/services/tts.js +307 -0
- package/src/services/voice.js +362 -0
- package/src/session.js +604 -0
- package/src/setup-voice.js +108 -0
- package/src/shell.js +32 -0
- package/src/skills.js +309 -0
- package/src/subagent.js +42 -0
- package/src/system-prompt.js +261 -0
- package/src/tools.js +386 -0
- package/src/web.js +228 -0
- package/src/workspace-index.js +213 -0
package/src/safety.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
const path = require('path');
|
|
2
|
+
const os = require('os');
|
|
3
|
+
|
|
4
|
+
// run-tool / shell-command danger patterns. Matched against the literal cmd
// string the agent passes to `run`. If matched → 'unsafe' (always prompts).
//
// Every entry is a regex *source* string; they are OR-ed together into one
// case-insensitive RegExp (no `g` flag, so `.test()` is stateless).
// The list errs on the side of prompting too often rather than too rarely.
const DANGER_RE = new RegExp([
  // privilege escalation (`su` also counts when it is the last word)
  '\\bsudo\\b', '\\bsu(?=\\s|$)', '\\bdoas\\b', '\\bpkexec\\b',
  // any deletion (recursive or not — user wants confirmation on every delete)
  '\\brm\\b(?!\\s+--?(version|help))', '\\brmdir\\b', '\\bunlink\\b',
  '\\btrash(-put)?\\b', '\\bgio\\s+trash\\b',
  '\\bchmod\\s+-R\\b', '\\bchown\\s+-R\\b',
  '\\bfind\\b[^\\n]*-delete\\b',
  // disk / fs
  '\\bmkfs\\b', '\\bdd\\s+if=', '\\bshred\\b', '\\bwipe\\b',
  // pipe-to-shell
  '\\|\\s*(sh|bash|zsh|fish)\\b',
  // redirect into system paths. The allow-listed roots must be whole path
  // components, so `/tmpfoo` or `/homework` do not pass as `/tmp` / `/home`.
  '>\\s*/(?!(?:tmp|home|var/tmp|dev/null)(?![\\w.-]))',
  // personal config / display state mutation (lessons learned)
  '\\bsetxkbmap\\b', '\\blocalectl\\s+set\\b', '\\bgsettings\\s+set\\b',
  '\\bxset\\b', '\\bxrandr\\b', '\\bchsh\\b', '\\bcrontab\\s+-r\\b',
  '\\bsystemctl\\b(?!\\s+(status|cat|show|is-))', // status-y reads ok
  '\\bgit\\s+config\\s+--global\\b',
  // package manager: global / system installs run arbitrary code.
  // The -g/--global flag may appear anywhere in the same simple command
  // (up to the next |, ; or &), not only right after the verb.
  '\\bnpm\\s+(i|install|add)\\b[^|;&\\n]*?\\s(-g|--global)\\b',
  '\\bpip\\d?\\s+install\\b', '\\bpipx\\s+install\\b',
  '\\bcargo\\s+install\\b', '\\bgo\\s+install\\b', '\\bgem\\s+install\\b',
  '\\bbrew\\s+install\\b',
  '\\bapt(-get)?\\s+(install|remove|purge|upgrade|dist-upgrade)\\b',
  '\\bdnf\\s+(install|remove|upgrade)\\b',
  // Any flag cluster containing S (sync), R (remove) or U (local install),
  // including combined forms such as -Syu or -Rns.
  '\\bpacman\\s+-[A-Z]*[SRU][A-Z]*\\b',
  '\\b(yay|paru)\\s+-[A-Z]*[SRU][A-Z]*\\b',
  '\\bzypper\\s+(in|install|rm|remove)\\b',
].join('|'), 'i');
|
|
35
|
+
|
|
36
|
+
/**
 * Classify a shell command string for the `run` tool.
 *
 * @param {string} cmd - literal command line the agent wants to execute
 * @returns {'safe'|'unsafe'} 'unsafe' when any DANGER_RE pattern matches;
 *   'safe' otherwise, including for an empty / missing command
 */
function classifyRunCommand(cmd) {
  const flagged = Boolean(cmd) && DANGER_RE.test(cmd);
  return flagged ? 'unsafe' : 'safe';
}
|
|
40
|
+
|
|
41
|
+
// Paths whose read OR write should always prompt (even in auto mode).
// Each pattern is tested against the path string handed to isSecretPath;
// the `(^|\/)` prefix lets a pattern match both a workspace-relative path
// and an absolute one, so e.g. ~/.ssh works when workspace is ~.
const SECRET_RE = [
  // key material, wherever it lives
  /\.pem$/i,
  /\.key$/i,
  /(^|\/)id_(rsa|ed25519|dsa|ecdsa)(\.pub)?$/,

  // single credential-bearing files
  /(^|\/)\.env(\..*)?$/,
  /(^|\/)\.netrc$/,
  /(^|\/)\.npmrc$/,
  /(^|\/)\.pypirc$/,
  /(^|\/)\.gem\/credentials$/,
  /(^|\/)\.docker\/config\.json$/,
  /(^|\/)credentials(\.json|\.yaml|\.yml)?$/i,

  // whole directories (the directory itself or anything below it)
  /(^|\/)\.ssh(\/|$)/,
  /(^|\/)\.gnupg(\/|$)/,
  /(^|\/)\.aws(\/|$)/,
  /(^|\/)\.kube\/config(\/|$)/,
  /(^|\/)\.config\/gh(\/|$)/,
];
|
|
61
|
+
|
|
62
|
+
/**
 * Tell whether a path matches any of the SECRET_RE patterns.
 *
 * @param {string} p - path string to test
 * @returns {boolean} true on the first matching pattern; false for an
 *   empty / missing path or when nothing matches
 */
function isSecretPath(p) {
  if (!p) return false;
  for (const pattern of SECRET_RE) {
    if (pattern.test(p)) return true;
  }
  return false;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Produce a warning when the workspace is an overly broad directory.
 *
 * @param {string} workspace - workspace directory (resolved against cwd)
 * @returns {string|null} warning text when the resolved path is `/`, `/etc`
 *   or the current user's home directory; null otherwise or when no
 *   workspace is given
 */
function workspaceWarning(workspace) {
  if (!workspace) return null;

  const resolved = path.resolve(workspace);
  const broadRoots = ['/', '/etc', os.homedir()];

  return broadRoots.includes(resolved)
    ? `workspace is ${resolved} — that's broad. Consider \`shmakk --workspace <project-dir>\` to keep AI scope smaller.`
    : null;
}
|
|
76
|
+
|
|
77
|
+
module.exports = { classifyRunCommand, isSecretPath, workspaceWarning, DANGER_RE };
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// Speech-to-text via Whisper ONNX using @huggingface/transformers.
|
|
2
|
+
// No Python, no server — 100% JS, runs locally in-process.
|
|
3
|
+
// Model auto-downloads on first use (Xenova/whisper-base; the ~45MB figure was for quantized whisper-tiny).
|
|
4
|
+
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const fs = require('fs');
|
|
7
|
+
const { WaveFile } = require('wavefile');
|
|
8
|
+
|
|
9
|
+
// Lazily-populated module state for the speech-recognition pipeline.
let _pipeline = null;    // `pipeline` factory exported by @huggingface/transformers
let _instance = null;    // ready-to-call ASR pipeline, once loaded
let _loadPromise = null; // in-flight load, shared by concurrent callers
let _env = null;         // `env` settings object exported by the library

/**
 * Load the Whisper speech-recognition pipeline on first use and reuse it
 * afterwards.
 *
 * Concurrent callers share one in-flight load. If the load fails, the cached
 * promise is cleared so that a later call can retry (for example after the
 * optional dependencies were installed or a download error was transient).
 *
 * @returns {Promise<Function>} the pipeline returned by the library for
 *   'automatic-speech-recognition' with model 'Xenova/whisper-base'
 * @throws {Error} when @huggingface/transformers cannot be imported (the
 *   original failure is attached as `cause`), or when pipeline creation fails
 */
async function _ensureModel() {
  if (_instance) return _instance;

  // Prevent concurrent loads
  if (_loadPromise) return _loadPromise;

  const pending = (async () => {
    let mod;
    try {
      mod = await import('@huggingface/transformers');
    } catch (err) {
      throw new Error(
        'Voice deps not installed. Run: npm run setup:voice\n' +
        'Or: npm install --include=optional',
        { cause: err },
      );
    }
    _pipeline = mod.pipeline;
    _env = mod.env;

    // Allow cache dir override
    if (process.env.SHMAKK_HF_CACHE) {
      _env.cacheDir = process.env.SHMAKK_HF_CACHE;
    }

    // Disable the library's local-model lookup.
    // NOTE(review): the previous comment here said this silences progress
    // output; the flag name suggests it only controls local model loading —
    // confirm the intent.
    _env.allowLocalModels = false;

    _instance = await _pipeline(
      'automatic-speech-recognition',
      'Xenova/whisper-base',
    );
    return _instance;
  })();
  _loadPromise = pending;

  try {
    return await pending;
  } catch (err) {
    // Do not keep a rejected promise around: it would make every later call
    // fail without ever retrying.
    if (_loadPromise === pending) _loadPromise = null;
    throw err;
  }
}
|
|
50
|
+
|
|
51
|
+
/**
 * Read a WAV file and return its samples as a Float32Array at 16 kHz.
 *
 * The file is converted to 32-bit float and resampled to 16000 Hz. When the
 * decoder reports several channels, the first two are mixed into the first
 * channel (scaled by sqrt(2) / 2) and that channel is returned.
 *
 * @param {string} audioPath - path of the WAV file to read
 * @returns {Float32Array} mono sample data
 */
function _wavToFloat32(audioPath) {
  const wav = new WaveFile(fs.readFileSync(audioPath));
  wav.toBitDepth('32f');
  wav.toSampleRate(16000);

  const samples = wav.getSamples();
  let mono = samples;

  // An array result means one entry per channel.
  if (Array.isArray(samples)) {
    const first = samples[0];
    if (samples.length > 1) {
      const second = samples[1];
      const gain = Math.sqrt(2);
      for (let idx = 0; idx < first.length; idx += 1) {
        first[idx] = (gain * (first[idx] + second[idx])) / 2;
      }
    }
    mono = first;
  }

  // getSamples may hand back something other than a Float32Array.
  return mono instanceof Float32Array ? mono : new Float32Array(mono);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Run speech recognition on a WAV file.
 *
 * @param {string} audioPath - path to the WAV audio file
 * @param {object} [opts]
 * @param {string} [opts.language] - language hint passed to the pipeline;
 *   'english' is used when omitted
 * @returns {Promise<string>} recognised text with surrounding whitespace
 *   removed ('' when the pipeline returns no text)
 * @throws {Error} when the file does not exist
 */
async function transcribe(audioPath, opts = {}) {
  const present = fs.existsSync(audioPath);
  if (!present) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  const recognise = await _ensureModel();

  // The pipeline is fed raw samples because AudioContext is unavailable in Node.js.
  // See https://huggingface.co/docs/transformers.js/guides/node-audio-processing
  const samples = _wavToFloat32(audioPath);

  const language = opts.language || 'english';
  const output = await recognise(samples, { language });
  const text = output.text || '';
  return text.trim();
}
|
|
107
|
+
|
|
108
|
+
/**
 * Best-effort check for an already-downloaded STT model (Xenova/whisper-base).
 * Used for pre-flight warnings only; any failure counts as "not cached".
 *
 * Probed locations, in order:
 *  1. `$SHMAKK_HF_CACHE/Xenova/whisper-base` — the same override that
 *     `_ensureModel` assigns to the library's `cacheDir`.
 *     NOTE(review): the `<cacheDir>/<org>/<model>` layout is assumed from
 *     transformers.js; verify against the installed version.
 *  2. `<hfHome>/transformers/models--Xenova--whisper-base`, where hfHome is
 *     `$HF_HOME`, else `$XDG_CACHE_HOME/huggingface`, else
 *     `~/.cache/huggingface`.
 *  3. `$XDG_CACHE_HOME/transformers/models--Xenova--whisper-base`, the path
 *     earlier versions of this check used, kept for compatibility.
 *
 * @returns {boolean} true when one of the directories exists
 */
function isCached() {
  try {
    const hubFolder = path.join('transformers', 'models--Xenova--whisper-base');
    const candidates = [];

    if (process.env.SHMAKK_HF_CACHE) {
      candidates.push(
        path.join(process.env.SHMAKK_HF_CACHE, 'Xenova', 'whisper-base'),
      );
    }

    if (process.env.HF_HOME) {
      candidates.push(path.join(process.env.HF_HOME, hubFolder));
    } else if (process.env.XDG_CACHE_HOME) {
      // XDG_CACHE_HOME is the cache *base* directory, so the Hugging Face
      // home is a subdirectory of it — matching the ~/.cache fallback below.
      candidates.push(
        path.join(process.env.XDG_CACHE_HOME, 'huggingface', hubFolder),
      );
      candidates.push(path.join(process.env.XDG_CACHE_HOME, hubFolder));
    } else {
      candidates.push(
        path.join(require('os').homedir(), '.cache', 'huggingface', hubFolder),
      );
    }

    return candidates.some((dir) => fs.existsSync(dir));
  } catch {
    return false;
  }
}
|
|
130
|
+
|
|
131
|
+
module.exports = { transcribe, isCached, _ensureModel };
|
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
// Text-to-speech via Kokoro ONNX using kokoro-js.
|
|
2
|
+
// No Python, no server — 100% JS, runs locally in-process.
|
|
3
|
+
// Model auto-downloads on first use (~334MB quantized Kokoro-82M).
|
|
4
|
+
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const fs = require('fs');
|
|
7
|
+
const os = require('os');
|
|
8
|
+
const { spawn, spawnSync } = require('child_process');
|
|
9
|
+
const { WaveFile } = require('wavefile');
|
|
10
|
+
|
|
11
|
+
let _tts = null;
|
|
12
|
+
let _loadPromise = null;
|
|
13
|
+
// Track current speak operation for interrupt support
|
|
14
|
+
let _currentSpeakCtrl = null;
|
|
15
|
+
let _currentSpeakAborted = false;
|
|
16
|
+
|
|
17
|
+
/**
 * Interrupt the TTS operation that is currently running, if any.
 *
 * Raises the module-level abort flag, aborts and forgets the active
 * AbortController, then calls `_setTtsProc(null)` on the voice module.
 * Every step is best-effort: failures are ignored.
 * NOTE(review): whether `_setTtsProc(null)` actually kills the player
 * process depends on voice.js, which is not visible here.
 */
function stopSpeaking() {
  _currentSpeakAborted = true;

  const active = _currentSpeakCtrl;
  if (active) {
    try {
      active.abort();
    } catch {}
    _currentSpeakCtrl = null;
  }

  // Required lazily, at call time, rather than at module top level.
  try {
    require('./voice')._setTtsProc(null);
  } catch {}
}
|
|
30
|
+
|
|
31
|
+
/**
 * Load the Kokoro TTS model on first use and reuse it afterwards.
 *
 * Concurrent callers share one in-flight load. If the load fails, the cached
 * promise is cleared so that a later call can retry instead of receiving the
 * same rejection forever.
 *
 * The weight precision comes from SHMAKK_TTS_DTYPE (default 'fp16').
 *
 * @returns {Promise<object>} the KokoroTTS instance
 * @throws {Error} when kokoro-js cannot be required (the original failure is
 *   attached as `cause`), or when the model fails to load
 */
async function _ensureModel() {
  if (_tts) return _tts;

  if (_loadPromise) return _loadPromise;

  const pending = (async () => {
    let KokoroTTS;
    try {
      ({ KokoroTTS } = require('kokoro-js'));
    } catch (err) {
      throw new Error(
        'Voice deps not installed. Run: npm run setup:voice\n' +
        'Or: npm install --include=optional',
        { cause: err },
      );
    }
    _tts = await KokoroTTS.from_pretrained(
      'onnx-community/Kokoro-82M-v1.0-ONNX',
      { dtype: process.env.SHMAKK_TTS_DTYPE || 'fp16' },
    );
    return _tts;
  })();
  _loadPromise = pending;

  try {
    return await pending;
  } catch (err) {
    // Drop the rejected promise so the next call starts a fresh load.
    if (_loadPromise === pending) _loadPromise = null;
    throw err;
  }
}
|
|
55
|
+
|
|
56
|
+
/**
 * Pick a voice deterministically from the wall-clock time.
 *
 * The day is cut into consecutive buckets of 120–299 minutes whose lengths
 * are derived from a per-day seed; the voice is chosen from the index of the
 * bucket containing the current minute. The same date and minute therefore
 * always give the same voice, with no stored state.
 *
 * @param {Array<{id: string}>} voices - candidate voices
 * @param {Date} [now] - moment to schedule for (defaults to the current
 *   time); exposed so the schedule can be evaluated for a fixed instant
 * @returns {string|undefined} id of the selected voice, or undefined when
 *   `voices` is empty or not an array
 */
function _scheduleVoice(voices, now = new Date()) {
  // Without candidates there is nothing to pick; the modulo below would be
  // NaN and the lookup would throw.
  if (!Array.isArray(voices) || voices.length === 0) return undefined;

  const day = now.getFullYear() * 10000 + (now.getMonth() + 1) * 100 + now.getDate();

  // Multiplicative hash of the date. The product exceeds 2^53 and is rounded
  // before truncation to 32 bits; it is still deterministic, and the
  // arithmetic is kept as-is so the existing schedule does not shift.
  const daySeed = (day * 2654435761) >>> 0;

  const minuteOfDay = now.getHours() * 60 + now.getMinutes();

  // Walk the day's buckets until reaching the one containing minuteOfDay.
  let bucketStart = 0;
  let bucket = 0;
  let bucketSeed = daySeed;
  while (bucketStart < minuteOfDay) {
    bucketSeed = (bucketSeed * 1664525 + 1013904223) >>> 0; // LCG step
    const bucketMinutes = 120 + (bucketSeed % 180); // 120..299 minutes
    if (bucketStart + bucketMinutes > minuteOfDay) break;
    bucketStart += bucketMinutes;
    bucket++;
  }

  // Pick voice from bucket + day seed
  const voiceSeed = (daySeed ^ (bucket * 2246822519)) >>> 0;
  return voices[voiceSeed % voices.length].id;
}
|
|
87
|
+
|
|
88
|
+
/**
 * Describe every voice the loaded Kokoro model exposes.
 *
 * Missing metadata is filled in: `name` falls back to the voice id,
 * `language` to 'unknown', and `gender` to '' (it is lower-cased otherwise).
 *
 * @returns {Promise<Array<{id: string, name: string, language: string, gender: string}>>}
 */
async function listVoices() {
  const model = await _ensureModel();
  return Object.entries(model.voices).map(([id, meta]) => ({
    id,
    name: meta.name || id,
    language: meta.language || 'unknown',
    gender: (meta.gender || '').toLowerCase(),
  }));
}
|
|
105
|
+
|
|
106
|
+
/**
 * Encode samples as a single-channel 32-bit float WAV and write it to disk.
 *
 * @param {Float32Array} floatData - audio samples
 * @param {number} sampleRate - sample rate in Hz
 * @param {string} outputPath - destination file (overwritten synchronously)
 */
function _writeWav(floatData, sampleRate, outputPath) {
  const encoder = new WaveFile();
  encoder.fromScratch(1, sampleRate, '32f', floatData);
  const bytes = encoder.toBuffer();
  fs.writeFileSync(outputPath, bytes);
}
|
|
114
|
+
|
|
115
|
+
/**
 * Generate speech from text and write it to a WAV file.
 *
 * @param {string} text - text to speak
 * @param {object} [opts] - options; the object is not modified
 * @param {string} [opts.voice] - voice name, matched case-insensitively
 *   (default: SHMAKK_TTS_VOICE, then "af_heart")
 * @param {number|string} [opts.speed] - speed factor (default:
 *   SHMAKK_TTS_SPEED, then 1.5); values that do not parse to a positive
 *   finite number fall back to 1.5
 * @param {string} [opts.outputPath] - WAV output path (default: temp file)
 * @returns {Promise<{audioPath: string, voice: string}>} the written file and
 *   the voice name actually used
 * @throws {Error} on empty text or an unknown voice
 */
async function generate(text, opts = {}) {
  if (!text || !text.trim()) {
    throw new Error('Empty text for TTS');
  }

  const requested = opts.voice || process.env.SHMAKK_TTS_VOICE || 'af_heart';
  const tts = await _ensureModel();

  // Resolve the voice into a local variable instead of writing the match
  // back into the caller's options object.
  let voice = requested;
  if (!tts.voices[requested]) {
    const names = Object.keys(tts.voices);
    const lower = requested.toLowerCase();
    const match = names.find((v) => v.toLowerCase() === lower);
    if (!match) {
      const available = names.slice(0, 8).join(', ');
      throw new Error(
        `Unknown voice: ${requested}. Available: ${available}...`,
      );
    }
    voice = match;
  }

  // An unparsable speed would otherwise reach the model as NaN.
  const parsedSpeed = Number.parseFloat(
    opts.speed || process.env.SHMAKK_TTS_SPEED || '1.5',
  );
  const speed = Number.isFinite(parsedSpeed) && parsedSpeed > 0 ? parsedSpeed : 1.5;

  const result = await tts.generate(text, { voice, speed });

  // result is a RawAudio with .audio (Float32Array) and .sampling_rate
  const audioData = result.audio;
  const sampleRate = result.sampling_rate || 24000;

  // The random suffix keeps two calls in the same millisecond from sharing a file.
  const uniqueTag = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
  const outPath =
    opts.outputPath ||
    path.join(os.tmpdir(), `shmakk-tts-${uniqueTag}.wav`);

  _writeWav(audioData, sampleRate, outPath);
  return { audioPath: outPath, voice };
}
|
|
163
|
+
|
|
164
|
+
/**
 * Probe for an executable by running `which <name>` (2 s timeout).
 *
 * @param {string} name - executable name to look up
 * @returns {boolean} truthy when `which` exits with status 0 and prints a
 *   non-empty result; falsy otherwise, including when the probe throws
 */
function playerAvailable(name) {
  let probe;
  try {
    probe = spawnSync('which', [name], {
      stdio: ['ignore', 'pipe', 'ignore'],
      timeout: 2000,
    });
  } catch {
    return false;
  }

  if (probe.status !== 0) return false;
  return probe.stdout && probe.stdout.length > 0;
}
|
|
178
|
+
|
|
179
|
+
// Players tried in order: afplay (macOS), paplay (PulseAudio), aplay (ALSA).
// `args` are the flags placed before the file path.
const PLAYER_CANDIDATES = [
  { cmd: 'afplay', args: [] },
  { cmd: 'paplay', args: [] },
  { cmd: 'aplay', args: ['-q'] },
];

// First player found, remembered so the synchronous `which` probes do not
// run again for every sentence. Only successful detections are remembered.
let _detectedPlayer = null;

/**
 * Play a WAV file through a system audio player.
 *
 * The spawned process is registered with the voice module through
 * `_setTtsProc` and cleared again once it finishes.
 *
 * @param {string} audioPath - WAV file to play
 * @returns {Promise<boolean>} resolves true once the player process exits,
 *   false when no player is installed or the process fails to start
 * @throws {Error} synchronously, when the file does not exist
 */
function playAudio(audioPath) {
  if (!fs.existsSync(audioPath)) {
    throw new Error(`Audio file not found: ${audioPath}`);
  }

  if (!_detectedPlayer) {
    _detectedPlayer =
      PLAYER_CANDIDATES.find(({ cmd }) => playerAvailable(cmd)) || null;
  }
  if (!_detectedPlayer) {
    return Promise.resolve(false);
  }

  const { cmd, args } = _detectedPlayer;

  // Best-effort bookkeeping in voice.js; required lazily at call time.
  const track = (proc) => {
    try { require('./voice')._setTtsProc(proc); } catch {}
  };

  return new Promise((resolve) => {
    const proc = spawn(cmd, [...args, audioPath], {
      stdio: 'ignore',
    });
    // Track in voice.js for Ctrl+C interrupt kill
    track(proc);
    proc.on('exit', () => {
      track(null);
      resolve(true);
    });
    proc.on('error', () => {
      track(null);
      // The remembered player could not be started; probe again next time.
      _detectedPlayer = null;
      resolve(false);
    });
  });
}
|
|
229
|
+
|
|
230
|
+
/**
 * Break text into sentence-sized pieces for incremental TTS.
 *
 * A break happens at whitespace that follows '.', '!' or '?', and at runs of
 * newlines. Pieces are trimmed and empty ones are dropped.
 *
 * @param {string} text - text to split
 * @returns {string[]} non-empty, trimmed pieces in their original order
 */
function splitSentences(text) {
  const pieces = [];
  for (const raw of text.split(/(?<=[.!?])\s+|\n+/)) {
    const piece = raw.trim();
    if (piece.length > 0) pieces.push(piece);
  }
  return pieces;
}
|
|
239
|
+
|
|
240
|
+
/**
 * Speak text sentence by sentence.
 *
 * Each sentence is synthesised and then played before the next one is
 * generated, so audio starts after the first sentence is ready rather than
 * after the whole text. Any speak operation already running is stopped
 * first. A sentence that fails to generate or play is skipped.
 *
 * The voice is `opts.voice`, else SHMAKK_TTS_VOICE, else the time-scheduled
 * voice.
 *
 * @param {string} text - text to speak
 * @param {object} [opts]
 * @param {string} [opts.voice] - voice name override
 * @returns {Promise<void>} resolves once every sentence has been played or
 *   the operation was interrupted
 */
async function speakStreaming(text, opts = {}) {
  // Cancel any previous speak operation still in progress
  stopSpeaking();
  _currentSpeakAborted = false;
  const ctrl = new AbortController();
  _currentSpeakCtrl = ctrl;
  const { signal } = ctrl;
  const interrupted = () => signal.aborted || _currentSpeakAborted;

  try {
    const tts = await _ensureModel();
    // Use scheduled voice unless explicitly overridden via env or opts
    const voice = opts.voice
      || process.env.SHMAKK_TTS_VOICE
      || _scheduleVoice(Object.keys(tts.voices).map((id) => ({ id })));

    for (const sentence of splitSentences(text)) {
      // Check if interrupted before each sentence
      if (interrupted()) break;
      const { _isTtsKilled } = require('./voice');
      if (_isTtsKilled && _isTtsKilled()) break;

      let audioPath = null;
      try {
        ({ audioPath } = await generate(sentence, { voice }));
        if (interrupted()) break;
        await playAudio(audioPath);
      } catch {
        // Best-effort: skip this sentence and carry on with the next one.
      } finally {
        // playAudio settles only after the player has exited, so the temp
        // file can go right away. A delayed timer would keep the event loop
        // alive after speaking has finished.
        if (audioPath) {
          try { fs.unlinkSync(audioPath); } catch {}
        }
      }
    }
  } finally {
    // Clear the controller only if it is still ours: a newer call may have
    // replaced it while this one was winding down.
    if (_currentSpeakCtrl === ctrl) _currentSpeakCtrl = null;
  }
}
|
|
281
|
+
|
|
282
|
+
/**
 * Speak text; delegates to speakStreaming with the same arguments.
 *
 * @param {string} text - text to speak
 * @param {object} [opts] - options forwarded unchanged
 * @returns {Promise<void>} whatever speakStreaming resolves to
 */
async function speak(text, opts = {}) {
  const outcome = await speakStreaming(text, opts);
  return outcome;
}
|
|
285
|
+
|
|
286
|
+
/**
 * Best-effort check for an already-downloaded TTS model
 * (onnx-community/Kokoro-82M-v1.0-ONNX). Any failure counts as "not cached".
 *
 * Looks for `transformers/models--onnx-community--Kokoro-82M-v1.0-ONNX`
 * under `$HF_HOME`, else under `$XDG_CACHE_HOME/huggingface` (and directly
 * under `$XDG_CACHE_HOME`, the location earlier versions of this check
 * used), else under `~/.cache/huggingface`.
 * NOTE(review): this assumes the loader stores models in that layout —
 * confirm against where kokoro-js actually caches.
 *
 * @returns {boolean} true when one of the directories exists
 */
function isCached() {
  try {
    const hubFolder = path.join(
      'transformers',
      'models--onnx-community--Kokoro-82M-v1.0-ONNX',
    );

    const roots = [];
    if (process.env.HF_HOME) {
      roots.push(process.env.HF_HOME);
    } else if (process.env.XDG_CACHE_HOME) {
      // XDG_CACHE_HOME is the cache *base* directory, so the Hugging Face
      // home is a subdirectory of it — matching the ~/.cache fallback below.
      roots.push(path.join(process.env.XDG_CACHE_HOME, 'huggingface'));
      roots.push(process.env.XDG_CACHE_HOME);
    } else {
      roots.push(path.join(os.homedir(), '.cache', 'huggingface'));
    }

    return roots.some((root) => fs.existsSync(path.join(root, hubFolder)));
  } catch {
    return false;
  }
}
|
|
306
|
+
|
|
307
|
+
module.exports = { generate, speak, speakStreaming, playAudio, listVoices, isCached, stopSpeaking };
|