@kernel.chat/kbot 3.62.0 → 3.64.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent.js +15 -1
- package/dist/buddy.d.ts +33 -0
- package/dist/buddy.js +468 -0
- package/dist/cli.js +75 -1
- package/dist/dream.d.ts +66 -0
- package/dist/dream.js +377 -0
- package/dist/memory-scanner.d.ts +60 -0
- package/dist/memory-scanner.js +461 -0
- package/dist/tools/buddy-tools.d.ts +2 -0
- package/dist/tools/buddy-tools.js +63 -0
- package/dist/tools/dream-tools.d.ts +2 -0
- package/dist/tools/dream-tools.js +159 -0
- package/dist/tools/index.js +4 -0
- package/dist/tools/memory-scanner-tools.d.ts +2 -0
- package/dist/tools/memory-scanner-tools.js +87 -0
- package/dist/tools/voice-input-tools.d.ts +2 -0
- package/dist/tools/voice-input-tools.js +132 -0
- package/dist/voice-input.d.ts +53 -0
- package/dist/voice-input.js +362 -0
- package/ollama-manifest.json +1 -1
- package/package.json +2 -2
|
@@ -0,0 +1,362 @@
|
|
|
1
|
+
// kbot Voice Input — Local-first speech-to-text foundation
|
|
2
|
+
//
|
|
3
|
+
// Push-to-talk flow: start recording → transcribe locally → return text
|
|
4
|
+
//
|
|
5
|
+
// Transcription backends (priority order):
|
|
6
|
+
// 1. whisper.cpp / openai-whisper CLI binary (fully local, $0)
|
|
7
|
+
// 2. Ollama with an audio-capable model (fully local, $0; experimental — Ollama has no native whisper endpoint, audio is sent via multimodal input)
|
|
8
|
+
// 3. Falls back to text input if neither is available
|
|
9
|
+
//
|
|
10
|
+
// Recording backends:
|
|
11
|
+
// - macOS: `rec` (sox) — 16kHz mono WAV with silence detection
|
|
12
|
+
// - Linux: `arecord` (ALSA) — 16kHz mono WAV with fixed duration
|
|
13
|
+
//
|
|
14
|
+
// No cloud APIs. No subscriptions. BYOK philosophy.
|
|
15
|
+
//
|
|
16
|
+
// Usage:
|
|
17
|
+
// import { getVoiceInput, checkVoiceInputStatus } from './voice-input.js'
|
|
18
|
+
// const text = await getVoiceInput() // record + transcribe
|
|
19
|
+
// const status = await checkVoiceInputStatus() // check readiness
|
|
20
|
+
import { execSync, spawn } from 'node:child_process';
|
|
21
|
+
import { homedir } from 'node:os';
|
|
22
|
+
import { join } from 'node:path';
|
|
23
|
+
import { existsSync, unlinkSync, mkdirSync, statSync } from 'node:fs';
|
|
24
|
+
// ---------------------------------------------------------------------------
|
|
25
|
+
// Constants
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// Base directory for all kbot state (~/.kbot).
const KBOT_DIR = join(homedir(), '.kbot');
// Scratch directory for temporary WAV recordings; files are deleted after transcription.
const VOICE_TMP_DIR = join(KBOT_DIR, 'voice-tmp');
// Ollama server base URL; overridable via the OLLAMA_HOST environment variable.
const OLLAMA_HOST = process.env.OLLAMA_HOST || 'http://localhost:11434';
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Platform detection helpers
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
/**
 * Check whether a CLI command is resolvable on PATH.
 *
 * The name is interpolated into a shell command line, so anything that is
 * not a plain command name is rejected up front to prevent shell injection
 * (current callers pass literals, but this is one refactor away from
 * receiving user input).
 *
 * @param {string} cmd - bare command name (letters, digits, `.`, `_`, `-`)
 * @returns {boolean} true if `which` resolves the command
 */
function commandExists(cmd) {
    // Refuse names containing shell metacharacters instead of quoting them:
    // a legitimate command name never needs them.
    if (!/^[A-Za-z0-9._-]+$/.test(cmd))
        return false;
    try {
        execSync(`which ${cmd}`, { stdio: 'ignore' });
        return true;
    }
    catch {
        return false;
    }
}
|
|
42
|
+
/**
 * Pick the first available audio recorder binary.
 * Preference order: sox's `rec`, then ALSA's `arecord`.
 * @returns {'rec'|'arecord'|'none'}
 */
function detectRecorder() {
    for (const candidate of ['rec', 'arecord']) {
        if (commandExists(candidate))
            return candidate;
    }
    return 'none';
}
|
|
49
|
+
/**
 * Locate a whisper CLI binary, if one is installed.
 * Checks common binary names in priority order.
 * @returns {string|null} the first resolvable binary name, or null
 */
function getWhisperCliPath() {
    const candidates = ['whisper', 'whisper.cpp', 'whisper-cpp'];
    const found = candidates.find((cmd) => commandExists(cmd));
    return found ?? null;
}
|
|
57
|
+
/**
 * Probe an Ollama server by hitting its /api/tags endpoint.
 * @param {string} host - base URL, e.g. http://localhost:11434
 * @returns {Promise<boolean>} true iff the server answered 2xx within 3s
 */
async function isOllamaReachable(host) {
    try {
        const response = await fetch(`${host}/api/tags`, { signal: AbortSignal.timeout(3000) });
        return response.ok;
    }
    catch {
        // Network error, refusal, or timeout all mean "not reachable".
        return false;
    }
}
|
|
68
|
+
/**
 * Ask an Ollama server whether any installed model name contains
 * `modelName` (case-insensitive substring match).
 * @param {string} host - Ollama base URL
 * @param {string} modelName - name fragment to look for, e.g. 'whisper'
 * @returns {Promise<boolean>} false on any error, timeout, or no match
 */
async function ollamaHasWhisperModel(host, modelName) {
    try {
        const response = await fetch(`${host}/api/tags`, { signal: AbortSignal.timeout(3000) });
        if (!response.ok)
            return false;
        const body = await response.json();
        if (!body.models)
            return false;
        const needle = modelName.toLowerCase();
        return body.models.some((entry) => entry.name.toLowerCase().includes(needle));
    }
    catch {
        return false;
    }
}
|
|
84
|
+
/**
 * Choose the transcription backend from what was detected.
 * A local whisper CLI wins over Ollama; 'none' when neither is usable.
 * @param {string|null} whisperCli - CLI binary name, or null
 * @param {boolean} ollamaWhisper - Ollama has a whisper-like model
 * @returns {'whisper-cli'|'ollama'|'none'}
 */
function detectTranscriber(whisperCli, ollamaWhisper) {
    if (whisperCli)
        return 'whisper-cli';
    return ollamaWhisper ? 'ollama' : 'none';
}
|
|
91
|
+
// ---------------------------------------------------------------------------
|
|
92
|
+
// Audio recording
|
|
93
|
+
// ---------------------------------------------------------------------------
|
|
94
|
+
/** Create the temp recording directory on first use (no-op when present). */
function ensureVoiceTmpDir() {
    if (existsSync(VOICE_TMP_DIR))
        return;
    mkdirSync(VOICE_TMP_DIR, { recursive: true });
}
|
|
99
|
+
/**
 * Build a unique path for a temporary WAV recording inside VOICE_TMP_DIR,
 * creating the directory if needed.
 * @returns {string} absolute path ending in .wav
 */
function generateTmpPath() {
    ensureVoiceTmpDir();
    const suffix = Math.random().toString(36).slice(2, 8);
    const fileName = `voice-${Date.now()}-${suffix}.wav`;
    return join(VOICE_TMP_DIR, fileName);
}
|
|
104
|
+
/**
 * Delete a file if it exists. Failures are deliberately swallowed —
 * temp-file cleanup is best effort and must never mask the real error.
 * @param {string} path - file to remove
 */
function cleanupFile(path) {
    try {
        if (!existsSync(path))
            return;
        unlinkSync(path);
    }
    catch {
        // best effort cleanup
    }
}
|
|
113
|
+
/**
 * Record audio from the microphone into `outputPath` (16kHz mono WAV).
 *
 * Backends:
 * - `rec` (sox): starts once sound rises above `silenceThreshold`%, stops
 *   after 2s of silence below it, hard-capped at `maxSeconds` via `trim`.
 * - `arecord` (ALSA): fixed-duration capture of `maxSeconds`.
 *
 * Resolves true only when the output file exists and holds more than a bare
 * 44-byte WAV header — i.e. some audio data was actually captured.
 *
 * @param {string} outputPath - where the WAV file is written
 * @param {'rec'|'arecord'|'none'} recorder - detected recorder backend
 * @param {number} maxSeconds - hard cap on recording length
 * @param {string|number} silenceThreshold - sox silence threshold (percent)
 * @returns {Promise<boolean>} true on success; false on any failure (never rejects)
 */
async function recordAudio(outputPath, recorder, maxSeconds, silenceThreshold) {
    if (recorder === 'none')
        return false;
    const args = recorder === 'rec'
        ? [
            // silence 1 0.1 <t>% = begin after sound above threshold;
            // silence 1 2.0 <t>% = stop after 2s below it; trim caps length.
            outputPath,
            'rate', '16k',
            'channels', '1',
            'silence', '1', '0.1', `${silenceThreshold}%`,
            '1', '2.0', `${silenceThreshold}%`,
            'trim', '0', String(maxSeconds),
        ]
        : [
            // arecord: fixed-duration, 16kHz mono, signed 16-bit little-endian
            '-f', 'S16_LE',
            '-r', '16000',
            '-c', '1',
            '-d', String(maxSeconds),
            outputPath,
        ];
    return new Promise((resolve) => {
        const proc = spawn(recorder, args, { stdio: ['ignore', 'ignore', 'ignore'] });
        // Safety net: kill the recorder if it overruns its window.
        const killer = setTimeout(() => proc.kill('SIGTERM'), (maxSeconds + 5) * 1000);
        const finish = (ok) => {
            clearTimeout(killer);
            resolve(ok);
        };
        proc.on('error', () => finish(false));
        proc.on('close', () => {
            // A real capture must exceed the 44-byte WAV header.
            if (!existsSync(outputPath)) {
                finish(false);
                return;
            }
            try {
                finish(statSync(outputPath).size > 44);
            }
            catch {
                finish(false);
            }
        });
    });
}
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// Transcription — whisper.cpp CLI
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
/**
 * Transcribe a WAV file with a local whisper CLI binary.
 *
 * `model` and `language` can originate from caller-supplied options and are
 * interpolated into a shell command line, so both are validated against a
 * strict allowlist to prevent shell injection. The audio path is
 * double-quoted and rejected if it contains characters that could escape
 * that quoting (`"` `$` `` ` `` `\`).
 *
 * NOTE(review): flag compatibility is assumed — openai-whisper accepts
 * --model/--language/--output_format, but whisper.cpp builds typically use
 * short flags (-m <path>, -l, -otxt); confirm against the installed binary.
 *
 * @param {string} audioPath - path to the recorded WAV file
 * @param {string} whisperCmd - resolved CLI binary name (from getWhisperCliPath)
 * @param {string} model - whisper model name, e.g. 'base'
 * @param {string} language - language code, e.g. 'en'
 * @returns {string} trimmed transcription text (possibly empty)
 * @throws {Error} on invalid arguments, CLI failure, or 120s timeout
 */
function transcribeWithWhisperCli(audioPath, whisperCmd, model, language) {
    const safeToken = /^[A-Za-z0-9._-]+$/;
    if (!safeToken.test(model) || !safeToken.test(language)) {
        throw new Error(`Invalid whisper model/language: ${model} / ${language}`);
    }
    // These characters would break out of the double-quoted path in the shell.
    if (/["`$\\]/.test(audioPath)) {
        throw new Error(`Unsafe audio path for shell invocation: ${audioPath}`);
    }
    const cmd = `${whisperCmd} "${audioPath}" --model ${model} --language ${language} --output_format txt 2>/dev/null`;
    return execSync(cmd, { encoding: 'utf-8', timeout: 120_000 }).trim();
}
|
|
184
|
+
// ---------------------------------------------------------------------------
|
|
185
|
+
// Transcription — Ollama (whisper-compatible audio model)
|
|
186
|
+
// ---------------------------------------------------------------------------
|
|
187
|
+
/**
 * Transcribe audio via an Ollama-hosted audio-capable model.
 *
 * Ollama exposes no dedicated speech endpoint (per the note in the original
 * source), so the WAV bytes are sent base64-encoded through the multimodal
 * `images` field of /api/generate — the pattern whisper-style models on
 * Ollama accept. A text-only model cannot produce a real transcription.
 *
 * @param {string} audioPath - path to the WAV file to transcribe
 * @param {string} host - Ollama base URL
 * @param {string} modelName - model to invoke
 * @returns {Promise<string>} trimmed transcription (may be empty)
 * @throws {Error} when the file is unreadable, the HTTP call fails, or the
 *   120s timeout fires
 */
async function transcribeWithOllama(audioPath, host, modelName) {
    // Lazy-load readFileSync: the top-of-file fs import does not include it.
    const { readFileSync } = await import('node:fs');
    const payload = {
        model: modelName,
        prompt: 'Transcribe this audio to text. Return only the transcription, no commentary.',
        images: [readFileSync(audioPath).toString('base64')],
        stream: false,
    };
    const res = await fetch(`${host}/api/generate`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
        signal: AbortSignal.timeout(120_000),
    });
    if (!res.ok) {
        const err = await res.text().catch(() => `HTTP ${res.status}`);
        throw new Error(`Ollama transcription failed: ${err}`);
    }
    const data = await res.json();
    return (data.response || '').trim();
}
|
|
217
|
+
// ---------------------------------------------------------------------------
|
|
218
|
+
// Public API
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
/**
 * Check voice input system status — microphone, transcription engine, models.
 * Call this to diagnose issues before recording.
 *
 * @param {{ollamaHost?: string, ollamaWhisperModel?: string}} [options]
 * @returns {Promise<object>} availability flags plus human-readable
 *   `issues` (blocking problems) and `suggestions` (fix-it hints)
 */
export async function checkVoiceInputStatus(options) {
    const host = options?.ollamaHost || OLLAMA_HOST;
    const whisperModel = options?.ollamaWhisperModel || 'whisper';

    const recorder = detectRecorder();
    const whisperCli = getWhisperCliPath();
    const ollamaReachable = await isOllamaReachable(host);
    let ollamaHasWhisper = false;
    if (ollamaReachable) {
        ollamaHasWhisper = await ollamaHasWhisperModel(host, whisperModel);
    }
    const transcriber = detectTranscriber(whisperCli, ollamaHasWhisper);

    const issues = [];
    const suggestions = [];

    // Blocking: no way to capture audio at all.
    if (recorder === 'none') {
        issues.push('No audio recorder found (need `rec` from sox or `arecord` from ALSA)');
        suggestions.push(process.platform === 'darwin'
            ? 'Install sox: brew install sox'
            : 'Install sox: sudo apt install sox OR sudo apt install alsa-utils');
    }

    // Blocking: no way to turn audio into text.
    if (transcriber === 'none') {
        issues.push('No transcription engine found');
        suggestions.push('Install whisper.cpp: brew install whisper-cpp (macOS)');
        suggestions.push('Or pull a whisper model in Ollama: ollama pull whisper');
        suggestions.push('Or install openai-whisper: pip install openai-whisper');
    }

    // Ollama hints (non-blocking when a local CLI is present).
    if (!ollamaReachable) {
        suggestions.push(`Ollama not reachable at ${host}. Start it: ollama serve`);
    }
    else if (!ollamaHasWhisper) {
        suggestions.push(`Ollama running but no whisper model found. Pull one: ollama pull whisper`);
    }

    return {
        available: recorder !== 'none' && transcriber !== 'none',
        recorder,
        transcriber,
        whisperCliPath: whisperCli,
        ollamaReachable,
        ollamaHasWhisper,
        issues,
        suggestions,
    };
}
|
|
272
|
+
/**
 * Record audio from the microphone and transcribe it locally.
 * Returns the transcribed text.
 *
 * This is the main entry point — call this for push-to-talk.
 *
 * @param {object} [options]
 * @param {string} [options.model='base'] - whisper model name
 * @param {string} [options.language='en'] - transcription language code
 * @param {number} [options.maxRecordSeconds=15] - recording length cap
 * @param {string} [options.silenceThreshold='1.5'] - sox silence percent
 * @param {string} [options.ollamaHost] - Ollama base URL
 * @param {string} [options.ollamaWhisperModel='whisper'] - Ollama model name
 * @returns {Promise<{text: string, source: string, durationMs: number, audioFile: null}>}
 * @throws Error if no recorder or transcriber is available, recording fails,
 *   transcription fails, or the transcription comes back empty
 */
export async function getVoiceInput(options) {
    const model = options?.model ?? 'base';
    const language = options?.language ?? 'en';
    const maxRecordSeconds = options?.maxRecordSeconds ?? 15;
    const silenceThreshold = options?.silenceThreshold ?? '1.5';
    const ollamaHost = options?.ollamaHost ?? OLLAMA_HOST;
    const ollamaWhisperModel = options?.ollamaWhisperModel ?? 'whisper';

    // --- Backend detection -------------------------------------------------
    const recorder = detectRecorder();
    if (recorder === 'none') {
        throw new Error('No audio recorder found. Install sox (brew install sox) or alsa-utils (sudo apt install alsa-utils).');
    }
    const whisperCli = getWhisperCliPath();
    const ollamaReachable = await isOllamaReachable(ollamaHost);
    let ollamaWhisper = false;
    if (ollamaReachable) {
        ollamaWhisper = await ollamaHasWhisperModel(ollamaHost, ollamaWhisperModel);
    }
    const transcriber = detectTranscriber(whisperCli, ollamaWhisper);
    if (transcriber === 'none') {
        throw new Error('No transcription engine available.\n' +
            'Install one of:\n' +
            ' - whisper.cpp: brew install whisper-cpp\n' +
            ' - openai-whisper: pip install openai-whisper\n' +
            ' - Ollama whisper: ollama pull whisper');
    }

    // --- Record + transcribe, always removing the temp WAV -----------------
    const audioPath = generateTmpPath();
    const startTime = Date.now();
    try {
        const recorded = await recordAudio(audioPath, recorder, maxRecordSeconds, silenceThreshold);
        if (!recorded) {
            throw new Error('Recording failed — no audio captured. Check microphone permissions and that the mic is connected.');
        }

        let text = '';
        let source = transcriber;
        try {
            if (transcriber === 'whisper-cli' && whisperCli) {
                text = transcribeWithWhisperCli(audioPath, whisperCli, model, language);
            }
            else if (transcriber === 'ollama') {
                text = await transcribeWithOllama(audioPath, ollamaHost, ollamaWhisperModel);
            }
        }
        catch (err) {
            // Primary transcriber failed. Fall back to Ollama when the CLI
            // was primary and Ollama has a whisper model; otherwise surface
            // the error. On fallback failure, report the ORIGINAL error.
            if (!(transcriber === 'whisper-cli' && ollamaWhisper)) {
                throw new Error(`Transcription failed: ${err instanceof Error ? err.message : String(err)}`);
            }
            try {
                text = await transcribeWithOllama(audioPath, ollamaHost, ollamaWhisperModel);
                source = 'ollama';
            }
            catch {
                throw new Error(`Transcription failed: ${err instanceof Error ? err.message : String(err)}`);
            }
        }

        if (!text) {
            throw new Error('Transcription returned empty text — microphone may not have captured speech.');
        }
        return {
            text: text.trim(),
            source,
            durationMs: Date.now() - startTime,
            audioFile: null, // temp WAV is deleted before returning
        };
    }
    finally {
        // Runs on success and on every failure path above.
        cleanupFile(audioPath);
    }
}
|
|
354
|
+
/**
 * Quick check: can voice input work right now?
 * Returns true if both a recorder and transcriber are available.
 * @param {object} [options] - forwarded to checkVoiceInputStatus
 * @returns {Promise<boolean>}
 */
export async function isVoiceInputAvailable(options) {
    const { available } = await checkVoiceInputStatus(options);
    return available;
}
|
|
362
|
+
//# sourceMappingURL=voice-input.js.map
|
package/ollama-manifest.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "kbot",
|
|
3
3
|
"display_name": "kbot",
|
|
4
4
|
"description": "Open-source terminal AI agent — 670+ tools, 35 agents, science, finance, security, music production, and more.",
|
|
5
|
-
"version": "3.
|
|
5
|
+
"version": "3.62.0",
|
|
6
6
|
"homepage": "https://kernel.chat",
|
|
7
7
|
"repository": "https://github.com/isaacsight/kernel",
|
|
8
8
|
"license": "MIT",
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@kernel.chat/kbot",
|
|
3
|
-
"version": "3.
|
|
4
|
-
"description": "Open-source terminal AI agent.
|
|
3
|
+
"version": "3.64.0",
|
|
4
|
+
"description": "Open-source terminal AI agent. 686+ tools, 35 agents, 20 providers. Fully local, fully sovereign. MIT.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
7
7
|
"type": "git",
|