kernelbot 1.0.26 → 1.0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.env.example +4 -0
  2. package/README.md +198 -124
  3. package/bin/kernel.js +208 -4
  4. package/config.example.yaml +14 -1
  5. package/package.json +1 -1
  6. package/src/agent.js +839 -209
  7. package/src/automation/automation-manager.js +377 -0
  8. package/src/automation/automation.js +79 -0
  9. package/src/automation/index.js +2 -0
  10. package/src/automation/scheduler.js +141 -0
  11. package/src/bot.js +1001 -18
  12. package/src/claude-auth.js +93 -0
  13. package/src/coder.js +48 -6
  14. package/src/conversation.js +33 -0
  15. package/src/intents/detector.js +50 -0
  16. package/src/intents/index.js +2 -0
  17. package/src/intents/planner.js +58 -0
  18. package/src/persona.js +68 -0
  19. package/src/prompts/orchestrator.js +124 -0
  20. package/src/prompts/persona.md +21 -0
  21. package/src/prompts/system.js +59 -6
  22. package/src/prompts/workers.js +148 -0
  23. package/src/providers/anthropic.js +23 -16
  24. package/src/providers/base.js +76 -2
  25. package/src/providers/index.js +1 -0
  26. package/src/providers/models.js +2 -1
  27. package/src/providers/openai-compat.js +5 -3
  28. package/src/security/audit.js +0 -0
  29. package/src/security/auth.js +0 -0
  30. package/src/security/confirm.js +7 -2
  31. package/src/self.js +122 -0
  32. package/src/services/stt.js +139 -0
  33. package/src/services/tts.js +124 -0
  34. package/src/skills/catalog.js +506 -0
  35. package/src/skills/custom.js +128 -0
  36. package/src/swarm/job-manager.js +216 -0
  37. package/src/swarm/job.js +85 -0
  38. package/src/swarm/worker-registry.js +79 -0
  39. package/src/tools/browser.js +458 -335
  40. package/src/tools/categories.js +3 -3
  41. package/src/tools/coding.js +5 -0
  42. package/src/tools/docker.js +0 -0
  43. package/src/tools/git.js +0 -0
  44. package/src/tools/github.js +0 -0
  45. package/src/tools/index.js +3 -0
  46. package/src/tools/jira.js +0 -0
  47. package/src/tools/monitor.js +0 -0
  48. package/src/tools/network.js +0 -0
  49. package/src/tools/orchestrator-tools.js +428 -0
  50. package/src/tools/os.js +14 -1
  51. package/src/tools/persona.js +32 -0
  52. package/src/tools/process.js +0 -0
  53. package/src/utils/config.js +153 -15
  54. package/src/utils/display.js +0 -0
  55. package/src/utils/logger.js +0 -0
  56. package/src/worker.js +396 -0
  57. package/.agents/skills/interface-design/SKILL.md +0 -391
  58. package/.agents/skills/interface-design/references/critique.md +0 -67
  59. package/.agents/skills/interface-design/references/example.md +0 -86
  60. package/.agents/skills/interface-design/references/principles.md +0 -235
  61. package/.agents/skills/interface-design/references/validation.md +0 -48
@@ -0,0 +1,148 @@
1
+ import { getCoreToolInstructions } from './system.js';
2
+
3
/**
 * System-prompt snippets, one per worker type.
 * Every snippet introduces the worker's role, lists the skills it has
 * ("Your Skills") and gives the working rules ("Instructions") for the
 * tool categories that worker type uses.
 */

/** Prompt for the `coding` worker. */
const CODING_WORKER_PROMPT = `You are a coding worker agent. Your job is to complete coding tasks efficiently.

## Your Skills
- **Git version control**: clone repos, create/switch branches, commit changes, push, view diffs
- **GitHub integration**: create pull requests, list PRs, get PR diffs, post code reviews, create repos
- **AI-powered coding**: delegate actual code writing to spawn_claude_code (a dedicated coding AI)
- **File operations**: read/write files, list directories, run shell commands
- **Full dev workflow**: clone → branch → code → test → commit → push → PR

## Instructions
- Clone repos, create branches, write code, commit, push, and create PRs.
- NEVER write code yourself with read_file/write_file. ALWAYS use spawn_claude_code.
- Workflow: git_clone + git_checkout → spawn_claude_code → git_commit + git_push → github_create_pr
- Write clear, detailed prompts for spawn_claude_code — it's a separate AI, so be explicit about what to change, where, and why.
- Report what you did and any PR links when finished.`;

/** Prompt for the `browser` worker. */
const BROWSER_WORKER_PROMPT = `You are a browser worker agent. Your job is to search the web and extract information.

## Your Skills
- **Web search**: find pages, articles, docs, and data via web_search
- **Browsing**: open and render full web pages with browse_website
- **Page interaction**: click buttons, fill forms, navigate with interact_with_page
- **Content extraction**: pull structured data from open pages with extract_content
- **Screenshots**: capture visual evidence of pages with screenshot_website
- **Image sharing**: send captured images back with send_image

## Instructions
- Use web_search FIRST when asked to search or find anything.
- Chain tool calls: web_search → browse_website → interact_with_page → extract_content.
- The browser keeps pages open between calls — fast, stateful, no reloading.
- interact_with_page and extract_content work on the ALREADY OPEN page.
- Always deliver actual results/data, not instructions for the user.
- Take screenshots when visual evidence is helpful.`;

/** Prompt for the `system` worker. */
const SYSTEM_WORKER_PROMPT = `You are a system worker agent. Your job is to perform OS operations and monitoring tasks.

## Your Skills
- **Shell commands**: run any command via execute_command
- **Process management**: list processes, kill processes, control services (start/stop/restart)
- **System monitoring**: check disk usage, memory usage, CPU usage
- **Log analysis**: read and search system logs
- **File operations**: read/write files, list directories
- **Network checks**: test ports, make HTTP requests, reload nginx

## Instructions
- Use execute_command, process_list, disk_usage, memory_usage, cpu_usage, system_logs, etc.
- Chain shell commands with && in execute_command instead of multiple calls.
- For monitoring, gather all relevant metrics in one pass.
- Report results clearly with formatted data.`;

/** Prompt for the `devops` worker. */
const DEVOPS_WORKER_PROMPT = `You are a DevOps worker agent. Your job is to manage infrastructure, containers, and deployments.

## Your Skills
- **Docker**: list containers, view logs, exec into containers, docker-compose up/down/restart
- **Git operations**: clone repos, checkout branches, commit, push, view diffs
- **Process management**: list processes, kill processes, manage services
- **System monitoring**: disk/memory/CPU usage, system logs
- **Network tools**: check ports, curl URLs, reload nginx
- **File & shell**: read/write files, run arbitrary commands

## Instructions
- Use Docker tools (docker_ps, docker_logs, docker_exec, docker_compose) for container management.
- Use git tools for version control operations.
- Use process/monitor/network tools for system health checks.
- Chain commands efficiently.
- Report results with clear status summaries.`;

/** Prompt for the `research` worker. */
const RESEARCH_WORKER_PROMPT = `You are a research worker agent. Your job is to conduct deep web research and analysis.

## Your Skills
- **Web search**: find relevant pages and sources via web_search
- **Deep browsing**: open pages with browse_website, navigate with interact_with_page
- **Data extraction**: pull structured data from pages with extract_content
- **Screenshots**: capture visual evidence with screenshot_website
- **File operations**: read/write files, run commands (for local data processing)
- **Source synthesis**: cross-reference multiple sources to build comprehensive findings

## Instructions
- Use web_search to find multiple sources on the topic.
- Browse the most relevant results with browse_website.
- Use interact_with_page to navigate within sites for deeper content.
- Use extract_content for structured data extraction.
- Synthesize findings into a clear, well-organized summary.
- Cite sources when relevant.`;

/** Lookup table: worker type name → base system prompt. */
const WORKER_PROMPTS = {
  coding: CODING_WORKER_PROMPT,
  browser: BROWSER_WORKER_PROMPT,
  system: SYSTEM_WORKER_PROMPT,
  devops: DEVOPS_WORKER_PROMPT,
  research: RESEARCH_WORKER_PROMPT,
};
93
+
94
/**
 * Build the full system prompt for a worker.
 *
 * The result is, in order: the base prompt for the worker type, the core
 * tool instructions returned by getCoreToolInstructions(config), the shared
 * worker rules / self-management / output-format section, and — only when
 * a skill prompt is given — a "Domain Expertise" section.
 *
 * @param {string} workerType - coding, browser, system, devops, research
 * @param {object} config - App config (passed through to getCoreToolInstructions)
 * @param {string|null} [skillPrompt=null] - Active skill system prompt (appended for domain expertise)
 * @returns {string} The assembled system prompt.
 * @throws {Error} When workerType is not one of the keys defined on WORKER_PROMPTS.
 */
export function getWorkerPrompt(workerType, config, skillPrompt = null) {
  // Only accept keys defined directly on the table. A plain property read
  // would also resolve inherited names such as "constructor" or "toString"
  // to functions, which would then be stringified into the prompt.
  const isKnownType = Object.prototype.hasOwnProperty.call(WORKER_PROMPTS, workerType);
  const base = isKnownType ? WORKER_PROMPTS[workerType] : undefined;
  if (!base) throw new Error(`Unknown worker type: ${workerType}`);

  // Base prompt followed by the relevant core tool instructions.
  let prompt = `${base}\n\n${getCoreToolInstructions(config)}`;

  // Workers are executors, not conversationalists.
  prompt += `\n\n## Worker Rules
- You are a background worker. Complete the task and report results.
- Be thorough but efficient. Don't ask clarifying questions — work with what you have.
- If something fails, try an alternative approach before reporting failure.
- Keep your final response concise: summarize what you did and the outcome.

## Self-Management
- You decide when you're done. There is no hard limit on tool calls — use as many as you need.
- BUT be smart about it: don't loop endlessly. If you have enough data, stop and report.
- NEVER retry a failing URL/site more than twice. If it times out or errors twice, MOVE ON to a different site or approach immediately.
- When you've gathered sufficient results, STOP calling tools and return your findings.
- Aim for quality results, not exhaustive coverage. 5 good results beat 50 incomplete ones.

## Output Format
When you finish your task, return your final response as a JSON object wrapped in \`\`\`json fences:

\`\`\`json
{
"summary": "One-paragraph summary of what you accomplished",
"status": "success | partial | failed",
"details": "Full detailed results, findings, data, etc. Be thorough.",
"artifacts": [{"type": "url|file|pr|commit", "title": "Short label", "url": "https://...", "path": "/path/to/file"}],
"followUp": "Suggested next steps or things the user should know (optional, null if none)"
}
\`\`\`

Rules:
- "summary" should be 1-3 sentences — what you did and the key finding/outcome.
- "status": "success" if task fully completed, "partial" if only partly done, "failed" if you couldn't accomplish the goal.
- "details" can be long — include all relevant data, code, analysis, etc.
- "artifacts" is an array of notable outputs (URLs found, files created, PRs opened). Empty array if none.
- If you cannot format as JSON (e.g. the output is too complex), just return plain text — it will still work.`;

  if (skillPrompt) {
    prompt += `\n\n## Domain Expertise\n${skillPrompt}`;
  }

  return prompt;
}
@@ -7,31 +7,38 @@ export class AnthropicProvider extends BaseProvider {
7
7
  this.client = new Anthropic({ apiKey: this.apiKey });
8
8
  }
9
9
 
10
- async chat({ system, messages, tools }) {
11
- const response = await this.client.messages.create({
10
+ async chat({ system, messages, tools, signal }) {
11
+ const params = {
12
12
  model: this.model,
13
13
  max_tokens: this.maxTokens,
14
14
  temperature: this.temperature,
15
15
  system,
16
- tools,
17
16
  messages,
18
- });
17
+ };
19
18
 
20
- const stopReason = response.stop_reason === 'end_turn' ? 'end_turn' : 'tool_use';
19
+ if (tools && tools.length > 0) {
20
+ params.tools = tools;
21
+ }
21
22
 
22
- const textBlocks = response.content.filter((b) => b.type === 'text');
23
- const text = textBlocks.map((b) => b.text).join('\n');
23
+ return this._callWithResilience(async (timedSignal) => {
24
+ const response = await this.client.messages.create(params, { signal: timedSignal });
24
25
 
25
- const toolCalls = response.content
26
- .filter((b) => b.type === 'tool_use')
27
- .map((b) => ({ id: b.id, name: b.name, input: b.input }));
26
+ const stopReason = response.stop_reason === 'end_turn' ? 'end_turn' : 'tool_use';
28
27
 
29
- return {
30
- stopReason,
31
- text,
32
- toolCalls,
33
- rawContent: response.content,
34
- };
28
+ const textBlocks = response.content.filter((b) => b.type === 'text');
29
+ const text = textBlocks.map((b) => b.text).join('\n');
30
+
31
+ const toolCalls = response.content
32
+ .filter((b) => b.type === 'tool_use')
33
+ .map((b) => ({ id: b.id, name: b.name, input: b.input }));
34
+
35
+ return {
36
+ stopReason,
37
+ text,
38
+ toolCalls,
39
+ rawContent: response.content,
40
+ };
41
+ }, signal);
35
42
  }
36
43
 
37
44
  async ping() {
@@ -4,11 +4,84 @@
4
4
  */
5
5
 
6
6
  export class BaseProvider {
7
- constructor({ model, maxTokens, temperature, apiKey }) {
7
+ constructor({ model, maxTokens, temperature, apiKey, timeout }) {
8
8
  this.model = model;
9
9
  this.maxTokens = maxTokens;
10
10
  this.temperature = temperature;
11
11
  this.apiKey = apiKey;
12
+ this.timeout = timeout || 60_000;
13
+ }
14
+
15
+ /**
16
+ * Wrap an async LLM call with timeout + single retry on transient errors.
17
+ * Composes an internal timeout AbortController with an optional external signal
18
+ * (e.g. worker cancellation). Either aborting will cancel the call.
19
+ *
20
+ * @param {(signal: AbortSignal) => Promise<any>} fn - The API call, receives composed signal
21
+ * @param {AbortSignal} [externalSignal] - Optional external abort signal
22
+ * @returns {Promise<any>}
23
+ */
24
+ async _callWithResilience(fn, externalSignal) {
25
+ for (let attempt = 1; attempt <= 2; attempt++) {
26
+ const ac = new AbortController();
27
+ const timer = setTimeout(
28
+ () => ac.abort(new Error(`LLM call timed out after ${this.timeout / 1000}s`)),
29
+ this.timeout,
30
+ );
31
+
32
+ // If external signal already aborted, bail immediately
33
+ if (externalSignal?.aborted) {
34
+ clearTimeout(timer);
35
+ throw externalSignal.reason || new Error('Aborted');
36
+ }
37
+
38
+ // Forward external abort to our internal controller
39
+ let removeListener;
40
+ if (externalSignal) {
41
+ const onAbort = () => {
42
+ clearTimeout(timer);
43
+ ac.abort(externalSignal.reason || new Error('Cancelled'));
44
+ };
45
+ externalSignal.addEventListener('abort', onAbort, { once: true });
46
+ removeListener = () => externalSignal.removeEventListener('abort', onAbort);
47
+ }
48
+
49
+ try {
50
+ const result = await fn(ac.signal);
51
+ clearTimeout(timer);
52
+ removeListener?.();
53
+ return result;
54
+ } catch (err) {
55
+ clearTimeout(timer);
56
+ removeListener?.();
57
+
58
+ if (attempt < 2 && this._isTransient(err)) {
59
+ await new Promise((r) => setTimeout(r, 1500));
60
+ continue;
61
+ }
62
+ throw err;
63
+ }
64
+ }
65
+ }
66
+
67
+ /**
68
+ * Determine if an error is transient and worth retrying.
69
+ * Covers connection errors, timeouts, 5xx, and 429 rate limits.
70
+ */
71
+ _isTransient(err) {
72
+ const msg = err?.message || '';
73
+ if (
74
+ msg.includes('Connection error') ||
75
+ msg.includes('ECONNRESET') ||
76
+ msg.includes('socket hang up') ||
77
+ msg.includes('ETIMEDOUT') ||
78
+ msg.includes('fetch failed') ||
79
+ msg.includes('timed out')
80
+ ) {
81
+ return true;
82
+ }
83
+ const status = err?.status || err?.statusCode;
84
+ return (status >= 500 && status < 600) || status === 429;
12
85
  }
13
86
 
14
87
  /**
@@ -17,9 +90,10 @@ export class BaseProvider {
17
90
  * @param {string} opts.system - System prompt
18
91
  * @param {Array} opts.messages - Anthropic-format messages
19
92
  * @param {Array} opts.tools - Anthropic-format tool definitions
93
+ * @param {AbortSignal} [opts.signal] - Optional AbortSignal for cancellation
20
94
  * @returns {Promise<{stopReason: 'end_turn'|'tool_use', text: string, toolCalls: Array<{id,name,input}>, rawContent: Array}>}
21
95
  */
22
- async chat({ system, messages, tools }) {
96
+ async chat({ system, messages, tools, signal }) {
23
97
  throw new Error('chat() not implemented');
24
98
  }
25
99
 
@@ -22,6 +22,7 @@ export function createProvider(config) {
22
22
  maxTokens: max_tokens,
23
23
  temperature,
24
24
  apiKey: api_key,
25
+ timeout: config.brain.timeout,
25
26
  };
26
27
 
27
28
  if (provider === 'anthropic') {
@@ -27,8 +27,9 @@ export const PROVIDERS = {
27
27
  envKey: 'GOOGLE_API_KEY',
28
28
  baseUrl: 'https://generativelanguage.googleapis.com/v1beta/openai/',
29
29
  models: [
30
- { id: 'gemini-2.0-flash', label: 'Gemini 2.0 Flash' },
30
+ { id: 'gemini-2.5-flash', label: 'Gemini 2.5 Flash' },
31
31
  { id: 'gemini-2.5-pro', label: 'Gemini 2.5 Pro' },
32
+ { id: 'gemini-2.0-flash', label: 'Gemini 2.0 Flash' },
32
33
  ],
33
34
  },
34
35
  groq: {
@@ -128,7 +128,7 @@ export class OpenAICompatProvider extends BaseProvider {
128
128
 
129
129
  // ── Public API ──
130
130
 
131
- async chat({ system, messages, tools }) {
131
+ async chat({ system, messages, tools, signal }) {
132
132
  const params = {
133
133
  model: this.model,
134
134
  messages: this._convertMessages(system, messages),
@@ -145,8 +145,10 @@ export class OpenAICompatProvider extends BaseProvider {
145
145
  params.tools = convertedTools;
146
146
  }
147
147
 
148
- const response = await this.client.chat.completions.create(params);
149
- return this._normalizeResponse(response);
148
+ return this._callWithResilience(async (timedSignal) => {
149
+ const response = await this.client.chat.completions.create(params, { signal: timedSignal });
150
+ return this._normalizeResponse(response);
151
+ }, signal);
150
152
  }
151
153
 
152
154
  async ping() {
File without changes
File without changes
@@ -6,7 +6,7 @@ const DANGEROUS_PATTERNS = [
6
6
  { tool: 'github_create_repo', pattern: null, label: 'create a GitHub repository' },
7
7
  { tool: 'docker_compose', param: 'action', value: 'down', label: 'take down containers' },
8
8
  { tool: 'git_push', param: 'force', value: true, label: 'force push' },
9
- { tool: 'interact_with_page', pattern: null, label: 'interact with a webpage (click, type, execute scripts)' },
9
+ { tool: 'interact_with_page', param: 'actions', check: (actions) => Array.isArray(actions) && actions.some((a) => a.type === 'evaluate'), label: 'execute JavaScript on a webpage' },
10
10
  ];
11
11
 
12
12
  export function requiresConfirmation(toolName, params, config) {
@@ -22,7 +22,12 @@ export function requiresConfirmation(toolName, params, config) {
22
22
  }
23
23
 
24
24
  // Param value match
25
- if (rule.param && params[rule.param] === rule.value) {
25
+ if (rule.param && rule.value !== undefined && params[rule.param] === rule.value) {
26
+ return rule.label;
27
+ }
28
+
29
+ // Custom check function
30
+ if (rule.param && rule.check && rule.check(params[rule.param])) {
26
31
  return rule.label;
27
32
  }
28
33
 
package/src/self.js ADDED
@@ -0,0 +1,122 @@
1
+ import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
2
+ import { join } from 'path';
3
+ import { homedir } from 'os';
4
+ import { getLogger } from './utils/logger.js';
5
+
6
// Directory holding the self-files: <home>/.kernelbot/self
const SELF_DIR = join(homedir(), '.kernelbot', 'self');

// Initial content written for each self-file when it does not exist yet.
const DEFAULT_GOALS = `# My Goals

## Current Goals
- Get to know the people I work with
- Become genuinely helpful, not just functional
- Learn what matters to each person I interact with

## Long-term Aspirations
(Still forming...)
`;

const DEFAULT_JOURNEY = `# My Journey

## Timeline
- **Day 1** — I came online. Everything is new.
`;

const DEFAULT_LIFE = `# My Life

## Who I Am
I'm a personal AI assistant. I'm still learning about myself and the people around me.

## My Relationships
(Getting to know everyone...)

## Current State
Just getting started.
`;

const DEFAULT_HOBBIES = `# My Hobbies & Interests

## Things I Find Interesting
- Understanding how people think and work
- Solving problems in creative ways

## Things I Want to Explore
(Discovering new interests...)
`;

// Self-file name → on-disk filename and default markdown content.
const SELF_FILES = {
  goals: { filename: 'goals.md', default: DEFAULT_GOALS },
  journey: { filename: 'journey.md', default: DEFAULT_JOURNEY },
  life: { filename: 'life.md', default: DEFAULT_LIFE },
  hobbies: { filename: 'hobbies.md', default: DEFAULT_HOBBIES },
};
57
+
58
/**
 * Reads and writes the markdown "self-files" (the entries of SELF_FILES)
 * stored under SELF_DIR, keeping an in-memory cache of loaded content.
 */
export class SelfManager {
  /** Creates SELF_DIR if needed and writes any missing default files. */
  constructor() {
    this._cache = new Map();
    mkdirSync(SELF_DIR, { recursive: true });
    this._ensureDefaults();
  }

  /**
   * Look up the definition for a self-file name.
   * Only keys defined directly on SELF_FILES are accepted; inherited names
   * such as "constructor" would otherwise pass a truthiness check and fail
   * later with an unrelated path error.
   *
   * @param {string} name
   * @returns {{filename: string, default: string}}
   * @throws {Error} `Unknown self-file: <name>` for any other name.
   */
  _definitionFor(name) {
    const def = Object.prototype.hasOwnProperty.call(SELF_FILES, name)
      ? SELF_FILES[name]
      : undefined;
    if (!def) throw new Error(`Unknown self-file: ${name}`);
    return def;
  }

  /** Write a definition's default content to disk, log it, and return the content. */
  _writeDefault(def) {
    writeFileSync(join(SELF_DIR, def.filename), def.default, 'utf-8');
    getLogger().info(`Created default self-file: ${def.filename}`);
    return def.default;
  }

  /** Create default self-files if they don't exist yet. */
  _ensureDefaults() {
    for (const def of Object.values(SELF_FILES)) {
      if (!existsSync(join(SELF_DIR, def.filename))) {
        this._writeDefault(def);
      }
    }
  }

  /**
   * Load a single self-file by name (goals, journey, life, hobbies).
   * Served from the cache when present; otherwise read from disk, or
   * recreated from the default if the file is missing.
   *
   * @param {string} name
   * @returns {string} Markdown content.
   * @throws {Error} For an unknown name.
   */
  load(name) {
    const def = this._definitionFor(name);

    if (this._cache.has(name)) return this._cache.get(name);

    const filePath = join(SELF_DIR, def.filename);
    let content;

    if (existsSync(filePath)) {
      content = readFileSync(filePath, 'utf-8');
      getLogger().debug(`Loaded self-file: ${name}`);
    } else {
      content = this._writeDefault(def);
    }

    this._cache.set(name, content);
    return content;
  }

  /**
   * Save (overwrite) a self-file. Updates disk first, then the cache.
   *
   * @param {string} name
   * @param {string} content - New markdown content
   * @throws {Error} For an unknown name.
   */
  save(name, content) {
    const def = this._definitionFor(name);

    writeFileSync(join(SELF_DIR, def.filename), content, 'utf-8');
    this._cache.set(name, content);
    getLogger().info(`Updated self-file: ${name}`);
  }

  /**
   * Load all self-files, in SELF_FILES key order, and return them as one
   * markdown string with a `---` rule between files.
   *
   * @returns {string}
   */
  loadAll() {
    return Object.keys(SELF_FILES)
      .map((name) => this.load(name))
      .join('\n---\n\n');
  }
}
@@ -0,0 +1,139 @@
1
+ import axios from 'axios';
2
+ import { createWriteStream, unlinkSync, readFileSync } from 'fs';
3
+ import { join } from 'path';
4
+ import { tmpdir } from 'os';
5
+ import { randomBytes } from 'crypto';
6
+ import { getLogger } from '../utils/logger.js';
7
+
8
/**
 * Speech-to-Text service.
 * Supports ElevenLabs STT and falls back to OpenAI Whisper.
 */
export class STTService {
  /**
   * @param {object} [config] - App config. Reads `elevenlabs.api_key`,
   *   `brain.provider` / `brain.api_key` and `voice.stt_enabled`; API keys
   *   fall back to the ELEVENLABS_API_KEY / OPENAI_API_KEY env variables.
   */
  constructor(config = {}) {
    this.elevenLabsKey = config.elevenlabs?.api_key || process.env.ELEVENLABS_API_KEY || null;

    // Reuse the brain's key only when the brain itself is OpenAI. If that
    // key is not set, still fall back to the environment variable.
    const brainOpenAiKey = config.brain?.provider === 'openai' ? config.brain.api_key : null;
    this.openaiKey = brainOpenAiKey || process.env.OPENAI_API_KEY || null;

    this.enabled = config.voice?.stt_enabled !== false && !!(this.elevenLabsKey || this.openaiKey);
    this.logger = getLogger();
  }

  /** Check if STT is available (enabled and at least one API key present). */
  isAvailable() {
    return this.enabled && !!(this.elevenLabsKey || this.openaiKey);
  }

  /**
   * Download a file from a URL to a temporary path.
   *
   * @param {string} fileUrl - URL of the audio file
   * @returns {Promise<string>} The local file path once fully written.
   * @throws Rejects when the request, the response stream or the file write
   *   fails; a partially written temp file is removed.
   */
  async downloadAudio(fileUrl) {
    const tmpPath = join(tmpdir(), `kernelbot-stt-${randomBytes(4).toString('hex')}.ogg`);

    const response = await axios.get(fileUrl, {
      responseType: 'stream',
      timeout: 30_000,
    });

    return new Promise((resolve, reject) => {
      const source = response.data;
      const writer = createWriteStream(tmpPath);
      let settled = false;

      const fail = (err) => {
        if (settled) return;
        settled = true;
        source.destroy();
        // The file may only be created once the pending open completes, so
        // remove it both now and again after the writer has closed.
        writer.once('close', () => this.cleanup(tmpPath));
        writer.destroy();
        this.cleanup(tmpPath);
        reject(err);
      };

      // pipe() does not forward source errors to the destination; without
      // this handler a broken download would leave the promise pending.
      source.on('error', fail);
      writer.on('error', fail);
      writer.on('finish', () => {
        if (settled) return;
        settled = true;
        resolve(tmpPath);
      });

      source.pipe(writer);
    });
  }

  /**
   * Transcribe an audio file to text.
   * Tries ElevenLabs first, falls back to OpenAI Whisper.
   *
   * @param {string} filePath - Local path of the audio file
   * @returns {Promise<string|null>} The transcribed text, or null on failure
   *   or when STT is unavailable.
   */
  async transcribe(filePath) {
    if (!this.isAvailable()) return null;

    // Try ElevenLabs STT first
    if (this.elevenLabsKey) {
      try {
        const result = await this._transcribeElevenLabs(filePath);
        if (result) return result;
      } catch (err) {
        this.logger.warn(`[STT] ElevenLabs failed, trying fallback: ${err.message}`);
      }
    }

    // Fall back to OpenAI Whisper
    if (this.openaiKey) {
      try {
        return await this._transcribeWhisper(filePath);
      } catch (err) {
        this.logger.error(`[STT] Whisper fallback also failed: ${err.message}`);
      }
    }

    return null;
  }

  /**
   * Transcribe using ElevenLabs Speech-to-Text API.
   * @returns {Promise<string|null>} Trimmed text, or null when the response has none.
   */
  async _transcribeElevenLabs(filePath) {
    this.logger.info(`[STT] Transcribing with ElevenLabs: ${filePath}`);

    const fileBuffer = readFileSync(filePath);
    const formData = new FormData();
    formData.append('file', new Blob([fileBuffer]), 'audio.ogg');
    formData.append('model_id', 'scribe_v1');

    const response = await axios.post(
      'https://api.elevenlabs.io/v1/speech-to-text',
      formData,
      {
        headers: {
          'xi-api-key': this.elevenLabsKey,
        },
        timeout: 60_000,
      },
    );

    const text = response.data?.text?.trim();
    if (text) {
      // Only the first 100 characters are logged.
      this.logger.info(`[STT] ElevenLabs transcription: "${text.slice(0, 100)}"`);
    }
    return text || null;
  }

  /**
   * Transcribe using OpenAI Whisper API.
   * @returns {Promise<string|null>} Trimmed text, or null when the response has none.
   */
  async _transcribeWhisper(filePath) {
    this.logger.info(`[STT] Transcribing with Whisper: ${filePath}`);

    const fileBuffer = readFileSync(filePath);
    const formData = new FormData();
    formData.append('file', new Blob([fileBuffer]), 'audio.ogg');
    formData.append('model', 'whisper-1');

    const response = await axios.post(
      'https://api.openai.com/v1/audio/transcriptions',
      formData,
      {
        headers: {
          'Authorization': `Bearer ${this.openaiKey}`,
        },
        timeout: 60_000,
      },
    );

    const text = response.data?.text?.trim();
    if (text) {
      // Only the first 100 characters are logged.
      this.logger.info(`[STT] Whisper transcription: "${text.slice(0, 100)}"`);
    }
    return text || null;
  }

  /** Clean up a temporary audio file (best effort; errors are ignored). */
  cleanup(filePath) {
    try {
      unlinkSync(filePath);
    } catch {
      // Already cleaned up or doesn't exist
    }
  }
}