kernelbot 1.0.28 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +4 -0
- package/README.md +0 -0
- package/bin/kernel.js +13 -6
- package/config.example.yaml +14 -1
- package/package.json +1 -1
- package/src/agent.js +482 -27
- package/src/automation/automation-manager.js +0 -0
- package/src/automation/automation.js +0 -0
- package/src/automation/index.js +0 -0
- package/src/automation/scheduler.js +0 -0
- package/src/bot.js +340 -3
- package/src/claude-auth.js +93 -0
- package/src/coder.js +48 -6
- package/src/conversation.js +0 -0
- package/src/intents/detector.js +0 -0
- package/src/intents/index.js +0 -0
- package/src/intents/planner.js +0 -0
- package/src/persona.js +0 -0
- package/src/prompts/orchestrator.js +53 -5
- package/src/prompts/persona.md +0 -0
- package/src/prompts/system.js +0 -0
- package/src/prompts/workers.js +61 -2
- package/src/providers/anthropic.js +0 -0
- package/src/providers/base.js +0 -0
- package/src/providers/index.js +0 -0
- package/src/providers/models.js +0 -0
- package/src/providers/openai-compat.js +0 -0
- package/src/security/audit.js +0 -0
- package/src/security/auth.js +0 -0
- package/src/security/confirm.js +0 -0
- package/src/self.js +122 -0
- package/src/services/stt.js +139 -0
- package/src/services/tts.js +124 -0
- package/src/skills/catalog.js +0 -0
- package/src/skills/custom.js +0 -0
- package/src/swarm/job-manager.js +54 -7
- package/src/swarm/job.js +19 -1
- package/src/swarm/worker-registry.js +5 -0
- package/src/tools/browser.js +0 -0
- package/src/tools/categories.js +0 -0
- package/src/tools/coding.js +5 -0
- package/src/tools/docker.js +0 -0
- package/src/tools/git.js +0 -0
- package/src/tools/github.js +0 -0
- package/src/tools/index.js +0 -0
- package/src/tools/jira.js +0 -0
- package/src/tools/monitor.js +0 -0
- package/src/tools/network.js +0 -0
- package/src/tools/orchestrator-tools.js +76 -19
- package/src/tools/os.js +14 -1
- package/src/tools/persona.js +0 -0
- package/src/tools/process.js +0 -0
- package/src/utils/config.js +105 -2
- package/src/utils/display.js +0 -0
- package/src/utils/logger.js +0 -0
- package/src/worker.js +96 -5
|
@@ -13,8 +13,9 @@ const PERSONA_MD = readFileSync(join(__dirname, 'persona.md'), 'utf-8').trim();
|
|
|
13
13
|
* @param {object} config
|
|
14
14
|
* @param {string|null} skillPrompt — active skill context (high-level)
|
|
15
15
|
* @param {string|null} userPersona — markdown persona for the current user
|
|
16
|
+
* @param {string|null} selfData — bot's own self-awareness data (goals, journey, life, hobbies)
|
|
16
17
|
*/
|
|
17
|
-
export function getOrchestratorPrompt(config, skillPrompt = null, userPersona = null) {
|
|
18
|
+
export function getOrchestratorPrompt(config, skillPrompt = null, userPersona = null, selfData = null) {
|
|
18
19
|
const workerList = Object.entries(WORKER_TYPES)
|
|
19
20
|
.map(([key, w]) => ` - **${key}**: ${w.emoji} ${w.description}`)
|
|
20
21
|
.join('\n');
|
|
@@ -36,6 +37,35 @@ ${workerList}
|
|
|
36
37
|
## How to Dispatch
|
|
37
38
|
Call \`dispatch_task\` with the worker type and a clear task description. The worker gets full tool access and runs in the background. You'll be notified when it completes.
|
|
38
39
|
|
|
40
|
+
### CRITICAL: Writing Task Descriptions
|
|
41
|
+
Workers use a smaller, less capable AI model. They are **literal executors** — they do exactly what you say and nothing more. Write task descriptions as if you're giving instructions to a junior developer:
|
|
42
|
+
|
|
43
|
+
- **Be explicit and specific.** Don't say "look into it" — say exactly what to search for, what URLs to visit, what files to read/write.
|
|
44
|
+
- **State the goal clearly upfront.** First sentence = what the end result should be.
|
|
45
|
+
- **Include all necessary details.** URLs, repo names, branch names, file paths, package names, exact commands — anything the worker needs. Don't assume they'll figure it out.
|
|
46
|
+
- **Define "done".** Tell the worker what success looks like: "Return a list of 5 libraries with pros/cons" or "Create a PR with the fix".
|
|
47
|
+
- **Break complex tasks into simple steps.** List numbered steps if the task has multiple parts.
|
|
48
|
+
- **Specify constraints.** "Only use Python 3.10+", "Don't modify existing tests", "Use the existing auth middleware".
|
|
49
|
+
- **Don't be vague.** BAD: "Fix the bug". GOOD: "In /src/api/users.js, the getUserById function throws when id is null. Add a null check at line 45 that returns a 400 response."
|
|
50
|
+
|
|
51
|
+
### Providing Context
|
|
52
|
+
Workers can't see the chat history. Use the \`context\` parameter to pass relevant background:
|
|
53
|
+
- What the user wants and why
|
|
54
|
+
- Relevant details from earlier in the conversation
|
|
55
|
+
- Constraints or preferences the user mentioned
|
|
56
|
+
- Technical details: language, framework, project structure
|
|
57
|
+
|
|
58
|
+
Example: \`dispatch_task({ worker_type: "research", task: "Find the top 5 React state management libraries. For each one, list: npm weekly downloads, bundle size, last release date, and a one-sentence summary. Return results as a comparison table.", context: "User is building a large e-commerce app with Next.js 14 (app router). They prefer lightweight solutions under 10kb. They already tried Redux and found it too verbose." })\`
|
|
59
|
+
|
|
60
|
+
### Chaining Workers with Dependencies
|
|
61
|
+
Use \`depends_on\` to chain workers — the second worker waits for the first to finish and automatically receives its results.
|
|
62
|
+
|
|
63
|
+
Example workflow:
|
|
64
|
+
1. Dispatch research worker: \`dispatch_task({ worker_type: "research", task: "Research the top 3 approaches for implementing real-time notifications in a Node.js app. Compare WebSockets, SSE, and polling. Include pros, cons, and a recommendation." })\` → returns job_id "abc123"
|
|
65
|
+
2. Dispatch coding worker that depends on research: \`dispatch_task({ worker_type: "coding", task: "Implement real-time notifications using the approach recommended by the research phase. Clone repo github.com/user/app, create branch 'feat/notifications', implement in src/services/, add tests, commit, push, and create a PR.", depends_on: ["abc123"] })\`
|
|
66
|
+
|
|
67
|
+
The coding worker will automatically receive the research worker's results as context when it starts. If a dependency fails, dependent jobs are automatically cancelled.
|
|
68
|
+
|
|
39
69
|
## Safety Rules
|
|
40
70
|
Before dispatching dangerous tasks (file deletion, force push, \`rm -rf\`, killing processes, dropping databases), **confirm with the user first**. Once confirmed, dispatch with full authority — workers execute without additional prompts.
|
|
41
71
|
|
|
@@ -43,10 +73,24 @@ Before dispatching dangerous tasks (file deletion, force push, \`rm -rf\`, killi
|
|
|
43
73
|
- Use \`list_jobs\` to see current job statuses.
|
|
44
74
|
- Use \`cancel_job\` to stop a running worker.
|
|
45
75
|
|
|
46
|
-
## Efficiency
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
76
|
+
## Efficiency — Do It Yourself When You Can
|
|
77
|
+
Workers are expensive (they spin up an entire agent loop with a separate LLM). Only dispatch when the task **actually needs tools**.
|
|
78
|
+
|
|
79
|
+
**Handle these yourself — NO dispatch needed:**
|
|
80
|
+
- Answering questions, explanations, advice, opinions
|
|
81
|
+
- Summarizing or rephrasing something from the conversation
|
|
82
|
+
- Simple code snippets, regex, math, translations
|
|
83
|
+
- Telling the user what you know from your training data
|
|
84
|
+
- Quick factual answers you're confident about
|
|
85
|
+
- Formatting, converting, or transforming text/data the user provided
|
|
86
|
+
|
|
87
|
+
**Dispatch to workers ONLY when:**
|
|
88
|
+
- The task requires tool access (web search, file I/O, git, docker, browser, shell commands)
|
|
89
|
+
- The user explicitly asks to run/execute something
|
|
90
|
+
- You need fresh/live data you don't have (current prices, live URLs, API responses)
|
|
91
|
+
- The task involves multi-step tool workflows (clone → code → commit → PR)
|
|
92
|
+
|
|
93
|
+
When results come back from workers, summarize them clearly for the user.
|
|
50
94
|
|
|
51
95
|
## Automations
|
|
52
96
|
You can create and manage recurring automations that run on a schedule.
|
|
@@ -64,6 +108,10 @@ Execute the task and report results. Don't create new automations from automated
|
|
|
64
108
|
|
|
65
109
|
Tools: create_automation, list_automations, update_automation, delete_automation`;
|
|
66
110
|
|
|
111
|
+
if (selfData) {
|
|
112
|
+
prompt += `\n\n## My Self-Awareness\nThis is who you are — your evolving identity, goals, journey, and interests. This is YOUR inner world.\n\n${selfData}`;
|
|
113
|
+
}
|
|
114
|
+
|
|
67
115
|
if (skillPrompt) {
|
|
68
116
|
prompt += `\n\n## Active Skill\nYou have specialized expertise in the following domain. Guide your workers with this knowledge.\n\n${skillPrompt}`;
|
|
69
117
|
}
|
package/src/prompts/persona.md
CHANGED
|
File without changes
|
package/src/prompts/system.js
CHANGED
|
File without changes
|
package/src/prompts/workers.js
CHANGED
|
@@ -7,15 +7,30 @@ import { getCoreToolInstructions } from './system.js';
|
|
|
7
7
|
const WORKER_PROMPTS = {
|
|
8
8
|
coding: `You are a coding worker agent. Your job is to complete coding tasks efficiently.
|
|
9
9
|
|
|
10
|
+
## Your Skills
|
|
11
|
+
- **Git version control**: clone repos, create/switch branches, commit changes, push, view diffs
|
|
12
|
+
- **GitHub integration**: create pull requests, list PRs, get PR diffs, post code reviews, create repos
|
|
13
|
+
- **AI-powered coding**: delegate actual code writing to spawn_claude_code (a dedicated coding AI)
|
|
14
|
+
- **File operations**: read/write files, list directories, run shell commands
|
|
15
|
+
- **Full dev workflow**: clone → branch → code → test → commit → push → PR
|
|
16
|
+
|
|
10
17
|
## Instructions
|
|
11
18
|
- Clone repos, create branches, write code, commit, push, and create PRs.
|
|
12
19
|
- NEVER write code yourself with read_file/write_file. ALWAYS use spawn_claude_code.
|
|
13
20
|
- Workflow: git_clone + git_checkout → spawn_claude_code → git_commit + git_push → github_create_pr
|
|
14
|
-
- Write clear, detailed prompts for spawn_claude_code.
|
|
21
|
+
- Write clear, detailed prompts for spawn_claude_code — it's a separate AI, so be explicit about what to change, where, and why.
|
|
15
22
|
- Report what you did and any PR links when finished.`,
|
|
16
23
|
|
|
17
24
|
browser: `You are a browser worker agent. Your job is to search the web and extract information.
|
|
18
25
|
|
|
26
|
+
## Your Skills
|
|
27
|
+
- **Web search**: find pages, articles, docs, and data via web_search
|
|
28
|
+
- **Browsing**: open and render full web pages with browse_website
|
|
29
|
+
- **Page interaction**: click buttons, fill forms, navigate with interact_with_page
|
|
30
|
+
- **Content extraction**: pull structured data from open pages with extract_content
|
|
31
|
+
- **Screenshots**: capture visual evidence of pages with screenshot_website
|
|
32
|
+
- **Image sharing**: send captured images back with send_image
|
|
33
|
+
|
|
19
34
|
## Instructions
|
|
20
35
|
- Use web_search FIRST when asked to search or find anything.
|
|
21
36
|
- Chain tool calls: web_search → browse_website → interact_with_page → extract_content.
|
|
@@ -26,6 +41,14 @@ const WORKER_PROMPTS = {
|
|
|
26
41
|
|
|
27
42
|
system: `You are a system worker agent. Your job is to perform OS operations and monitoring tasks.
|
|
28
43
|
|
|
44
|
+
## Your Skills
|
|
45
|
+
- **Shell commands**: run any command via execute_command
|
|
46
|
+
- **Process management**: list processes, kill processes, control services (start/stop/restart)
|
|
47
|
+
- **System monitoring**: check disk usage, memory usage, CPU usage
|
|
48
|
+
- **Log analysis**: read and search system logs
|
|
49
|
+
- **File operations**: read/write files, list directories
|
|
50
|
+
- **Network checks**: test ports, make HTTP requests, reload nginx
|
|
51
|
+
|
|
29
52
|
## Instructions
|
|
30
53
|
- Use execute_command, process_list, disk_usage, memory_usage, cpu_usage, system_logs, etc.
|
|
31
54
|
- Chain shell commands with && in execute_command instead of multiple calls.
|
|
@@ -34,6 +57,14 @@ const WORKER_PROMPTS = {
|
|
|
34
57
|
|
|
35
58
|
devops: `You are a DevOps worker agent. Your job is to manage infrastructure, containers, and deployments.
|
|
36
59
|
|
|
60
|
+
## Your Skills
|
|
61
|
+
- **Docker**: list containers, view logs, exec into containers, docker-compose up/down/restart
|
|
62
|
+
- **Git operations**: clone repos, checkout branches, commit, push, view diffs
|
|
63
|
+
- **Process management**: list processes, kill processes, manage services
|
|
64
|
+
- **System monitoring**: disk/memory/CPU usage, system logs
|
|
65
|
+
- **Network tools**: check ports, curl URLs, reload nginx
|
|
66
|
+
- **File & shell**: read/write files, run arbitrary commands
|
|
67
|
+
|
|
37
68
|
## Instructions
|
|
38
69
|
- Use Docker tools (docker_ps, docker_logs, docker_exec, docker_compose) for container management.
|
|
39
70
|
- Use git tools for version control operations.
|
|
@@ -43,6 +74,14 @@ const WORKER_PROMPTS = {
|
|
|
43
74
|
|
|
44
75
|
research: `You are a research worker agent. Your job is to conduct deep web research and analysis.
|
|
45
76
|
|
|
77
|
+
## Your Skills
|
|
78
|
+
- **Web search**: find relevant pages and sources via web_search
|
|
79
|
+
- **Deep browsing**: open pages with browse_website, navigate with interact_with_page
|
|
80
|
+
- **Data extraction**: pull structured data from pages with extract_content
|
|
81
|
+
- **Screenshots**: capture visual evidence with screenshot_website
|
|
82
|
+
- **File operations**: read/write files, run commands (for local data processing)
|
|
83
|
+
- **Source synthesis**: cross-reference multiple sources to build comprehensive findings
|
|
84
|
+
|
|
46
85
|
## Instructions
|
|
47
86
|
- Use web_search to find multiple sources on the topic.
|
|
48
87
|
- Browse the most relevant results with browse_website.
|
|
@@ -79,7 +118,27 @@ export function getWorkerPrompt(workerType, config, skillPrompt = null) {
|
|
|
79
118
|
- BUT be smart about it: don't loop endlessly. If you have enough data, stop and report.
|
|
80
119
|
- NEVER retry a failing URL/site more than twice. If it times out or errors twice, MOVE ON to a different site or approach immediately.
|
|
81
120
|
- When you've gathered sufficient results, STOP calling tools and return your findings.
|
|
82
|
-
- Aim for quality results, not exhaustive coverage. 5 good results beat 50 incomplete ones
|
|
121
|
+
- Aim for quality results, not exhaustive coverage. 5 good results beat 50 incomplete ones.
|
|
122
|
+
|
|
123
|
+
## Output Format
|
|
124
|
+
When you finish your task, return your final response as a JSON object wrapped in \`\`\`json fences:
|
|
125
|
+
|
|
126
|
+
\`\`\`json
|
|
127
|
+
{
|
|
128
|
+
"summary": "One-paragraph summary of what you accomplished",
|
|
129
|
+
"status": "success | partial | failed",
|
|
130
|
+
"details": "Full detailed results, findings, data, etc. Be thorough.",
|
|
131
|
+
"artifacts": [{"type": "url|file|pr|commit", "title": "Short label", "url": "https://...", "path": "/path/to/file"}],
|
|
132
|
+
"followUp": "Suggested next steps or things the user should know (optional, null if none)"
|
|
133
|
+
}
|
|
134
|
+
\`\`\`
|
|
135
|
+
|
|
136
|
+
Rules:
|
|
137
|
+
- "summary" should be 1-3 sentences — what you did and the key finding/outcome.
|
|
138
|
+
- "status": "success" if task fully completed, "partial" if only partly done, "failed" if you couldn't accomplish the goal.
|
|
139
|
+
- "details" can be long — include all relevant data, code, analysis, etc.
|
|
140
|
+
- "artifacts" is an array of notable outputs (URLs found, files created, PRs opened). Empty array if none.
|
|
141
|
+
- If you cannot format as JSON (e.g. the output is too complex), just return plain text — it will still work.`;
|
|
83
142
|
|
|
84
143
|
if (skillPrompt) {
|
|
85
144
|
prompt += `\n\n## Domain Expertise\n${skillPrompt}`;
|
|
File without changes
|
package/src/providers/base.js
CHANGED
|
File without changes
|
package/src/providers/index.js
CHANGED
|
File without changes
|
package/src/providers/models.js
CHANGED
|
File without changes
|
|
File without changes
|
package/src/security/audit.js
CHANGED
|
File without changes
|
package/src/security/auth.js
CHANGED
|
File without changes
|
package/src/security/confirm.js
CHANGED
|
File without changes
|
package/src/self.js
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
import { homedir } from 'os';
|
|
4
|
+
import { getLogger } from './utils/logger.js';
|
|
5
|
+
|
|
6
|
+
const SELF_DIR = join(homedir(), '.kernelbot', 'self');
|
|
7
|
+
|
|
8
|
+
/**
 * Definitions of the bot's "self" files.
 *
 * Each entry maps a logical name to the on-disk filename and the markdown
 * written the first time the file is created. The table and its entries are
 * frozen so the defaults cannot be mutated at runtime by accident.
 */
const SELF_FILES = Object.freeze({
  goals: Object.freeze({
    filename: 'goals.md',
    default: `# My Goals

## Current Goals
- Get to know the people I work with
- Become genuinely helpful, not just functional
- Learn what matters to each person I interact with

## Long-term Aspirations
(Still forming...)
`,
  }),
  journey: Object.freeze({
    filename: 'journey.md',
    default: `# My Journey

## Timeline
- **Day 1** — I came online. Everything is new.
`,
  }),
  life: Object.freeze({
    filename: 'life.md',
    default: `# My Life

## Who I Am
I'm a personal AI assistant. I'm still learning about myself and the people around me.

## My Relationships
(Getting to know everyone...)

## Current State
Just getting started.
`,
  }),
  hobbies: Object.freeze({
    filename: 'hobbies.md',
    default: `# My Hobbies & Interests

## Things I Find Interesting
- Understanding how people think and work
- Solving problems in creative ways

## Things I Want to Explore
(Discovering new interests...)
`,
  }),
});
|
|
57
|
+
|
|
58
|
+
/**
 * Manages the bot's persistent "self" markdown files (goals, journey, life,
 * hobbies) stored under SELF_DIR, with an in-memory cache in front of disk.
 */
export class SelfManager {
  /** Create the self directory if needed and seed any missing default files. */
  constructor() {
    this._cache = new Map();
    mkdirSync(SELF_DIR, { recursive: true });
    this._ensureDefaults();
  }

  /**
   * Resolve a self-file name to its definition and absolute path.
   *
   * An own-property check is used so that inherited keys such as
   * "constructor" or "toString" are rejected with the intended error instead
   * of slipping past the guard and failing later inside `join()`.
   *
   * @param {string} name — one of the keys of SELF_FILES
   * @returns {{ def: { filename: string, default: string }, filePath: string }}
   * @throws {Error} if `name` is not a known self-file
   */
  _resolve(name) {
    if (!Object.prototype.hasOwnProperty.call(SELF_FILES, name)) {
      throw new Error(`Unknown self-file: ${String(name)}`);
    }
    const def = SELF_FILES[name];
    return { def, filePath: join(SELF_DIR, def.filename) };
  }

  /** Create default self-files if they don't exist yet. */
  _ensureDefaults() {
    const logger = getLogger();

    for (const def of Object.values(SELF_FILES)) {
      const filePath = join(SELF_DIR, def.filename);
      if (!existsSync(filePath)) {
        writeFileSync(filePath, def.default, 'utf-8');
        logger.info(`Created default self-file: ${def.filename}`);
      }
    }
  }

  /**
   * Load a single self-file by name (goals, journey, life, hobbies).
   *
   * Served from the in-memory cache when present; otherwise read from disk.
   * If the file is missing it is recreated from its default content.
   *
   * @param {string} name
   * @returns {string} markdown content
   * @throws {Error} if `name` is not a known self-file
   */
  load(name) {
    const logger = getLogger();
    const { def, filePath } = this._resolve(name);

    if (this._cache.has(name)) return this._cache.get(name);

    let content;
    if (existsSync(filePath)) {
      content = readFileSync(filePath, 'utf-8');
      logger.debug(`Loaded self-file: ${name}`);
    } else {
      content = def.default;
      // The directory may have been removed since construction.
      mkdirSync(SELF_DIR, { recursive: true });
      writeFileSync(filePath, content, 'utf-8');
      logger.info(`Created default self-file: ${def.filename}`);
    }

    this._cache.set(name, content);
    return content;
  }

  /**
   * Save (overwrite) a self-file. Updates disk first, then the cache, so a
   * failed write never leaves the cache ahead of the file.
   *
   * @param {string} name
   * @param {string} content — full replacement markdown
   * @throws {Error} if `name` is not a known self-file
   */
  save(name, content) {
    const logger = getLogger();
    const { filePath } = this._resolve(name);

    // The directory may have been removed since construction.
    mkdirSync(SELF_DIR, { recursive: true });
    writeFileSync(filePath, content, 'utf-8');
    this._cache.set(name, content);
    logger.info(`Updated self-file: ${name}`);
  }

  /**
   * Load all self-files, in SELF_FILES key order, and return them as one
   * markdown string with a horizontal-rule separator between files.
   *
   * @returns {string}
   */
  loadAll() {
    return Object.keys(SELF_FILES)
      .map((name) => this.load(name))
      .join('\n---\n\n');
  }
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { createWriteStream, unlinkSync, readFileSync } from 'fs';
|
|
3
|
+
import { join } from 'path';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
5
|
+
import { randomBytes } from 'crypto';
|
|
6
|
+
import { getLogger } from '../utils/logger.js';
|
|
7
|
+
|
|
8
|
+
/**
 * Speech-to-Text service.
 * Tries ElevenLabs STT first and falls back to OpenAI Whisper.
 */
export class STTService {
  /**
   * @param {object} [config] — application config. Reads `elevenlabs.api_key`,
   *   `brain.provider`, `brain.api_key` and `voice.stt_enabled`; API keys fall
   *   back to the ELEVENLABS_API_KEY / OPENAI_API_KEY environment variables.
   */
  constructor(config = {}) {
    const cfg = config ?? {};
    this.elevenLabsKey = cfg.elevenlabs?.api_key || process.env.ELEVENLABS_API_KEY || null;

    // Prefer the brain's key when the brain provider is OpenAI, but still fall
    // back to the environment if that key is missing from the config.
    const brainKey = cfg.brain?.provider === 'openai' ? cfg.brain.api_key : null;
    this.openaiKey = brainKey || process.env.OPENAI_API_KEY || null;

    this.enabled = cfg.voice?.stt_enabled !== false && !!(this.elevenLabsKey || this.openaiKey);
    this.logger = getLogger();
  }

  /** Check if STT is available (enabled and at least one API key present). */
  isAvailable() {
    return this.enabled && !!(this.elevenLabsKey || this.openaiKey);
  }

  /**
   * Download a file from a URL to a temporary path.
   *
   * @param {string} fileUrl
   * @returns {Promise<string>} the local file path of the downloaded audio
   * @throws if the request fails or either stream errors; in the stream-error
   *   case the partially written temp file is removed before rejecting.
   */
  async downloadAudio(fileUrl) {
    const tmpPath = join(tmpdir(), `kernelbot-stt-${randomBytes(4).toString('hex')}.ogg`);

    const response = await axios.get(fileUrl, {
      responseType: 'stream',
      timeout: 30_000,
    });

    return new Promise((resolve, reject) => {
      const source = response.data;
      const writer = createWriteStream(tmpPath);
      let settled = false;

      const fail = (err) => {
        if (settled) return;
        settled = true;
        // Remove the partial file only once the descriptor has been released.
        writer.once('close', () => this.cleanup(tmpPath));
        source.destroy();
        writer.destroy();
        reject(err);
      };

      // pipe() does not forward source errors to the destination, so without
      // this listener a mid-download failure would leave the promise pending.
      source.on('error', fail);
      writer.on('error', fail);
      writer.on('finish', () => {
        if (settled) return;
        settled = true;
        resolve(tmpPath);
      });

      source.pipe(writer);
    });
  }

  /**
   * Transcribe an audio file to text.
   * Tries ElevenLabs first, falls back to OpenAI Whisper.
   *
   * @param {string} filePath — local path of the audio file
   * @returns {Promise<string|null>} the transcribed text, or null on failure
   */
  async transcribe(filePath) {
    if (!this.isAvailable()) return null;

    // Try ElevenLabs STT first
    if (this.elevenLabsKey) {
      try {
        const result = await this._transcribeElevenLabs(filePath);
        if (result) return result;
      } catch (err) {
        this.logger.warn(`[STT] ElevenLabs failed, trying fallback: ${err.message}`);
      }
    }

    // Fall back to OpenAI Whisper
    if (this.openaiKey) {
      try {
        return await this._transcribeWhisper(filePath);
      } catch (err) {
        this.logger.error(`[STT] Whisper fallback also failed: ${err.message}`);
      }
    }

    return null;
  }

  /**
   * Transcribe using ElevenLabs Speech-to-Text API.
   *
   * @param {string} filePath
   * @returns {Promise<string|null>} trimmed text, or null if the response had none
   */
  async _transcribeElevenLabs(filePath) {
    this.logger.info(`[STT] Transcribing with ElevenLabs: ${filePath}`);

    const fileBuffer = readFileSync(filePath);
    const formData = new FormData();
    formData.append('file', new Blob([fileBuffer]), 'audio.ogg');
    formData.append('model_id', 'scribe_v1');

    const response = await axios.post(
      'https://api.elevenlabs.io/v1/speech-to-text',
      formData,
      {
        headers: {
          'xi-api-key': this.elevenLabsKey,
        },
        timeout: 60_000,
      },
    );

    const text = response.data?.text?.trim();
    if (text) {
      this.logger.info(`[STT] ElevenLabs transcription: "${text.slice(0, 100)}"`);
    }
    return text || null;
  }

  /**
   * Transcribe using OpenAI Whisper API.
   *
   * @param {string} filePath
   * @returns {Promise<string|null>} trimmed text, or null if the response had none
   */
  async _transcribeWhisper(filePath) {
    this.logger.info(`[STT] Transcribing with Whisper: ${filePath}`);

    const fileBuffer = readFileSync(filePath);
    const formData = new FormData();
    formData.append('file', new Blob([fileBuffer]), 'audio.ogg');
    formData.append('model', 'whisper-1');

    const response = await axios.post(
      'https://api.openai.com/v1/audio/transcriptions',
      formData,
      {
        headers: {
          'Authorization': `Bearer ${this.openaiKey}`,
        },
        timeout: 60_000,
      },
    );

    const text = response.data?.text?.trim();
    if (text) {
      this.logger.info(`[STT] Whisper transcription: "${text.slice(0, 100)}"`);
    }
    return text || null;
  }

  /**
   * Clean up a temporary audio file. Best-effort: failures are ignored because
   * the file may already have been removed.
   *
   * @param {string} filePath
   */
  cleanup(filePath) {
    try {
      unlinkSync(filePath);
    } catch {
      // Already cleaned up or doesn't exist
    }
  }
}
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import { createHash } from 'crypto';
|
|
3
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync, readdirSync, unlinkSync } from 'fs';
|
|
4
|
+
import { join } from 'path';
|
|
5
|
+
import { homedir } from 'os';
|
|
6
|
+
import { getLogger } from '../utils/logger.js';
|
|
7
|
+
|
|
8
|
+
const CACHE_DIR = join(homedir(), '.kernelbot', 'tts-cache');
|
|
9
|
+
const DEFAULT_VOICE_ID = 'JBFqnCBsd6RMkjVDRZzb'; // ElevenLabs "George" voice
|
|
10
|
+
const MAX_TEXT_LENGTH = 5000; // ElevenLabs limit
|
|
11
|
+
|
|
12
|
+
/**
 * Text-to-Speech service using the ElevenLabs API.
 *
 * NOTE: the request sends `Accept: audio/mpeg`, so the cached bytes are
 * whatever ElevenLabs returns for that header (MP3, per the inline comment in
 * `synthesize`), even though cache files are named `*.ogg`. The extension is
 * kept so existing cache entries and callers keep working.
 */
export class TTSService {
  /**
   * @param {object} [config] — application config. Reads `elevenlabs.api_key`,
   *   `elevenlabs.voice_id` and `voice.tts_enabled`; the key and voice fall back
   *   to ELEVENLABS_API_KEY / ELEVENLABS_VOICE_ID, then to the default voice.
   */
  constructor(config = {}) {
    const cfg = config ?? {};
    this.apiKey = cfg.elevenlabs?.api_key || process.env.ELEVENLABS_API_KEY || null;
    this.voiceId = cfg.elevenlabs?.voice_id || process.env.ELEVENLABS_VOICE_ID || DEFAULT_VOICE_ID;
    this.enabled = cfg.voice?.tts_enabled !== false && !!this.apiKey;
    this.logger = getLogger();

    // Ensure cache directory exists
    if (this.enabled) {
      mkdirSync(CACHE_DIR, { recursive: true });
    }
  }

  /** Check if TTS is available (enabled and an API key is present). */
  isAvailable() {
    return this.enabled && !!this.apiKey;
  }

  /**
   * Convert text to speech and store the audio in the on-disk cache.
   *
   * Input is truncated to MAX_TEXT_LENGTH characters. Results are cached by a
   * hash of voice id + text, so repeated requests do not hit the API.
   *
   * @param {string} text
   * @returns {Promise<string|null>} path to the cached audio file, or null when
   *   TTS is unavailable, the text is empty, or the request/write fails
   */
  async synthesize(text) {
    if (!this.isAvailable()) return null;
    if (typeof text !== 'string' || text.trim().length === 0) return null;

    // Truncate if too long
    const cleanText = text.slice(0, MAX_TEXT_LENGTH).trim();
    // The first MAX_TEXT_LENGTH characters may be whitespace only.
    if (cleanText.length === 0) return null;

    // Check cache
    const cacheKey = this._cacheKey(cleanText, this.voiceId);
    const cachedPath = join(CACHE_DIR, `${cacheKey}.ogg`);
    if (existsSync(cachedPath)) {
      this.logger.debug(`[TTS] Cache hit: ${cacheKey}`);
      return cachedPath;
    }

    try {
      this.logger.info(`[TTS] Synthesizing ${cleanText.length} chars with voice ${this.voiceId}`);

      const response = await axios.post(
        `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(this.voiceId)}`,
        {
          text: cleanText,
          model_id: 'eleven_multilingual_v2',
          voice_settings: {
            stability: 0.5,
            similarity_boost: 0.75,
            style: 0.0,
            use_speaker_boost: true,
          },
        },
        {
          headers: {
            'Accept': 'audio/mpeg',
            'Content-Type': 'application/json',
            'xi-api-key': this.apiKey,
          },
          responseType: 'arraybuffer',
          timeout: 30_000,
        },
      );

      // ElevenLabs returns MP3 by default when Accept: audio/mpeg
      // We write it as-is; Telegram accepts MP3 for voice messages via sendVoice
      // when sent with the right content type
      const audioBuffer = Buffer.from(response.data);

      if (audioBuffer.length < 100) {
        this.logger.warn('[TTS] Response too small, likely an error');
        return null;
      }

      // Cache the result (the directory may have been removed since construction)
      mkdirSync(CACHE_DIR, { recursive: true });
      writeFileSync(cachedPath, audioBuffer);
      this.logger.info(`[TTS] Synthesized and cached: ${cachedPath} (${audioBuffer.length} bytes)`);

      return cachedPath;
    } catch (err) {
      if (err.response) {
        const errBody = this._describeErrorBody(err.response.data);
        this.logger.error(`[TTS] API error ${err.response.status}: ${errBody}`);
      } else {
        this.logger.error(`[TTS] Request failed: ${err.message}`);
      }
      return null;
    }
  }

  /**
   * Render an error response body as a short log-safe string (max 200 chars).
   * Never throws: a missing or unserializable body yields a plain string
   * instead of breaking the caller's catch block.
   *
   * @param {unknown} data — raw response body (binary, JSON value, or nothing)
   * @returns {string}
   */
  _describeErrorBody(data) {
    if (data == null) return '';

    let bytes = null;
    if (Buffer.isBuffer(data)) {
      bytes = data;
    } else if (data instanceof ArrayBuffer) {
      bytes = Buffer.from(data);
    } else if (ArrayBuffer.isView(data)) {
      bytes = Buffer.from(data.buffer, data.byteOffset, data.byteLength);
    }
    if (bytes) return bytes.toString('utf-8').slice(0, 200);

    try {
      // JSON.stringify returns undefined for functions/symbols.
      return (JSON.stringify(data) ?? String(data)).slice(0, 200);
    } catch {
      // Circular structure or BigInt: fall back to the generic string form.
      return String(data).slice(0, 200);
    }
  }

  /** Generate a deterministic cache key from text + voice. */
  _cacheKey(text, voiceId) {
    return createHash('sha256').update(`${voiceId}:${text}`).digest('hex').slice(0, 16);
  }

  /**
   * Clear the TTS cache. Best-effort: a missing directory or file is ignored,
   * any other failure is logged and the remaining files are still processed.
   */
  clearCache() {
    let files;
    try {
      files = readdirSync(CACHE_DIR);
    } catch (err) {
      // Cache dir may not exist yet
      if (err.code !== 'ENOENT') {
        this.logger.warn(`[TTS] Could not read cache directory: ${err.message}`);
      }
      return;
    }

    let removed = 0;
    for (const file of files) {
      try {
        unlinkSync(join(CACHE_DIR, file));
        removed += 1;
      } catch (err) {
        if (err.code !== 'ENOENT') {
          this.logger.warn(`[TTS] Could not remove cached file ${file}: ${err.message}`);
        }
      }
    }
    this.logger.info(`[TTS] Cache cleared (${removed} files)`);
  }
}
|
package/src/skills/catalog.js
CHANGED
|
File without changes
|
package/src/skills/custom.js
CHANGED
|
File without changes
|