@tractorscorch/clank 1.2.1 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/dist/index.js +387 -18
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -6,6 +6,31 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/).
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
+
## [1.3.1] — 2026-03-23
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
- **STT not working** — local whisper.cpp was selected by default but not installed. Added Groq as the recommended free STT provider (whisper-large-v3-turbo).
|
|
13
|
+
- **STT provider priority:** Groq (free, fast) → OpenAI Whisper → local whisper.cpp
|
|
14
|
+
- **Setup wizard:** STT now offers Groq as option 1 (recommended), OpenAI as option 2, local as option 3
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## [1.3.0] — 2026-03-23
|
|
19
|
+
|
|
20
|
+
### Added
|
|
21
|
+
- **ElevenLabs integration** — text-to-speech via ElevenLabs API, configurable during onboarding
|
|
22
|
+
- **Whisper integration** — speech-to-text via OpenAI Whisper API or local whisper.cpp
|
|
23
|
+
- **Voice tools (3):** `text_to_speech`, `speech_to_text`, `list_voices` — agent can generate and transcribe audio
|
|
24
|
+
- **Telegram voice messages** — send a voice message → auto-transcribed via Whisper → routed to agent → response as voice (ElevenLabs) or text
|
|
25
|
+
- **Integrations config section** — unified config for third-party API services (ElevenLabs, Whisper, image gen, extensible)
|
|
26
|
+
- **Setup wizard integrations step** — configure ElevenLabs, Whisper, and other API services during onboarding
|
|
27
|
+
|
|
28
|
+
### Changed
|
|
29
|
+
- Tool count: 21 total (10 core + 11 self-config/voice)
|
|
30
|
+
- Setup wizard now asks about integrations for all users (not just advanced mode)
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
9
34
|
## [1.2.1] — 2026-03-23
|
|
10
35
|
|
|
11
36
|
### Fixed
|
package/dist/index.js
CHANGED
|
@@ -1901,7 +1901,8 @@ function defaultConfig() {
|
|
|
1901
1901
|
},
|
|
1902
1902
|
safety: {
|
|
1903
1903
|
confirmExternal: true
|
|
1904
|
-
}
|
|
1904
|
+
},
|
|
1905
|
+
integrations: {}
|
|
1905
1906
|
};
|
|
1906
1907
|
}
|
|
1907
1908
|
function substituteEnvVars(obj) {
|
|
@@ -3919,6 +3920,268 @@ var init_message_tool = __esm({
|
|
|
3919
3920
|
}
|
|
3920
3921
|
});
|
|
3921
3922
|
|
|
3923
|
+
// src/voice/tts.ts
|
|
3924
|
+
var TTSEngine, STTEngine;
|
|
3925
|
+
var init_tts = __esm({
|
|
3926
|
+
"src/voice/tts.ts"() {
|
|
3927
|
+
"use strict";
|
|
3928
|
+
init_esm_shims();
|
|
3929
|
+
/**
 * Text-to-speech engine backed by the ElevenLabs HTTP API.
 *
 * Settings are read from `config.integrations.elevenlabs`: `enabled`, `apiKey`,
 * and the optional `voiceId` and `model`.
 */
TTSEngine = class {
  config;

  /**
   * @param {object} config - Application config object.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Returns the ElevenLabs settings, or `undefined` when they are absent.
   *
   * Every level is optional-chained so a config object without an
   * `integrations` section yields "not configured" instead of a TypeError.
   * NOTE(review): confirm whether loadConfig backfills `integrations` for
   * config files written before that section existed.
   */
  #settings() {
    return this.config?.integrations?.elevenlabs;
  }

  /**
   * @returns {boolean} True when ElevenLabs is enabled and has an API key.
   */
  isAvailable() {
    const elevenlabs = this.#settings();
    return !!(elevenlabs?.enabled && elevenlabs?.apiKey);
  }

  /**
   * Synthesizes speech for `text`.
   *
   * @param {string} text - Text to speak.
   * @param {{ voiceId?: string }} [opts] - Optional per-call voice override.
   * @returns {Promise<{ audioBuffer: Buffer, format: "mp3" } | null>} The audio,
   *   or `null` when the integration is not configured or the request fails.
   *   Failures are logged to stderr; this method does not throw.
   */
  async synthesize(text, opts) {
    const elevenlabs = this.#settings();
    if (!elevenlabs?.enabled || !elevenlabs.apiKey) return null;
    const voiceId = opts?.voiceId || elevenlabs.voiceId || "JBFqnCBsd6RMkjVDRZzb";
    const model = elevenlabs.model || "eleven_multilingual_v2";
    try {
      // The voice id can come from tool arguments, so it is escaped before it
      // becomes a URL path segment ("/", "?", "#" must not alter the endpoint).
      const res = await fetch(
        `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}`,
        {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            "xi-api-key": elevenlabs.apiKey
          },
          body: JSON.stringify({
            text,
            model_id: model,
            voice_settings: { stability: 0.5, similarity_boost: 0.75 }
          })
        }
      );
      if (!res.ok) {
        console.error(`ElevenLabs TTS error ${res.status}`);
        return null;
      }
      const arrayBuffer = await res.arrayBuffer();
      return { audioBuffer: Buffer.from(arrayBuffer), format: "mp3" };
    } catch (err) {
      console.error(`TTS error: ${err instanceof Error ? err.message : err}`);
      return null;
    }
  }

  /**
   * Lists the voices available to the configured ElevenLabs account.
   *
   * @returns {Promise<Array<{ id: string, name: string }>>} The voices, or an
   *   empty array when the integration is not configured or the request fails.
   *   Failures are logged to stderr, matching `synthesize`.
   */
  async listVoices() {
    const elevenlabs = this.#settings();
    if (!elevenlabs?.enabled || !elevenlabs.apiKey) return [];
    try {
      const res = await fetch("https://api.elevenlabs.io/v1/voices", {
        headers: { "xi-api-key": elevenlabs.apiKey }
      });
      if (!res.ok) {
        console.error(`ElevenLabs voices error ${res.status}`);
        return [];
      }
      const data = await res.json();
      return (data.voices || []).map((v) => ({ id: v.voice_id, name: v.name }));
    } catch (err) {
      console.error(`TTS error: ${err instanceof Error ? err.message : err}`);
      return [];
    }
  }
};
|
|
3984
|
+
/**
 * Speech-to-text engine.
 *
 * Providers are tried in a fixed order: Groq, then OpenAI (both through the
 * OpenAI-compatible transcription endpoint), then a local `whisper` binary.
 * Keys come from `config.integrations.whisper`, `config.integrations.groq`
 * and `config.models.providers.openai`.
 */
STTEngine = class {
  config;

  /**
   * @param {object} config - Application config object.
   */
  constructor(config) {
    this.config = config;
  }

  /**
   * Reduces an audio format / file extension to lowercase alphanumerics so it
   * is safe to embed in a MIME type, an upload filename and a temp-file path.
   * Falls back to "ogg" (the default of `transcribe`) when nothing is left.
   */
  #safeFormat(format) {
    return String(format ?? "").toLowerCase().replace(/[^a-z0-9]/g, "") || "ogg";
  }

  /**
   * @returns {boolean} True when any provider looks usable. For the "local"
   *   provider this only reflects the config flag; the binary is not probed.
   */
  isAvailable() {
    // Optional chaining throughout: a config without `integrations` or
    // `models` sections reads as "not configured" instead of throwing.
    const integrations = this.config?.integrations;
    const whisper = integrations?.whisper;
    if (whisper?.enabled) {
      if (whisper.provider === "groq" && whisper.apiKey) return true;
      if (whisper.provider === "openai" && whisper.apiKey) return true;
      if (whisper.provider === "local") return true;
    }
    if (this.config?.models?.providers?.openai?.apiKey) return true;
    if (integrations?.groq?.apiKey) return true;
    return false;
  }

  /**
   * Transcribes an audio buffer.
   *
   * @param {Buffer} audioBuffer - Raw audio bytes.
   * @param {string} [format="ogg"] - Container/extension of the audio.
   * @returns {Promise<{ text: string, language?: string } | null>} The
   *   transcription, or `null` when every applicable provider failed.
   */
  async transcribe(audioBuffer, format = "ogg") {
    const integrations = this.config?.integrations;
    const whisper = integrations?.whisper;
    // NOTE(review): a Groq/OpenAI key found elsewhere in the config is used
    // even when whisper.provider is "local" — confirm this priority is intended.
    const groqKey = whisper?.provider === "groq" && whisper?.apiKey ? whisper.apiKey : integrations?.groq?.apiKey;
    if (groqKey) {
      const result = await this.transcribeAPI(audioBuffer, format, groqKey, "https://api.groq.com/openai/v1/audio/transcriptions", "whisper-large-v3-turbo");
      if (result) return result;
    }
    const openaiKey = whisper?.provider === "openai" && whisper?.apiKey ? whisper.apiKey : this.config?.models?.providers?.openai?.apiKey;
    if (openaiKey) {
      const result = await this.transcribeAPI(audioBuffer, format, openaiKey, "https://api.openai.com/v1/audio/transcriptions", "whisper-1");
      if (result) return result;
    }
    if (whisper?.provider === "local") {
      return this.transcribeLocal(audioBuffer, format);
    }
    return null;
  }

  /**
   * Transcribe via OpenAI-compatible API (works for both OpenAI and Groq).
   *
   * @param {Buffer} audioBuffer - Raw audio bytes.
   * @param {string} format - Container/extension of the audio.
   * @param {string} apiKey - Bearer token for `endpoint`.
   * @param {string} endpoint - Transcription endpoint URL.
   * @param {string} model - Model name sent in the form data.
   * @returns {Promise<{ text: string, language?: string } | null>} `null` on
   *   any failure (logged to stderr); never throws.
   */
  async transcribeAPI(audioBuffer, format, apiKey, endpoint, model) {
    try {
      const ext = this.#safeFormat(format);
      const blob = new Blob([new Uint8Array(audioBuffer)], { type: `audio/${ext}` });
      const formData = new FormData();
      formData.append("file", blob, `audio.${ext}`);
      formData.append("model", model);
      const res = await fetch(endpoint, {
        method: "POST",
        headers: { "Authorization": `Bearer ${apiKey}` },
        body: formData
      });
      if (!res.ok) {
        const errText = await res.text().catch(() => "");
        console.error(`STT API error ${res.status}: ${errText.slice(0, 200)}`);
        return null;
      }
      const data = await res.json();
      return data.text ? { text: data.text, language: data.language } : null;
    } catch (err) {
      console.error(`STT error: ${err instanceof Error ? err.message : err}`);
      return null;
    }
  }

  /**
   * Transcribe via a local `whisper` executable found on PATH.
   *
   * The audio is written to a uniquely named temp file that is removed
   * afterwards whether or not transcription succeeded.
   *
   * @param {Buffer} audioBuffer - Raw audio bytes.
   * @param {string} format - Container/extension of the audio.
   * @returns {Promise<{ text: string } | null>} Trimmed stdout of the command,
   *   or `null` on failure or empty output (failure is logged to stderr).
   */
  async transcribeLocal(audioBuffer, format) {
    try {
      const { writeFile: writeFile9, unlink: unlink5 } = await import("fs/promises");
      const { execFile } = await import("child_process");
      const { promisify } = await import("util");
      const { join: join19 } = await import("path");
      const { tmpdir } = await import("os");
      const { randomUUID } = await import("crypto");
      // A random suffix avoids collisions between concurrent transcriptions;
      // the sanitised extension keeps caller-supplied text out of the path.
      const tmpFile = join19(tmpdir(), `clank-stt-${randomUUID()}.${this.#safeFormat(format)}`);
      try {
        await writeFile9(tmpFile, audioBuffer);
        // execFile passes arguments as an array without a shell, so nothing in
        // the path can be interpreted as shell syntax, and the async form does
        // not block the event loop while whisper runs (up to the 60 s timeout).
        // NOTE(review): without a shell, Windows cannot launch .cmd/.bat shims.
        const { stdout } = await promisify(execFile)(
          "whisper",
          [tmpFile, "--model", "base.en", "--output-txt"],
          { encoding: "utf-8", timeout: 6e4 }
        );
        const text = stdout.trim();
        return text ? { text } : null;
      } finally {
        await unlink5(tmpFile).catch(() => {
        });
      }
    } catch (err) {
      console.error(`Local STT error: ${err instanceof Error ? err.message : err}`);
      return null;
    }
  }
};
|
|
4062
|
+
}
|
|
4063
|
+
});
|
|
4064
|
+
|
|
4065
|
+
// src/voice/index.ts
|
|
4066
|
+
var voice_exports = {};
|
|
4067
|
+
__export(voice_exports, {
|
|
4068
|
+
STTEngine: () => STTEngine,
|
|
4069
|
+
TTSEngine: () => TTSEngine
|
|
4070
|
+
});
|
|
4071
|
+
var init_voice = __esm({
|
|
4072
|
+
"src/voice/index.ts"() {
|
|
4073
|
+
"use strict";
|
|
4074
|
+
init_esm_shims();
|
|
4075
|
+
init_tts();
|
|
4076
|
+
}
|
|
4077
|
+
});
|
|
4078
|
+
|
|
4079
|
+
// src/tools/self-config/voice-tool.ts
|
|
4080
|
+
var ttsTool, sttTool, voiceListTool;
|
|
4081
|
+
var init_voice_tool = __esm({
|
|
4082
|
+
"src/tools/self-config/voice-tool.ts"() {
|
|
4083
|
+
"use strict";
|
|
4084
|
+
init_esm_shims();
|
|
4085
|
+
init_voice();
|
|
4086
|
+
init_config2();
|
|
4087
|
+
/**
 * Agent tool `text_to_speech`: synthesizes speech through TTSEngine and
 * writes the audio to a file in the OS temp directory.
 */
ttsTool = {
  definition: {
    name: "text_to_speech",
    description: "Convert text to speech audio using ElevenLabs. Returns the audio file path. Requires ElevenLabs integration to be configured.",
    parameters: {
      type: "object",
      properties: {
        text: { type: "string", description: "Text to convert to speech" },
        voice_id: { type: "string", description: "ElevenLabs voice ID (optional, uses default)" }
      },
      required: ["text"]
    }
  },
  safetyLevel: "low",
  // NOTE(review): execute() writes a temp file; confirm that is compatible
  // with what the registry means by readOnly.
  readOnly: true,
  /**
   * @param {{ text?: unknown }} args - Raw tool arguments.
   * @returns {{ ok: true } | { ok: false, error: string }} Validation result.
   */
  validate(args) {
    // Whitespace-only text is rejected too: there is nothing to speak.
    if (typeof args.text !== "string" || args.text.trim() === "") return { ok: false, error: "text is required" };
    if (args.text.length > 5e3) return { ok: false, error: "text too long (max 5000 chars)" };
    return { ok: true };
  },
  /**
   * @param {{ text: string, voice_id?: string }} args - Validated arguments.
   * @returns {Promise<string>} A message containing the output path, or a
   *   string starting with "Error:" when TTS is unavailable or fails.
   */
  async execute(args) {
    const config = await loadConfig();
    const engine = new TTSEngine(config);
    if (!engine.isAvailable()) {
      return "Error: ElevenLabs not configured. Tell me to set it up, or run: clank setup --section integrations";
    }
    const result = await engine.synthesize(args.text, {
      voiceId: args.voice_id
    });
    if (!result) return "Error: TTS synthesis failed";
    const { writeFile: writeFile9 } = await import("fs/promises");
    const { join: join19 } = await import("path");
    const { tmpdir } = await import("os");
    const { randomUUID } = await import("crypto");
    // A random name instead of a millisecond timestamp: two calls landing in
    // the same millisecond would otherwise overwrite each other's audio.
    const outPath = join19(tmpdir(), `clank-tts-${randomUUID()}.${result.format}`);
    await writeFile9(outPath, result.audioBuffer);
    return `Audio generated: ${outPath} (${result.format}, ${Math.round(result.audioBuffer.length / 1024)}KB)`;
  }
};
|
|
4125
|
+
/**
 * Agent tool `speech_to_text`: reads an audio file from disk and transcribes
 * it through STTEngine.
 */
sttTool = {
  definition: {
    name: "speech_to_text",
    // Groq is listed because STTEngine tries it before OpenAI and local whisper.
    description: "Transcribe an audio file to text using Whisper (Groq API, OpenAI API or local whisper.cpp). Provide a file path to an audio file.",
    parameters: {
      type: "object",
      properties: {
        file_path: { type: "string", description: "Path to audio file (.mp3, .wav, .ogg, .m4a)" }
      },
      required: ["file_path"]
    }
  },
  safetyLevel: "low",
  readOnly: true,
  /**
   * @param {{ file_path?: unknown }} args - Raw tool arguments.
   * @returns {{ ok: true } | { ok: false, error: string }} Validation result.
   */
  validate(args) {
    if (!args.file_path || typeof args.file_path !== "string") return { ok: false, error: "file_path is required" };
    return { ok: true };
  },
  /**
   * @param {{ file_path: string }} args - Validated arguments.
   * @returns {Promise<string>} The transcribed text, or a string starting with
   *   "Error:" when the file is missing, STT is unavailable, or it fails.
   */
  async execute(args) {
    const { readFile: readFile12 } = await import("fs/promises");
    const { existsSync: existsSync10 } = await import("fs");
    const { extname } = await import("path");
    const filePath = args.file_path;
    if (!existsSync10(filePath)) return `Error: File not found: ${filePath}`;
    const config = await loadConfig();
    const engine = new STTEngine(config);
    if (!engine.isAvailable()) {
      return "Error: Speech-to-text not configured. Need a Groq or OpenAI API key, or local whisper.cpp installed.";
    }
    const audioBuffer = await readFile12(filePath);
    // extname() looks only at the final path segment. Splitting the whole path
    // on "." returned a path fragment for extension-less files or dotted
    // directories, so the "wav" fallback could never apply.
    const ext = extname(filePath).slice(1).toLowerCase() || "wav";
    const result = await engine.transcribe(audioBuffer, ext);
    if (!result) return "Error: Transcription failed";
    return result.text;
  }
};
|
|
4160
|
+
/**
 * Agent tool `list_voices`: reports the voices TTSEngine can use, one
 * "name: id" pair per line.
 */
voiceListTool = {
  definition: {
    name: "list_voices",
    description: "List available ElevenLabs voices for text-to-speech.",
    parameters: { type: "object", properties: {} }
  },
  safetyLevel: "low",
  readOnly: true,
  /** The tool takes no arguments, so validation always succeeds. */
  validate() {
    const verdict = { ok: true };
    return verdict;
  },
  /**
   * @returns {Promise<string>} Newline-separated "name: id" entries, or a
   *   message explaining why no voices could be listed.
   */
  async execute() {
    const tts = new TTSEngine(await loadConfig());
    if (!tts.isAvailable()) {
      return "Error: ElevenLabs not configured.";
    }
    const available = await tts.listVoices();
    if (available.length === 0) {
      return "No voices found or API error.";
    }
    const lines = [];
    for (const voice of available) {
      lines.push(`${voice.name}: ${voice.id}`);
    }
    return lines.join("\n");
  }
};
|
|
4182
|
+
}
|
|
4183
|
+
});
|
|
4184
|
+
|
|
3922
4185
|
// src/tools/self-config/index.ts
|
|
3923
4186
|
function registerSelfConfigTools(registry) {
|
|
3924
4187
|
registry.register(configTool);
|
|
@@ -3929,6 +4192,9 @@ function registerSelfConfigTools(registry) {
|
|
|
3929
4192
|
registry.register(cronTool);
|
|
3930
4193
|
registry.register(gatewayTool);
|
|
3931
4194
|
registry.register(messageTool);
|
|
4195
|
+
registry.register(ttsTool);
|
|
4196
|
+
registry.register(sttTool);
|
|
4197
|
+
registry.register(voiceListTool);
|
|
3932
4198
|
}
|
|
3933
4199
|
var init_self_config = __esm({
|
|
3934
4200
|
"src/tools/self-config/index.ts"() {
|
|
@@ -3942,6 +4208,7 @@ var init_self_config = __esm({
|
|
|
3942
4208
|
init_cron_tool();
|
|
3943
4209
|
init_gateway_tool();
|
|
3944
4210
|
init_message_tool();
|
|
4211
|
+
init_voice_tool();
|
|
3945
4212
|
init_config_tool();
|
|
3946
4213
|
init_channel_tool();
|
|
3947
4214
|
init_agent_tool();
|
|
@@ -3950,6 +4217,7 @@ var init_self_config = __esm({
|
|
|
3950
4217
|
init_cron_tool();
|
|
3951
4218
|
init_gateway_tool();
|
|
3952
4219
|
init_message_tool();
|
|
4220
|
+
init_voice_tool();
|
|
3953
4221
|
}
|
|
3954
4222
|
});
|
|
3955
4223
|
|
|
@@ -4794,6 +5062,76 @@ var init_telegram = __esm({
|
|
|
4794
5062
|
});
|
|
4795
5063
|
chatLocks.set(chatId, next);
|
|
4796
5064
|
});
|
|
5065
|
+
// Voice messages: download the audio from Telegram, transcribe it, pass the
// text to the gateway, then answer with synthesized speech when TTS is
// available and the reply is short, otherwise with text chunks.
bot.on("message:voice", async (ctx) => {
  const msg = ctx.message;
  const chatId = msg.chat.id;
  const userId = msg.from?.id;
  if (telegramConfig.allowFrom && telegramConfig.allowFrom.length > 0) {
    // Identities this sender can be matched by: numeric id, bare username and
    // "@username". Empty values are dropped so a blank allowFrom entry cannot
    // match a sender who has no username or id.
    const handle = (msg.from?.username || "").toLowerCase();
    const identities = new Set(
      [String(userId || ""), handle, handle ? `@${handle}` : ""].filter(Boolean)
    );
    const isAllowed = telegramConfig.allowFrom.some(
      (entry) => identities.has(String(entry).toLowerCase())
    );
    if (!isAllowed) return;
  }
  // Ignore messages dated more than 30 s before startupTime.
  if (msg.date < startupTime - 30) return;
  const processVoice = async () => {
    if (!this.gateway || !this.config) return;
    try {
      await ctx.api.sendChatAction(chatId, "typing");
      const file = await ctx.api.getFile(msg.voice.file_id);
      // file_path is optional in the Bot API response; without it the download
      // URL would end in "undefined".
      if (!file.file_path) {
        await ctx.api.sendMessage(chatId, "Error: could not download voice message");
        return;
      }
      const fileUrl = `https://api.telegram.org/file/bot${telegramConfig.botToken}/${file.file_path}`;
      const res = await fetch(fileUrl);
      if (!res.ok) {
        await ctx.api.sendMessage(chatId, "Error: could not download voice message");
        return;
      }
      const audioBuffer = Buffer.from(await res.arrayBuffer());
      // One lazy load of the voice module provides both engines.
      const { STTEngine: STTEngine2, TTSEngine: TTSEngine2 } = await Promise.resolve().then(() => (init_voice(), voice_exports));
      const { loadConfig: loadConfig3 } = await Promise.resolve().then(() => (init_config2(), config_exports));
      const config = await loadConfig3();
      const stt = new STTEngine2(config);
      if (!stt.isAvailable()) {
        await ctx.api.sendMessage(chatId, "Voice messages require speech-to-text. Set up Whisper: /help");
        return;
      }
      const transcription = await stt.transcribe(audioBuffer, "ogg");
      if (!transcription?.text) {
        await ctx.api.sendMessage(chatId, "Could not transcribe voice message.");
        return;
      }
      const isGroup = msg.chat.type === "group" || msg.chat.type === "supergroup";
      const response = await this.gateway.handleInboundMessage(
        { channel: "telegram", peerId: chatId, peerKind: isGroup ? "group" : "dm" },
        `[Voice message transcription]: ${transcription.text}`
      );
      if (!response) return;
      const tts = new TTSEngine2(config);
      // Replies of 2000 characters or more are sent as text only.
      if (tts.isAvailable() && response.length < 2e3) {
        const audio = await tts.synthesize(response);
        if (audio) {
          const { InputFile } = await import("grammy");
          await ctx.api.sendVoice(chatId, new InputFile(audio.audioBuffer, "reply.mp3"));
          return;
        }
      }
      // Text fallback, split into chunks of at most 4000 characters.
      const chunks = splitMessage(response, 4e3);
      for (const chunk of chunks) {
        await ctx.api.sendMessage(chatId, chunk);
      }
    } catch (err) {
      const errMsg = err instanceof Error ? err.message : String(err);
      await ctx.api.sendMessage(chatId, `Error: ${errMsg.slice(0, 200)}`);
    }
  };
  // Chain onto this chat's previous task so its messages are handled in order.
  // The trailing catch keeps one failure from breaking the chain.
  const prev = chatLocks.get(chatId) || Promise.resolve();
  const next = prev.then(processVoice).catch(() => {
  });
  chatLocks.set(chatId, next);
});
|
|
4797
5135
|
bot.start({
|
|
4798
5136
|
onStart: () => {
|
|
4799
5137
|
this.running = true;
|
|
@@ -5339,7 +5677,7 @@ var init_server = __esm({
|
|
|
5339
5677
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
5340
5678
|
res.end(JSON.stringify({
|
|
5341
5679
|
status: "ok",
|
|
5342
|
-
version: "1.
|
|
5680
|
+
version: "1.3.1",
|
|
5343
5681
|
uptime: process.uptime(),
|
|
5344
5682
|
clients: this.clients.size,
|
|
5345
5683
|
agents: this.engines.size
|
|
@@ -5447,7 +5785,7 @@ var init_server = __esm({
|
|
|
5447
5785
|
const hello = {
|
|
5448
5786
|
type: "hello",
|
|
5449
5787
|
protocol: PROTOCOL_VERSION,
|
|
5450
|
-
version: "1.
|
|
5788
|
+
version: "1.3.1",
|
|
5451
5789
|
agents: this.config.agents.list.map((a) => ({
|
|
5452
5790
|
id: a.id,
|
|
5453
5791
|
name: a.name || a.id,
|
|
@@ -6290,21 +6628,52 @@ async function runSetup(opts) {
|
|
|
6290
6628
|
console.log(green4(" Brave Search configured"));
|
|
6291
6629
|
}
|
|
6292
6630
|
}
|
|
6293
|
-
|
|
6294
|
-
|
|
6295
|
-
|
|
6296
|
-
|
|
6297
|
-
|
|
6298
|
-
|
|
6299
|
-
|
|
6300
|
-
|
|
6301
|
-
|
|
6631
|
+
console.log("");
|
|
6632
|
+
console.log(" API Integrations:");
|
|
6633
|
+
console.log(dim4(" Add third-party services for voice, image gen, etc."));
|
|
6634
|
+
console.log(dim4(" You can also configure these later through conversation."));
|
|
6635
|
+
console.log("");
|
|
6636
|
+
const addElevenLabs = await ask(rl, cyan2(" Set up ElevenLabs (text-to-speech)? [y/N] "));
|
|
6637
|
+
if (addElevenLabs.toLowerCase() === "y") {
|
|
6638
|
+
console.log(dim4(" Get an API key at: https://elevenlabs.io/"));
|
|
6639
|
+
const key = await ask(rl, cyan2(" ElevenLabs API key: "));
|
|
6640
|
+
if (key.trim()) {
|
|
6641
|
+
config.integrations.elevenlabs = { enabled: true, apiKey: key.trim() };
|
|
6642
|
+
const voiceId = await ask(rl, cyan2(" Default voice ID (Enter to skip): "));
|
|
6643
|
+
if (voiceId.trim()) {
|
|
6644
|
+
config.integrations.elevenlabs.voiceId = voiceId.trim();
|
|
6645
|
+
}
|
|
6646
|
+
console.log(green4(" ElevenLabs configured (TTS available)"));
|
|
6647
|
+
}
|
|
6648
|
+
}
|
|
6649
|
+
const addWhisper = await ask(rl, cyan2(" Set up speech-to-text (voice messages)? [y/N] "));
|
|
6650
|
+
if (addWhisper.toLowerCase() === "y") {
|
|
6651
|
+
console.log(dim4(" 1. Groq (recommended \u2014 free, fast)"));
|
|
6652
|
+
console.log(dim4(" 2. OpenAI Whisper API (paid, uses OpenAI key)"));
|
|
6653
|
+
console.log(dim4(" 3. Local whisper.cpp (requires manual install)"));
|
|
6654
|
+
const whisperChoice = await ask(rl, cyan2(" Choice [1]: "));
|
|
6655
|
+
if (whisperChoice === "3") {
|
|
6656
|
+
config.integrations.whisper = { enabled: true, provider: "local" };
|
|
6657
|
+
console.log(green4(" Local whisper.cpp configured"));
|
|
6658
|
+
console.log(dim4(" Make sure whisper is installed and in PATH"));
|
|
6659
|
+
} else if (whisperChoice === "2") {
|
|
6660
|
+
const existingKey = config.models.providers.openai?.apiKey;
|
|
6661
|
+
if (existingKey) {
|
|
6662
|
+
config.integrations.whisper = { enabled: true, provider: "openai", apiKey: existingKey };
|
|
6663
|
+
console.log(green4(" Whisper configured (using existing OpenAI key)"));
|
|
6664
|
+
} else {
|
|
6665
|
+
const key = await ask(rl, cyan2(" OpenAI API key: "));
|
|
6302
6666
|
if (key.trim()) {
|
|
6303
|
-
|
|
6667
|
+
config.integrations.whisper = { enabled: true, provider: "openai", apiKey: key.trim() };
|
|
6668
|
+
console.log(green4(" Whisper configured"));
|
|
6304
6669
|
}
|
|
6305
|
-
}
|
|
6306
|
-
|
|
6307
|
-
|
|
6670
|
+
}
|
|
6671
|
+
} else {
|
|
6672
|
+
console.log(dim4(" Get a free API key at: https://console.groq.com/keys"));
|
|
6673
|
+
const key = await ask(rl, cyan2(" Groq API key: "));
|
|
6674
|
+
if (key.trim()) {
|
|
6675
|
+
config.integrations.whisper = { enabled: true, provider: "groq", apiKey: key.trim() };
|
|
6676
|
+
console.log(green4(" Groq Whisper configured (free, fast)"));
|
|
6308
6677
|
}
|
|
6309
6678
|
}
|
|
6310
6679
|
}
|
|
@@ -6781,7 +7150,7 @@ async function runTui(opts) {
|
|
|
6781
7150
|
ws.on("open", () => {
|
|
6782
7151
|
ws.send(JSON.stringify({
|
|
6783
7152
|
type: "connect",
|
|
6784
|
-
params: { auth: { token }, mode: "tui", version: "1.
|
|
7153
|
+
params: { auth: { token }, mode: "tui", version: "1.3.1" }
|
|
6785
7154
|
}));
|
|
6786
7155
|
});
|
|
6787
7156
|
ws.on("message", (data) => {
|
|
@@ -7210,7 +7579,7 @@ import { fileURLToPath as fileURLToPath5 } from "url";
|
|
|
7210
7579
|
import { dirname as dirname5, join as join18 } from "path";
|
|
7211
7580
|
var __filename3 = fileURLToPath5(import.meta.url);
|
|
7212
7581
|
var __dirname3 = dirname5(__filename3);
|
|
7213
|
-
var version = "1.
|
|
7582
|
+
var version = "1.3.1";
|
|
7214
7583
|
try {
|
|
7215
7584
|
const pkg = JSON.parse(readFileSync(join18(__dirname3, "..", "package.json"), "utf-8"));
|
|
7216
7585
|
version = pkg.version;
|