alvin-bot 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +43 -0
- package/BACKLOG.md +223 -0
- package/CHANGELOG.md +63 -0
- package/CLAUDE.example.md +152 -0
- package/CODE_OF_CONDUCT.md +52 -0
- package/CONTRIBUTING.md +72 -0
- package/LICENSE +21 -0
- package/README.md +529 -0
- package/SECURITY.md +38 -0
- package/SOUL.example.md +60 -0
- package/TOOLS.example.md +42 -0
- package/alvin-bot.config.example.json +24 -0
- package/bin/cli.js +1088 -0
- package/dist/.metadata_never_index +0 -0
- package/dist/claude.js +102 -0
- package/dist/config.js +65 -0
- package/dist/engine.js +90 -0
- package/dist/find-claude-binary.js +98 -0
- package/dist/handlers/commands.js +1489 -0
- package/dist/handlers/document.js +187 -0
- package/dist/handlers/message.js +200 -0
- package/dist/handlers/photo.js +154 -0
- package/dist/handlers/platform-message.js +275 -0
- package/dist/handlers/video.js +237 -0
- package/dist/handlers/voice.js +148 -0
- package/dist/i18n.js +299 -0
- package/dist/index.js +442 -0
- package/dist/init-data-dir.js +81 -0
- package/dist/middleware/auth.js +215 -0
- package/dist/migrate.js +139 -0
- package/dist/paths.js +87 -0
- package/dist/platforms/discord.js +161 -0
- package/dist/platforms/index.js +130 -0
- package/dist/platforms/signal.js +205 -0
- package/dist/platforms/slack.js +318 -0
- package/dist/platforms/telegram.js +111 -0
- package/dist/platforms/types.js +8 -0
- package/dist/platforms/whatsapp.js +648 -0
- package/dist/providers/claude-sdk-provider.js +173 -0
- package/dist/providers/codex-cli-provider.js +121 -0
- package/dist/providers/index.js +7 -0
- package/dist/providers/openai-compatible.js +388 -0
- package/dist/providers/registry.js +209 -0
- package/dist/providers/tool-executor.js +450 -0
- package/dist/providers/types.js +205 -0
- package/dist/services/access.js +144 -0
- package/dist/services/asset-index.js +230 -0
- package/dist/services/browser-manager.js +161 -0
- package/dist/services/browser.js +121 -0
- package/dist/services/compaction.js +129 -0
- package/dist/services/cron.js +462 -0
- package/dist/services/custom-tools.js +317 -0
- package/dist/services/delivery-queue.js +154 -0
- package/dist/services/elevenlabs.js +58 -0
- package/dist/services/embeddings.js +386 -0
- package/dist/services/exec-guard.js +46 -0
- package/dist/services/fallback-order.js +151 -0
- package/dist/services/heartbeat.js +192 -0
- package/dist/services/hooks.js +44 -0
- package/dist/services/imagegen.js +72 -0
- package/dist/services/language-detect.js +144 -0
- package/dist/services/markdown.js +63 -0
- package/dist/services/mcp.js +252 -0
- package/dist/services/memory.js +133 -0
- package/dist/services/personality.js +227 -0
- package/dist/services/plugins.js +171 -0
- package/dist/services/reminders.js +97 -0
- package/dist/services/restart.js +48 -0
- package/dist/services/security-audit.js +66 -0
- package/dist/services/self-search.js +129 -0
- package/dist/services/session.js +93 -0
- package/dist/services/skills.js +287 -0
- package/dist/services/standing-orders.js +29 -0
- package/dist/services/subagents.js +142 -0
- package/dist/services/sudo.js +243 -0
- package/dist/services/telegram.js +113 -0
- package/dist/services/tool-discovery.js +214 -0
- package/dist/services/usage-tracker.js +137 -0
- package/dist/services/users.js +199 -0
- package/dist/services/voice.js +95 -0
- package/dist/tui/index.js +507 -0
- package/dist/web/canvas.js +30 -0
- package/dist/web/doctor-api.js +606 -0
- package/dist/web/openai-compat.js +252 -0
- package/dist/web/server.js +1351 -0
- package/dist/web/setup-api.js +1078 -0
- package/docs/mcp.example.json +16 -0
- package/docs/screenshots/00-Login.png +0 -0
- package/docs/screenshots/01-Chat-Dark-Conversation.png +0 -0
- package/docs/screenshots/02-Chat.png +0 -0
- package/docs/screenshots/03-Dashboard-Overview.png +0 -0
- package/docs/screenshots/04-AI-Models-and-Providers.png +0 -0
- package/docs/screenshots/05-Personality-Editor.png +0 -0
- package/docs/screenshots/06-Memory-Manager.png +0 -0
- package/docs/screenshots/07-Active-Sessions.png +0 -0
- package/docs/screenshots/08-File-Browser.png +0 -0
- package/docs/screenshots/09-Scheduled-Jobs.png +0 -0
- package/docs/screenshots/10-Custom-Tools.png +0 -0
- package/docs/screenshots/11-Plugins-and-MCP.png +0 -0
- package/docs/screenshots/12-Messaging-Platforms.png +0 -0
- package/docs/screenshots/12.1-Messaging-Platforms-WhatsApp-Groups-List.png +0 -0
- package/docs/screenshots/12.2-Messaging-Platforms-WA-Group-Details.png +0 -0
- package/docs/screenshots/13-User-Management.png +0 -0
- package/docs/screenshots/14-Web-Terminal.png +0 -0
- package/docs/screenshots/15-Maintenance-and-Health.png +0 -0
- package/docs/screenshots/16-Settings-and-Env.png +0 -0
- package/docs/screenshots/TG-commands.png +0 -0
- package/docs/screenshots/TG.png +0 -0
- package/docs/screenshots/_Mac-Installer.png +0 -0
- package/docs/tools.example.json +33 -0
- package/install.sh +165 -0
- package/package.json +190 -0
- package/plugins/calendar/index.js +270 -0
- package/plugins/email/index.js +231 -0
- package/plugins/finance/index.js +254 -0
- package/plugins/notes/index.js +227 -0
- package/plugins/smarthome/index.js +230 -0
- package/plugins/weather/index.js +122 -0
- package/skills/apple-notes/SKILL.md +31 -0
- package/skills/browse/SKILL.md +136 -0
- package/skills/code-project/SKILL.md +43 -0
- package/skills/data-analysis/SKILL.md +39 -0
- package/skills/document-creation/SKILL.md +48 -0
- package/skills/email-summary/SKILL.md +46 -0
- package/skills/github/SKILL.md +42 -0
- package/skills/summarize/SKILL.md +28 -0
- package/skills/system-admin/SKILL.md +39 -0
- package/skills/weather/SKILL.md +34 -0
- package/skills/web-research/SKILL.md +35 -0
- package/web/public/canvas.html +52 -0
- package/web/public/css/style.css +555 -0
- package/web/public/index.html +189 -0
- package/web/public/js/app.js +3102 -0
- package/web/public/js/i18n.js +1048 -0
- package/web/public/js/icons.js +104 -0
- package/web/public/login.html +48 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Generic Platform Message Handler
|
|
3
|
+
*
|
|
4
|
+
* Processes messages from any platform adapter (WhatsApp, Discord, Signal)
|
|
5
|
+
* through the AI engine and sends the response back.
|
|
6
|
+
*
|
|
7
|
+
* This is the platform-agnostic equivalent of message.ts (which is Telegram-specific).
|
|
8
|
+
*/
|
|
9
|
+
import fs from "fs";
|
|
10
|
+
import { getSession, addToHistory, trackProviderUsage } from "../services/session.js";
|
|
11
|
+
import { getRegistry } from "../engine.js";
|
|
12
|
+
import { buildSystemPrompt, buildSmartSystemPrompt } from "../services/personality.js";
|
|
13
|
+
import { buildSkillContext } from "../services/skills.js";
|
|
14
|
+
import { touchProfile } from "../services/users.js";
|
|
15
|
+
import { trackAndAdapt } from "../services/language-detect.js";
|
|
16
|
+
import { transcribeAudio } from "../services/voice.js";
|
|
17
|
+
import { config } from "../config.js";
|
|
18
|
+
/**
 * Message length limit for each platform, keyed by platform name.
 * Replies longer than the limit are split before sending.
 */
const PLATFORM_LIMITS = {
    "discord": 2000,
    "telegram": 4096,
    "whatsapp": 4096,
    "signal": 6000,
    "web": 100000,
};
|
|
26
|
+
/**
 * Handle an incoming message from any platform adapter.
 *
 * Turns the message into a text prompt (voice notes are transcribed, photos
 * and documents are described by path), lets basic slash commands
 * short-circuit, then runs the AI query and sends the response back via the
 * adapter's sendText.
 *
 * @param {object} msg - Incoming message. Fields read here: text, media
 *   (type, path, fileName, mimeType), chatId, userId, userName, userHandle,
 *   platform, replyToText.
 * @param {object} adapter - Platform adapter. Must provide
 *   sendText(chatId, text); may provide setTyping(chatId).
 * @returns {Promise<void>}
 */
export async function handlePlatformMessage(msg, adapter) {
    let text = msg.text?.trim();
    // ── Voice message: transcribe first ──────────────────────────────────
    if (msg.media?.type === "voice" && msg.media.path) {
        if (!config.apiKeys.groq) {
            await adapter.sendText(msg.chatId, "⚠️ Voice nicht konfiguriert (GROQ_API_KEY fehlt).");
            return;
        }
        try {
            const transcript = await transcribeAudio(msg.media.path);
            fs.unlink(msg.media.path, () => { });
            if (!transcript.trim()) {
                await adapter.sendText(msg.chatId, "Could not understand the voice message. 🤷");
                return;
            }
            // Echo the transcript so the user can see what was understood.
            await adapter.sendText(msg.chatId, `🎙️ _"${transcript}"_`);
            text = transcript;
        }
        catch (err) {
            const errMsg = err instanceof Error ? err.message : String(err);
            console.error("Voice transcription error:", errMsg);
            await adapter.sendText(msg.chatId, `⚠️ Voice message error: ${errMsg}`);
            if (msg.media.path)
                fs.unlink(msg.media.path, () => { });
            return;
        }
    }
    // ── Photo with caption: describe as context ──────────────────────────
    if (msg.media?.type === "photo" && msg.media.path) {
        const caption = text || "Beschreibe dieses Bild.";
        text = `[Image attached: ${msg.media.path}]\n\n${caption}`;
    }
    // ── Document: provide path + filename + instructions ──────────────────
    if (msg.media?.type === "document" && msg.media.path) {
        const fname = msg.media.fileName || "Dokument";
        const fpath = msg.media.path;
        const ext = fname.split(".").pop()?.toLowerCase() || "";
        const caption = text || `Analysiere dieses Dokument: ${fname}`;
        // Give the AI concrete instructions based on file type
        const isArchive = ["zip", "tar", "gz", "tgz", "7z", "rar"].includes(ext);
        const isPdf = ext === "pdf";
        const isOffice = ["xlsx", "xls", "docx", "doc", "pptx", "csv"].includes(ext);
        let fileHint = `[Datei empfangen: ${fpath}]\nDateiname: ${fname}\nTyp: ${msg.media.mimeType || "unbekannt"}`;
        if (isArchive) {
            fileHint += `\n\nDiese Datei ist ein Archiv. Entpacke sie mit: unzip "${fpath}" -d "${fpath.replace(/\.[^.]+$/, "")}" oder tar xf "${fpath}" und arbeite dann mit dem Inhalt.`;
        }
        else if (isPdf) {
            fileHint += `\n\nLies den Inhalt mit: pdftotext "${fpath}" - oder python3 mit PyPDF2/pdfplumber.`;
        }
        else if (isOffice) {
            fileHint += `\n\nOpen with python3 (openpyxl for xlsx, python-docx for docx, csv module for csv).`;
        }
        text = `${fileHint}\n\n${caption}`;
    }
    // Nothing to process (no text and no usable media).
    if (!text)
        return;
    // ── Basic command handling for non-Telegram platforms ──────────────
    const cmdHandled = await handlePlatformCommand(text, msg, adapter);
    if (cmdHandled)
        return;
    const userId = hashUserId(msg.userId);
    const session = getSession(userId);
    touchProfile(userId, msg.userName, msg.userHandle, msg.platform, text);
    // One request per session at a time: hold up to 3 extra messages, drop the rest.
    if (session.isProcessing) {
        if (session.messageQueue.length < 3) {
            session.messageQueue.push(text);
        }
        return;
    }
    // Fold messages queued during the previous request into this prompt.
    let fullText = text;
    if (session.messageQueue.length > 0) {
        const queued = session.messageQueue.splice(0);
        fullText = [...queued, text].join("\n\n");
    }
    // Add reply context (quoted message capped at 500 characters).
    if (msg.replyToText) {
        const quoted = msg.replyToText.length > 500
            ? msg.replyToText.slice(0, 500) + "..."
            : msg.replyToText;
        fullText = `[Bezug auf: "${quoted}"]\n\n${fullText}`;
    }
    session.isProcessing = true;
    let finalText = "";
    let typingInterval = null;
    try {
        // The typing indicator is started inside the try block so that a
        // misbehaving adapter (synchronous throw, non-promise return value)
        // cannot leave session.isProcessing stuck at true.
        if (adapter.setTyping) {
            const refreshTyping = () => {
                try {
                    Promise.resolve(adapter.setTyping(msg.chatId)).catch(() => { });
                }
                catch { /* typing indicator is best-effort */ }
            };
            refreshTyping();
            // Keep typing indicator alive during long requests (refresh every 4s)
            typingInterval = setInterval(refreshTyping, 4000);
        }
        session.messageCount++;
        // Key language tracking by the same hashed ID as the session and
        // profile. Number(msg.userId) is NaN (and fell back to 0) for
        // non-numeric IDs, which made unrelated users share one entry.
        const adaptedLang = trackAndAdapt(userId, fullText, session.language);
        if (adaptedLang !== session.language)
            session.language = adaptedLang;
        const registry = getRegistry();
        const activeProvider = registry.getActive();
        const isSDK = activeProvider.config.type === "claude-sdk";
        const skillContext = buildSkillContext(fullText);
        const systemPrompt = (isSDK
            ? buildSystemPrompt(isSDK, session.language, msg.chatId)
            : await buildSmartSystemPrompt(isSDK, session.language, fullText, msg.chatId)) + skillContext;
        // The SDK provider is resumed via sessionId; other providers are
        // passed the session history instead.
        const queryOpts = {
            prompt: fullText,
            systemPrompt,
            workingDir: session.workingDir,
            effort: session.effort,
            sessionId: isSDK ? session.sessionId : null,
            history: !isSDK ? session.history : undefined,
        };
        if (!isSDK) {
            addToHistory(userId, { role: "user", content: fullText });
        }
        for await (const chunk of registry.queryWithFallback(queryOpts)) {
            switch (chunk.type) {
                case "text":
                    // Each text chunk replaces finalText rather than appending to it.
                    finalText = chunk.text || "";
                    break;
                case "done":
                    if (chunk.sessionId)
                        session.sessionId = chunk.sessionId;
                    if (chunk.costUsd)
                        session.totalCost += chunk.costUsd;
                    trackProviderUsage(userId, registry.getActiveKey(), chunk.costUsd || 0, chunk.inputTokens, chunk.outputTokens);
                    session.lastActivity = Date.now();
                    break;
                case "error":
                    await adapter.sendText(msg.chatId, `⚠️ Error: ${chunk.error}`);
                    return;
            }
        }
        // Send response, split into several messages when it exceeds the platform limit.
        if (finalText.trim()) {
            const maxLen = PLATFORM_LIMITS[msg.platform] || 4096;
            if (finalText.length > maxLen) {
                const chunks = splitMessage(finalText, maxLen);
                for (const chunk of chunks) {
                    await adapter.sendText(msg.chatId, chunk);
                }
            }
            else {
                await adapter.sendText(msg.chatId, finalText);
            }
            if (!isSDK && finalText) {
                addToHistory(userId, { role: "assistant", content: finalText });
            }
        }
    }
    catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        console.error(`Platform message error (${msg.platform}):`, errorMsg);
        await adapter.sendText(msg.chatId, `⚠️ Error: ${errorMsg}`);
    }
    finally {
        if (typingInterval)
            clearInterval(typingInterval);
        session.isProcessing = false;
    }
}
|
|
192
|
+
/**
 * Handle basic slash commands on non-Telegram platforms.
 *
 * Recognised commands (matched case-insensitively): /new, /status,
 * /effort [low|medium|high|max], /help.
 *
 * @param {string} text - Message text; only text starting with "/" is considered.
 * @param {object} msg - Incoming message (userId, chatId and platform are read).
 * @param {object} adapter - Platform adapter providing sendText(chatId, text).
 * @returns {Promise<boolean>} true if the message was a command and was
 *   handled; false for plain text and for unknown commands, which are then
 *   treated as normal messages by the caller.
 */
async function handlePlatformCommand(text, msg, adapter) {
    if (!text.startsWith("/"))
        return false;
    const parts = text.split(/\s+/);
    const cmd = parts[0].toLowerCase();
    const userId = hashUserId(msg.userId);
    const session = getSession(userId);
    switch (cmd) {
        case "/new": {
            // resetSession is not among this module's static imports, so it is loaded on demand.
            const { resetSession } = await import("../services/session.js");
            resetSession(userId);
            await adapter.sendText(msg.chatId, "🔄 New chat started.");
            return true;
        }
        case "/status": {
            // getRegistry is already imported statically by this module; the
            // redundant dynamic import that shadowed it has been removed.
            const provider = getRegistry().getActiveKey();
            const msgs = session.messageCount;
            const cost = session.totalCost.toFixed(4);
            await adapter.sendText(msg.chatId, `📊 Status\n` +
                `Provider: ${provider}\n` +
                `Messages: ${msgs}\n` +
                `Cost: $${cost}\n` +
                `Effort: ${session.effort}\n` +
                `Platform: ${msg.platform}`);
            return true;
        }
        case "/effort": {
            const level = parts[1]?.toLowerCase();
            if (["low", "medium", "high", "max"].includes(level)) {
                session.effort = level;
                await adapter.sendText(msg.chatId, `🧠 Effort: ${level}`);
            }
            else {
                // Missing or invalid level: show the current value and the options.
                await adapter.sendText(msg.chatId, `🧠 Current: ${session.effort}\nOptions: /effort low|medium|high|max`);
            }
            return true;
        }
        case "/help": {
            await adapter.sendText(msg.chatId, "🤖 Alvin Bot — Commands\n\n" +
                "/new — New chat\n" +
                "/status — Session info\n" +
                "/effort <low|medium|high|max> — Thinking depth\n" +
                "/help — This help\n\n" +
                "For all features use the Web Dashboard or Telegram.");
            return true;
        }
        default:
            // Unknown command → treat as normal message
            return false;
    }
}
|
|
249
|
+
/**
 * Hash a platform user ID to a non-negative numeric ID for session compatibility.
 *
 * Uses the 31-multiplier rolling hash over UTF-16 code units, wrapped to a
 * signed 32-bit integer, and returns its absolute value.
 *
 * @param {string|number} id - Platform user identifier. Non-string values are
 *   converted with String() first; previously a numeric ID had no `.length`,
 *   so every numeric ID hashed to 0 and collided.
 * @returns {number} Integer in the range 0..2147483648.
 * @throws {TypeError} If id is null or undefined. Hashing a placeholder would
 *   silently merge all such users into a single session.
 */
function hashUserId(id) {
    if (id == null) {
        throw new TypeError("hashUserId: id must not be null or undefined");
    }
    const key = String(id);
    let hash = 0;
    for (let i = 0; i < key.length; i++) {
        // (hash << 5) - hash is hash * 31; `| 0` wraps the sum to a 32-bit int.
        hash = (((hash << 5) - hash) + key.charCodeAt(i)) | 0;
    }
    return Math.abs(hash);
}
|
|
259
|
+
/**
 * Split a message into chunks of at most `maxLen` UTF-16 code units.
 *
 * Prefers to break at the last newline, then at the last space, as long as
 * that break lies in the second half of the window; otherwise it cuts hard at
 * `maxLen`. Leading whitespace of each following chunk is dropped.
 *
 * @param {string} text - Text to split.
 * @param {number} maxLen - Maximum chunk length. Values below 1 (or NaN)
 *   disable splitting; previously they caused an endless loop.
 * @returns {string[]} Chunks in order; empty array for empty text.
 */
function splitMessage(text, maxLen) {
    const chunks = [];
    // A limit below 1 can never make progress, so return the text unsplit.
    if (!(maxLen >= 1)) {
        if (text)
            chunks.push(text);
        return chunks;
    }
    let remaining = text;
    while (remaining.length > maxLen) {
        let splitAt = remaining.lastIndexOf("\n", maxLen);
        if (splitAt < maxLen * 0.5)
            splitAt = remaining.lastIndexOf(" ", maxLen);
        if (splitAt < maxLen * 0.5)
            splitAt = maxLen;
        // Do not cut between the two halves of a surrogate pair (e.g. an
        // emoji): that would send a broken character at the end of one chunk
        // and at the start of the next. Only a hard cut can land there.
        if (splitAt > 1) {
            const before = remaining.charCodeAt(splitAt - 1);
            const after = remaining.charCodeAt(splitAt);
            const endsOnHighSurrogate = before >= 0xd800 && before <= 0xdbff;
            const startsOnLowSurrogate = after >= 0xdc00 && after <= 0xdfff;
            if (endsOnHighSurrogate && startsOnLowSurrogate)
                splitAt -= 1;
        }
        chunks.push(remaining.slice(0, splitAt));
        remaining = remaining.slice(splitAt).trimStart();
    }
    if (remaining)
        chunks.push(remaining);
    return chunks;
}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video Message Handler — Process video messages and video notes (round videos).
|
|
3
|
+
*
|
|
4
|
+
* Capabilities:
|
|
5
|
+
* - Receive video messages → extract key frames → describe/analyze
|
|
6
|
+
* - Receive video notes (round videos) → same processing
|
|
7
|
+
* - Extract audio from video → transcribe (if voice content)
|
|
8
|
+
* - Support for video files sent as documents (handled by document handler)
|
|
9
|
+
*/
|
|
10
|
+
import fs from "fs";
|
|
11
|
+
import path from "path";
|
|
12
|
+
import os from "os";
|
|
13
|
+
import https from "https";
|
|
14
|
+
import { execSync } from "child_process";
|
|
15
|
+
import { config } from "../config.js";
|
|
16
|
+
import { getSession, addToHistory } from "../services/session.js";
|
|
17
|
+
import { TelegramStreamer } from "../services/telegram.js";
|
|
18
|
+
import { getRegistry } from "../engine.js";
|
|
19
|
+
import { transcribeAudio } from "../services/voice.js";
|
|
20
|
+
import { buildSystemPrompt } from "../services/personality.js";
|
|
21
|
+
// Directory for temporary video files, under the OS temp directory.
const TEMP_DIR = path.join(os.tmpdir(), "alvin-bot", "video");
// Create it on module load unless something already exists at that path.
fs.existsSync(TEMP_DIR) || fs.mkdirSync(TEMP_DIR, { recursive: true });
|
|
24
|
+
/** React helper: calls ctx.react(emoji) and ignores any failure. */
async function react(ctx, emoji) {
    // The async wrapper turns a synchronous throw into a rejection as well,
    // so a single catch covers both failure modes.
    const attempt = async () => ctx.react(emoji);
    await attempt().catch(() => { });
}
|
|
31
|
+
/**
 * Download a Telegram file over HTTPS and write it to `dest`.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dest - Path of the file to create.
 * @returns {Promise<void>} Resolves once the file is fully written and
 *   closed. Rejects on a request error, a non-2xx HTTP status, or a write
 *   error; the partial file is removed in those cases.
 */
async function downloadFile(url, dest) {
    return new Promise((resolve, reject) => {
        const file = fs.createWriteStream(dest);
        // Shared failure path: stop writing, drop the partial file, reject.
        const fail = (err) => {
            file.destroy();
            fs.unlink(dest, () => reject(err));
        };
        // Without this listener a write error is an unhandled "error" event.
        file.on("error", fail);
        file.on("finish", () => file.close(() => resolve()));
        https.get(url, (response) => {
            const status = response.statusCode ?? 0;
            if (status < 200 || status >= 300) {
                // Discard the body so the connection is released, and do not
                // save an error page as if it were the requested file.
                response.resume();
                // The URL is left out of the message on purpose: callers
                // build it with the bot token and echo error messages to chat.
                fail(new Error(`Download failed: HTTP ${status}`));
                return;
            }
            response.on("error", fail);
            response.pipe(file);
        }).on("error", fail);
    });
}
|
|
44
|
+
/**
 * Check if ffmpeg is available.
 *
 * @returns {boolean} true when `which ffmpeg` exits successfully, false when
 *   running it throws.
 */
function hasFFmpeg() {
    let available = true;
    try {
        execSync("which ffmpeg", { stdio: "pipe" });
    }
    catch {
        available = false;
    }
    return available;
}
|
|
54
|
+
/**
 * Extract key frames from a video (up to `maxFrames`, evenly spaced).
 *
 * The duration is read with ffprobe and falls back to 10 seconds when probing
 * fails. Frames are written as frame_<n>.jpg into `outputDir`. Frames that
 * ffmpeg fails to produce are skipped.
 *
 * @param {string} videoPath - Path of the video file.
 * @param {string} outputDir - Existing directory that receives the frames.
 * @param {number} [maxFrames=4] - Upper bound on the number of frames.
 * @returns {string[]} Paths of the frames that were written (may be empty).
 */
function extractFrames(videoPath, outputDir, maxFrames = 4) {
    // NOTE(review): both paths are interpolated into a shell command inside
    // double quotes. The visible caller builds them from the temp dir and a
    // timestamp; confirm no caller passes user-controlled paths.
    // Get video duration
    let duration = 10;
    try {
        const probe = execSync(`ffprobe -v error -show_entries format=duration -of csv="p=0" "${videoPath}"`, { stdio: "pipe" }).toString().trim();
        duration = parseFloat(probe) || 10;
    }
    catch { /* default duration */ }
    const interval = Math.max(duration / (maxFrames + 1), 0.5);
    const frames = [];
    let previousStamp = null;
    for (let i = 1; i <= maxFrames; i++) {
        // Stay just before the end of the clip, but never seek to a negative
        // position (clips shorter than 0.1s used to yield "-ss -0.05").
        const timestamp = Math.max(0, Math.min(interval * i, duration - 0.1));
        const stamp = timestamp.toFixed(2);
        // On short clips the clamped positions repeat; extracting the same
        // position again only produces a duplicate frame.
        if (stamp === previousStamp)
            continue;
        previousStamp = stamp;
        const framePath = path.join(outputDir, `frame_${i}.jpg`);
        try {
            execSync(`ffmpeg -ss ${stamp} -i "${videoPath}" -vframes 1 -q:v 3 -y "${framePath}" 2>/dev/null`, { stdio: "pipe", timeout: 10000 });
            if (fs.existsSync(framePath) && fs.statSync(framePath).size > 0) {
                frames.push(framePath);
            }
        }
        catch { /* skip this frame */ }
    }
    return frames;
}
|
|
78
|
+
/**
 * Extract audio track from video into an Opus .ogg file next to it.
 *
 * @param {string} videoPath - Path of the video file.
 * @returns {string|null} Path of the extracted audio file, or null when
 *   ffmpeg fails or the result is 1000 bytes or smaller. No output file is
 *   left behind when null is returned.
 */
function extractAudio(videoPath) {
    const swapped = videoPath.replace(/\.\w+$/, ".ogg");
    // If the path has no replaceable extension (or already ends in .ogg) the
    // output would equal the input and `-y` would overwrite the source.
    const audioPath = swapped === videoPath ? `${videoPath}.ogg` : swapped;
    try {
        execSync(`ffmpeg -i "${videoPath}" -vn -acodec libopus -y "${audioPath}" 2>/dev/null`, { stdio: "pipe", timeout: 30000 });
        if (fs.existsSync(audioPath) && fs.statSync(audioPath).size > 1000) {
            return audioPath;
        }
    }
    catch { /* no audio track or extraction failed */ }
    // Remove a partial or too-small output so it does not pile up in the temp dir.
    try {
        fs.unlinkSync(audioPath);
    }
    catch { /* nothing was written */ }
    return null;
}
|
|
90
|
+
/**
 * Handle a Telegram video or video note.
 *
 * Downloads the video, extracts key frames and (when a Groq key is
 * configured) transcribes the audio track, builds a text prompt from that
 * metadata, streams the AI answer back to the chat, and removes all temporary
 * files on every exit path.
 *
 * @param {object} ctx - Bot context. Read here: message (video, video_note,
 *   caption, message_id), from.id, chat.id, api, reply(), react().
 * @returns {Promise<void>}
 */
export async function handleVideo(ctx) {
    const video = ctx.message?.video || ctx.message?.video_note;
    if (!video)
        return;
    const userId = ctx.from.id;
    const session = getSession(userId);
    // A request is already running: queue a placeholder (up to 3) and stop.
    if (session.isProcessing) {
        if (session.messageQueue.length < 3) {
            session.messageQueue.push("[Video-Nachricht empfangen]");
            await react(ctx, "📝");
        }
        return;
    }
    if (!hasFFmpeg()) {
        await ctx.reply("❌ Video processing requires ffmpeg. Install with: `brew install ffmpeg`", { parse_mode: "Markdown" });
        return;
    }
    session.isProcessing = true;
    session.abortController = new AbortController();
    const streamer = new TelegramStreamer(ctx.chat.id, ctx.api, ctx.message?.message_id);
    let finalText = "";
    const typingInterval = setInterval(() => {
        ctx.api.sendChatAction(ctx.chat.id, "typing").catch(() => { });
    }, 4000);
    // Temporary files are tracked out here so that `finally` can remove them
    // when the download, the extraction or the AI query fails.
    let videoPath = null;
    let frameDir = null;
    let audioPath = null;
    let frames = [];
    try {
        await react(ctx, "👀");
        await ctx.api.sendChatAction(ctx.chat.id, "typing");
        // 1. Download video
        const file = await ctx.api.getFile(video.file_id);
        const ext = file.file_path?.split(".").pop() || "mp4";
        videoPath = path.join(TEMP_DIR, `video_${Date.now()}.${ext}`);
        const fileUrl = `https://api.telegram.org/file/bot${config.botToken}/${file.file_path}`;
        await downloadFile(fileUrl, videoPath);
        // 2. Extract key frames
        frameDir = path.join(TEMP_DIR, `frames_${Date.now()}`);
        fs.mkdirSync(frameDir, { recursive: true });
        frames = extractFrames(videoPath, frameDir);
        // 3. Extract and transcribe audio (if available)
        let transcript = "";
        if (config.apiKeys.groq) {
            audioPath = extractAudio(videoPath);
            if (audioPath) {
                try {
                    transcript = await transcribeAudio(audioPath);
                }
                catch { /* no transcription */ }
            }
        }
        // 4. Build prompt with video context
        // NOTE(review): only the number of extracted frames is put into the
        // prompt, not the frame paths — confirm whether the provider is
        // expected to see the frames themselves.
        const caption = ctx.message?.caption || "";
        const duration = video.duration || 0;
        const isVideoNote = !!ctx.message?.video_note;
        let prompt = `[Video empfangen: ${duration}s`;
        if (isVideoNote)
            prompt += ", runde Videonachricht";
        if (video.width && video.height)
            prompt += `, ${video.width}x${video.height}`;
        prompt += "]";
        if (transcript) {
            prompt += `\n\n[Audio-Transkription]: "${transcript}"`;
        }
        if (caption) {
            prompt += `\n\n[Bildunterschrift]: "${caption}"`;
        }
        if (frames.length > 0) {
            prompt += `\n\n[${frames.length} key frames extracted from the video]`;
            prompt += "\n\nBitte beschreibe was im Video zu sehen ist und beantworte eventuelle Fragen.";
        }
        else {
            prompt += "\n\nIch konnte keine Frames extrahieren. Basiere deine Antwort auf der Audio-Transkription.";
        }
        // Show what we extracted
        const infoLines = [];
        if (frames.length > 0)
            infoLines.push(`🎞️ ${frames.length} frames extracted`);
        if (transcript)
            infoLines.push(`🎙️ "${transcript.slice(0, 100)}${transcript.length > 100 ? "..." : ""}"`);
        if (infoLines.length > 0) {
            await ctx.reply(infoLines.join("\n"));
        }
        // 5. Send to AI
        session.messageCount++;
        const registry = getRegistry();
        const activeProvider = registry.getActive();
        const isSDK = activeProvider.config.type === "claude-sdk";
        // The SDK provider is resumed via sessionId; other providers are
        // passed the session history instead.
        const queryOpts = {
            prompt,
            systemPrompt: buildSystemPrompt(isSDK, session.language),
            workingDir: session.workingDir,
            effort: session.effort,
            abortSignal: session.abortController.signal,
            sessionId: isSDK ? session.sessionId : null,
            history: !isSDK ? session.history : undefined,
        };
        if (!isSDK) {
            addToHistory(userId, { role: "user", content: prompt });
        }
        for await (const chunk of registry.queryWithFallback(queryOpts)) {
            switch (chunk.type) {
                case "text":
                    // Each text chunk replaces finalText rather than appending to it.
                    finalText = chunk.text || "";
                    await streamer.update(finalText);
                    break;
                case "tool_use":
                    if (chunk.toolName)
                        session.toolUseCount++;
                    break;
                case "done":
                    if (chunk.sessionId)
                        session.sessionId = chunk.sessionId;
                    if (chunk.costUsd)
                        session.totalCost += chunk.costUsd;
                    session.lastActivity = Date.now();
                    break;
                case "error":
                    await ctx.reply(`Error: ${chunk.error}`);
                    break;
            }
        }
        await streamer.finalize(finalText);
        await react(ctx, "👍");
        if (!isSDK && finalText) {
            addToHistory(userId, { role: "assistant", content: finalText });
        }
    }
    catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        await react(ctx, "👎");
        if (errorMsg.includes("abort")) {
            await ctx.reply("Anfrage abgebrochen.");
        }
        else {
            await ctx.reply(`Error: ${errorMsg}`);
        }
    }
    finally {
        clearInterval(typingInterval);
        session.isProcessing = false;
        session.abortController = null;
        // Cleanup runs on success and on failure. Each removal is
        // best-effort: a file that is already gone is not an error.
        const leftovers = [...frames, frameDir, audioPath, videoPath].filter(Boolean);
        for (const leftover of leftovers) {
            await fs.promises.rm(leftover, { recursive: true, force: true }).catch(() => { });
        }
    }
}
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import { InputFile } from "grammy";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import path from "path";
|
|
4
|
+
import os from "os";
|
|
5
|
+
import https from "https";
|
|
6
|
+
/**
 * React to a message with an emoji.
 * Best-effort: any failure of ctx.react (e.g. reactions not supported) is ignored.
 */
async function react(ctx, emoji) {
    // The async wrapper converts a synchronous throw into a rejection too.
    const sendReaction = async () => ctx.react(emoji);
    await sendReaction().catch(() => { });
}
|
|
13
|
+
import { config } from "../config.js";
|
|
14
|
+
import { getSession, addToHistory } from "../services/session.js";
|
|
15
|
+
import { TelegramStreamer } from "../services/telegram.js";
|
|
16
|
+
import { transcribeAudio, textToSpeech } from "../services/voice.js";
|
|
17
|
+
import { getRegistry } from "../engine.js";
|
|
18
|
+
import { buildSystemPrompt } from "../services/personality.js";
|
|
19
|
+
// Directory for temporary voice files, under the OS temp directory.
const TEMP_DIR = path.join(os.tmpdir(), "alvin-bot");
// Create it on module load unless something already exists at that path.
fs.existsSync(TEMP_DIR) || fs.mkdirSync(TEMP_DIR, { recursive: true });
|
|
23
|
+
/**
 * Download a file over HTTPS and write it to `dest`.
 *
 * @param {string} url - HTTPS URL to fetch.
 * @param {string} dest - Path of the file to create.
 * @returns {Promise<void>} Resolves once the file is fully written and
 *   closed. Rejects on a request error, a non-2xx HTTP status, or a write
 *   error; the partial file is removed in those cases.
 */
async function downloadFile(url, dest) {
    return new Promise((resolve, reject) => {
        const file = fs.createWriteStream(dest);
        // Shared failure path: stop writing, drop the partial file, reject.
        const fail = (err) => {
            file.destroy();
            fs.unlink(dest, () => reject(err));
        };
        // Without this listener a write error is an unhandled "error" event.
        file.on("error", fail);
        file.on("finish", () => file.close(() => resolve()));
        https.get(url, (response) => {
            const status = response.statusCode ?? 0;
            if (status < 200 || status >= 300) {
                // Discard the body so the connection is released, and do not
                // save an error page as if it were the requested file.
                response.resume();
                // The URL is left out of the message on purpose: callers
                // build it with the bot token and echo error messages to chat.
                fail(new Error(`Download failed: HTTP ${status}`));
                return;
            }
            response.on("error", fail);
            response.pipe(file);
        }).on("error", fail);
    });
}
|
|
35
|
+
/**
 * Handle a Telegram voice message.
 *
 * Downloads the voice note, transcribes it, echoes the transcript, streams
 * the AI answer back to the chat and, when the session has voice replies
 * enabled, also sends the answer as a voice message. Temporary audio files
 * are removed on every exit path.
 *
 * @param {object} ctx - Bot context. Read here: message (voice, message_id),
 *   from.id, chat.id, api, reply(), replyWithVoice(), react().
 * @returns {Promise<void>}
 */
export async function handleVoice(ctx) {
    const voice = ctx.message?.voice;
    if (!voice)
        return;
    const userId = ctx.from.id;
    const session = getSession(userId);
    if (session.isProcessing) {
        await ctx.reply("Please wait, previous request still running... (/cancel to abort)");
        return;
    }
    if (!config.apiKeys.groq) {
        await ctx.reply("Voice nicht konfiguriert (GROQ_API_KEY fehlt).");
        return;
    }
    session.isProcessing = true;
    session.abortController = new AbortController();
    const streamer = new TelegramStreamer(ctx.chat.id, ctx.api, ctx.message?.message_id);
    let finalText = "";
    const typingInterval = setInterval(() => {
        ctx.api.sendChatAction(ctx.chat.id, "typing").catch(() => { });
    }, 4000);
    // Tracked out here so `finally` can remove the download even when the
    // download itself or the transcription throws.
    let audioPath = null;
    try {
        await react(ctx, "🎧");
        await ctx.api.sendChatAction(ctx.chat.id, "typing");
        // 1. Download voice message
        const file = await ctx.api.getFile(voice.file_id);
        const fileUrl = `https://api.telegram.org/file/bot${config.botToken}/${file.file_path}`;
        audioPath = path.join(TEMP_DIR, `voice_${Date.now()}.ogg`);
        await downloadFile(fileUrl, audioPath);
        // 2. Transcribe the downloaded audio
        const transcript = await transcribeAudio(audioPath);
        if (!transcript.trim()) {
            await ctx.reply("Could not understand the voice message.");
            return;
        }
        // Show what was understood
        await ctx.reply(`"${transcript}"`);
        // 3. Send to AI via provider system
        session.messageCount++;
        const registry = getRegistry();
        const activeProvider = registry.getActive();
        const isSDK = activeProvider.config.type === "claude-sdk";
        // The SDK provider is resumed via sessionId; other providers are
        // passed the session history instead.
        const queryOpts = {
            prompt: transcript,
            systemPrompt: buildSystemPrompt(isSDK, session.language),
            workingDir: session.workingDir,
            effort: session.effort,
            abortSignal: session.abortController.signal,
            sessionId: isSDK ? session.sessionId : null,
            history: !isSDK ? session.history : undefined,
            _sessionState: isSDK ? {
                messageCount: session.messageCount,
                toolUseCount: session.toolUseCount,
            } : undefined,
        };
        if (!isSDK) {
            addToHistory(userId, { role: "user", content: transcript });
        }
        for await (const chunk of registry.queryWithFallback(queryOpts)) {
            switch (chunk.type) {
                case "text":
                    // Each text chunk replaces finalText rather than appending to it.
                    finalText = chunk.text || "";
                    await streamer.update(finalText);
                    break;
                case "tool_use":
                    if (chunk.toolName)
                        session.toolUseCount++;
                    break;
                case "done":
                    if (chunk.sessionId)
                        session.sessionId = chunk.sessionId;
                    if (chunk.costUsd)
                        session.totalCost += chunk.costUsd;
                    session.lastActivity = Date.now();
                    break;
                case "error":
                    await ctx.reply(`Error: ${chunk.error}`);
                    break;
            }
        }
        await streamer.finalize(finalText);
        await react(ctx, "👍");
        if (!isSDK && finalText) {
            addToHistory(userId, { role: "assistant", content: finalText });
        }
        // 4. Send voice reply if enabled
        if (session.voiceReply && finalText.trim()) {
            let ttsPath = null;
            try {
                await ctx.api.sendChatAction(ctx.chat.id, "upload_voice");
                ttsPath = await textToSpeech(finalText);
                await ctx.replyWithVoice(new InputFile(fs.readFileSync(ttsPath), "response.mp3"));
            }
            catch (err) {
                // The text answer was already delivered; a TTS failure is only logged.
                console.error("TTS error:", err);
            }
            finally {
                // Remove the generated speech file even when sending failed.
                if (ttsPath)
                    await fs.promises.unlink(ttsPath).catch(() => { });
            }
        }
    }
    catch (err) {
        const errorMsg = err instanceof Error ? err.message : String(err);
        if (errorMsg.includes("abort")) {
            await ctx.reply("Anfrage abgebrochen.");
        }
        else {
            await ctx.reply(`Error: ${errorMsg}`);
        }
    }
    finally {
        clearInterval(typingInterval);
        session.isProcessing = false;
        session.abortController = null;
        // Best-effort removal of the downloaded voice note on every exit path.
        if (audioPath)
            await fs.promises.unlink(audioPath).catch(() => { });
    }
}
|