aiden-runtime 4.0.1 → 4.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -7
- package/config/hardware.json +2 -2
- package/dist/api/server.js +50 -52
- package/dist/cli/v4/aidenCLI.js +513 -14
- package/dist/cli/v4/aidenPrompt.js +317 -0
- package/dist/cli/v4/box.js +105 -39
- package/dist/cli/v4/callbacks.js +39 -6
- package/dist/cli/v4/chatSession.js +269 -52
- package/dist/cli/v4/citationFooter.js +97 -0
- package/dist/cli/v4/commands/channel.js +656 -0
- package/dist/cli/v4/commands/clear.js +1 -1
- package/dist/cli/v4/commands/compress.js +1 -1
- package/dist/cli/v4/commands/cron.js +44 -16
- package/dist/cli/v4/commands/fanout.js +236 -0
- package/dist/cli/v4/commands/help.js +15 -4
- package/dist/cli/v4/commands/history.js +84 -0
- package/dist/cli/v4/commands/index.js +19 -1
- package/dist/cli/v4/commands/mcp.js +358 -0
- package/dist/cli/v4/commands/setup.js +34 -0
- package/dist/cli/v4/commands/show.js +43 -0
- package/dist/cli/v4/commands/skills.js +169 -4
- package/dist/cli/v4/commands/status.js +84 -0
- package/dist/cli/v4/commands/subagent.js +78 -0
- package/dist/cli/v4/commands/verbose.js +1 -1
- package/dist/cli/v4/commands/voice.js +218 -0
- package/dist/cli/v4/cronCli.js +103 -0
- package/dist/cli/v4/display.js +300 -14
- package/dist/cli/v4/doctor.js +41 -0
- package/dist/cli/v4/envSources.js +105 -0
- package/dist/cli/v4/ghostMatch.js +74 -0
- package/dist/cli/v4/historyStore.js +163 -0
- package/dist/cli/v4/pasteCompression.js +124 -0
- package/dist/cli/v4/pasteIntercept.js +203 -0
- package/dist/cli/v4/replyRenderer.js +209 -0
- package/dist/cli/v4/resizeGuard.js +92 -0
- package/dist/cli/v4/setupWizard.js +466 -232
- package/dist/cli/v4/shellInterpolation.js +139 -0
- package/dist/cli/v4/skinEngine.js +21 -1
- package/dist/cli/v4/streamingPrefix.js +121 -0
- package/dist/cli/v4/syntaxHighlight.js +345 -0
- package/dist/cli/v4/table.js +216 -0
- package/dist/cli/v4/themeDetect.js +81 -0
- package/dist/cli/v4/uiBuild.js +74 -0
- package/dist/cli/v4/voiceCli.js +113 -0
- package/dist/cli/v4/voicePromptApi.js +196 -0
- package/dist/core/channels/discord.js +16 -10
- package/dist/core/channels/email.js +13 -9
- package/dist/core/channels/imessage.js +13 -9
- package/dist/core/channels/manager.js +25 -7
- package/dist/core/channels/pdf-extract.js +180 -0
- package/dist/core/channels/photo-vision.js +157 -0
- package/dist/core/channels/signal.js +11 -7
- package/dist/core/channels/slack.js +13 -10
- package/dist/core/channels/telegram-commands.js +154 -0
- package/dist/core/channels/telegram-groups.js +198 -0
- package/dist/core/channels/telegram-rate-limit.js +124 -0
- package/dist/core/channels/telegram.js +1980 -0
- package/dist/core/channels/twilio.js +11 -7
- package/dist/core/channels/webhook.js +9 -5
- package/dist/core/channels/whatsapp.js +15 -11
- package/dist/core/channels/whisper-transcribe.js +163 -0
- package/dist/core/cronManager.js +33 -294
- package/dist/core/gateway.js +29 -8
- package/dist/core/playwrightBridge.js +90 -0
- package/dist/core/v4/aidenAgent.js +35 -0
- package/dist/core/v4/auxiliaryClient.js +2 -2
- package/dist/core/v4/cron/atomicWrite.js +18 -4
- package/dist/core/v4/cron/cronExecute.js +300 -0
- package/dist/core/v4/cron/cronManager.js +502 -0
- package/dist/core/v4/cron/cronState.js +314 -0
- package/dist/core/v4/cron/cronTick.js +90 -0
- package/dist/core/v4/cron/diagnostics.js +104 -0
- package/dist/core/v4/cron/graceWindow.js +79 -0
- package/dist/core/v4/firstRun/providerDetection.js +287 -0
- package/dist/core/v4/logger/factory.js +110 -0
- package/dist/core/v4/logger/index.js +22 -0
- package/dist/core/v4/logger/logger.js +101 -0
- package/dist/core/v4/logger/sinks/fileSink.js +110 -0
- package/dist/core/v4/logger/sinks/multiSink.js +43 -0
- package/dist/core/v4/logger/sinks/nullSink.js +53 -0
- package/dist/core/v4/logger/sinks/stdSink.js +81 -0
- package/dist/core/v4/mcp/server/diagnostics.js +40 -0
- package/dist/core/v4/mcp/server/skillBridge.js +94 -0
- package/dist/core/v4/mcp/server/stdioServer.js +119 -0
- package/dist/core/v4/mcp/server/toolBridge.js +168 -0
- package/dist/core/v4/platformPaths.js +105 -0
- package/dist/core/v4/providerFallback.js +25 -0
- package/dist/core/v4/skillLoader.js +21 -5
- package/dist/core/v4/skillMining/candidateStore.js +164 -0
- package/dist/core/v4/skillMining/extractorPrompt.js +111 -0
- package/dist/core/v4/skillMining/proposalBuilder.js +139 -0
- package/dist/core/v4/skillMining/skillMiner.js +191 -0
- package/dist/core/v4/skillMining/traceFingerprint.js +51 -0
- package/dist/core/v4/subagent/budget.js +76 -0
- package/dist/core/v4/subagent/diagnostics.js +22 -0
- package/dist/core/v4/subagent/fanout.js +216 -0
- package/dist/core/v4/subagent/merger.js +148 -0
- package/dist/core/v4/subagent/providerRotation.js +54 -0
- package/dist/core/v4/voice/audioStream.js +373 -0
- package/dist/core/v4/voice/cliVoice.js +393 -0
- package/dist/core/v4/voice/diagnostics.js +66 -0
- package/dist/core/v4/voice/ttsStream.js +193 -0
- package/dist/core/version.js +1 -1
- package/dist/core/visionAnalyze.js +291 -90
- package/dist/core/voice/audio.js +61 -5
- package/dist/core/voice/audioBackend.js +134 -0
- package/dist/core/voice/stt.js +61 -6
- package/dist/core/voice/tts.js +19 -3
- package/dist/providers/v4/nullAdapter.js +58 -0
- package/dist/tools/v4/index.js +32 -1
- package/dist/tools/v4/subagent/subagentFanout.js +166 -0
- package/package.json +11 -2
|
@@ -19,6 +19,7 @@ exports.TwilioAdapter = void 0;
|
|
|
19
19
|
// TWILIO_ALLOWED_NUMBERS — optional comma-separated inbound allowlist
|
|
20
20
|
// WEBHOOK_URL — base URL for inbound webhook registration
|
|
21
21
|
const gateway_1 = require("../gateway");
|
|
22
|
+
const logger_1 = require("../v4/logger");
|
|
22
23
|
// SMS max segment length per GSM spec
|
|
23
24
|
const SMS_CHUNK_SIZE = 160;
|
|
24
25
|
/** Split a message into ≤160-character segments */
|
|
@@ -34,6 +35,8 @@ function chunkSms(text) {
|
|
|
34
35
|
class TwilioAdapter {
|
|
35
36
|
constructor(app) {
|
|
36
37
|
this.name = 'sms';
|
|
38
|
+
// Phase v4.1-1.3a — diagnostics route through scope logger.
|
|
39
|
+
this.log = (0, logger_1.noopLogger)();
|
|
37
40
|
this.twilioClient = null;
|
|
38
41
|
this.healthy = false;
|
|
39
42
|
this.app = null;
|
|
@@ -45,10 +48,11 @@ class TwilioAdapter {
|
|
|
45
48
|
this.webhookUrl = process.env.WEBHOOK_URL ?? '';
|
|
46
49
|
this.app = app ?? null;
|
|
47
50
|
}
|
|
51
|
+
attachLogger(logger) { this.log = logger; }
|
|
48
52
|
// ── Lifecycle ──────────────────────────────────────────────
|
|
49
53
|
async start() {
|
|
50
54
|
if (!this.accountSid || !this.authToken || !this.fromNumber) {
|
|
51
|
-
|
|
55
|
+
this.log.info('Disabled — set TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN, TWILIO_PHONE_NUMBER to enable');
|
|
52
56
|
return;
|
|
53
57
|
}
|
|
54
58
|
let twilio;
|
|
@@ -56,7 +60,7 @@ class TwilioAdapter {
|
|
|
56
60
|
twilio = require('twilio');
|
|
57
61
|
}
|
|
58
62
|
catch (e) {
|
|
59
|
-
|
|
63
|
+
this.log.info(`Disabled — twilio package not available:${e.message}`);
|
|
60
64
|
return;
|
|
61
65
|
}
|
|
62
66
|
this.twilioClient = twilio(this.accountSid, this.authToken);
|
|
@@ -87,10 +91,10 @@ class TwilioAdapter {
|
|
|
87
91
|
});
|
|
88
92
|
}
|
|
89
93
|
if (!this.webhookUrl) {
|
|
90
|
-
|
|
94
|
+
this.log.info('Outbound ready — inbound SMS requires public webhook URL (set WEBHOOK_URL env or use ngrok)');
|
|
91
95
|
}
|
|
92
96
|
else {
|
|
93
|
-
|
|
97
|
+
this.log.info('Ready — inbound webhook: ${this.webhookUrl}/api/channels/sms/inbound');
|
|
94
98
|
}
|
|
95
99
|
this.healthy = true;
|
|
96
100
|
}
|
|
@@ -98,7 +102,7 @@ class TwilioAdapter {
|
|
|
98
102
|
this.healthy = false;
|
|
99
103
|
gateway_1.gateway.unregisterChannel('sms');
|
|
100
104
|
this.twilioClient = null;
|
|
101
|
-
|
|
105
|
+
this.log.info('Disconnected');
|
|
102
106
|
}
|
|
103
107
|
async send(target, message) {
|
|
104
108
|
if (!this.twilioClient || !this.healthy)
|
|
@@ -113,7 +117,7 @@ class TwilioAdapter {
|
|
|
113
117
|
});
|
|
114
118
|
}
|
|
115
119
|
catch (e) {
|
|
116
|
-
|
|
120
|
+
this.log.error(`send error:${e.message}`);
|
|
117
121
|
break;
|
|
118
122
|
}
|
|
119
123
|
}
|
|
@@ -136,7 +140,7 @@ class TwilioAdapter {
|
|
|
136
140
|
});
|
|
137
141
|
}
|
|
138
142
|
catch (e) {
|
|
139
|
-
|
|
143
|
+
this.log.error(`routeMessage error:${e.message}`);
|
|
140
144
|
return 'Something went wrong. Try again.';
|
|
141
145
|
}
|
|
142
146
|
}
|
|
@@ -62,9 +62,12 @@ exports.WebhookAdapter = void 0;
|
|
|
62
62
|
// No external SDK — uses Node.js built-in crypto only.
|
|
63
63
|
const crypto = __importStar(require("crypto"));
|
|
64
64
|
const gateway_1 = require("../gateway");
|
|
65
|
+
const logger_1 = require("../v4/logger");
|
|
65
66
|
class WebhookAdapter {
|
|
66
67
|
constructor(app) {
|
|
67
68
|
this.name = 'webhook';
|
|
69
|
+
// Phase v4.1-1.3a — diagnostics route through scope logger.
|
|
70
|
+
this.log = (0, logger_1.noopLogger)();
|
|
68
71
|
this.healthy = false;
|
|
69
72
|
this.secret = process.env.WEBHOOK_SECRET ?? '';
|
|
70
73
|
const rawOrigins = process.env.WEBHOOK_ALLOWED_ORIGINS ?? '';
|
|
@@ -73,14 +76,15 @@ class WebhookAdapter {
|
|
|
73
76
|
: [];
|
|
74
77
|
this.app = app ?? null;
|
|
75
78
|
}
|
|
79
|
+
attachLogger(logger) { this.log = logger; }
|
|
76
80
|
// ── Lifecycle ──────────────────────────────────────────────
|
|
77
81
|
async start() {
|
|
78
82
|
if (!this.app) {
|
|
79
|
-
|
|
83
|
+
this.log.warn('No Express app provided — endpoint not registered');
|
|
80
84
|
return;
|
|
81
85
|
}
|
|
82
86
|
if (!this.secret) {
|
|
83
|
-
|
|
87
|
+
this.log.info('Disabled — set WEBHOOK_SECRET to enable');
|
|
84
88
|
// Register the route but return 503 so callers get a clear error
|
|
85
89
|
this.app.post('/api/webhook', (_req, res) => {
|
|
86
90
|
res.status(503).json({ error: 'Webhook disabled — set WEBHOOK_SECRET to enable' });
|
|
@@ -134,12 +138,12 @@ class WebhookAdapter {
|
|
|
134
138
|
}
|
|
135
139
|
});
|
|
136
140
|
this.healthy = true;
|
|
137
|
-
|
|
141
|
+
this.log.info('Enabled — POST /api/webhook (HMAC-SHA256 required)');
|
|
138
142
|
}
|
|
139
143
|
async stop() {
|
|
140
144
|
this.healthy = false;
|
|
141
145
|
// Express routes cannot be unregistered at runtime; we simply mark unhealthy
|
|
142
|
-
|
|
146
|
+
this.log.info('Stopped');
|
|
143
147
|
}
|
|
144
148
|
/** Not applicable — webhook is request-response, not push-based */
|
|
145
149
|
async send(_target, _message) { }
|
|
@@ -179,7 +183,7 @@ class WebhookAdapter {
|
|
|
179
183
|
});
|
|
180
184
|
}
|
|
181
185
|
catch (e) {
|
|
182
|
-
|
|
186
|
+
this.log.error(`Async callback failed:${e.message}`);
|
|
183
187
|
}
|
|
184
188
|
}
|
|
185
189
|
}
|
|
@@ -63,9 +63,12 @@ exports.WhatsAppAdapter = void 0;
|
|
|
63
63
|
// instead of web automation
|
|
64
64
|
const path_1 = __importDefault(require("path"));
|
|
65
65
|
const gateway_1 = require("../gateway");
|
|
66
|
+
const logger_1 = require("../v4/logger");
|
|
66
67
|
class WhatsAppAdapter {
|
|
67
68
|
constructor() {
|
|
68
69
|
this.name = 'whatsapp';
|
|
70
|
+
// Phase v4.1-1.3a — diagnostics route through scope logger.
|
|
71
|
+
this.log = (0, logger_1.noopLogger)();
|
|
69
72
|
this.client = null;
|
|
70
73
|
this.healthy = false;
|
|
71
74
|
this.sessionPath = process.env.WHATSAPP_SESSION_PATH ?? path_1.default.join(process.cwd(), 'workspace', '.whatsapp_session');
|
|
@@ -73,6 +76,7 @@ class WhatsAppAdapter {
|
|
|
73
76
|
this.allowedNumbers = raw ? new Set(raw.split(',').map(s => s.trim()).filter(Boolean)) : new Set();
|
|
74
77
|
this.businessApiKey = process.env.WHATSAPP_BUSINESS_API_KEY ?? '';
|
|
75
78
|
}
|
|
79
|
+
attachLogger(logger) { this.log = logger; }
|
|
76
80
|
// ── Lifecycle ──────────────────────────────────────────────
|
|
77
81
|
async start() {
|
|
78
82
|
// Opt-in guard — silent unless WHATSAPP_ENABLED=true
|
|
@@ -87,7 +91,7 @@ class WhatsAppAdapter {
|
|
|
87
91
|
LocalAuth = wwebjs.LocalAuth;
|
|
88
92
|
}
|
|
89
93
|
catch (e) {
|
|
90
|
-
|
|
94
|
+
this.log.info(`Disabled — whatsapp-web.js not available:${e.message}`);
|
|
91
95
|
return;
|
|
92
96
|
}
|
|
93
97
|
this.client = new Client({
|
|
@@ -99,21 +103,21 @@ class WhatsAppAdapter {
|
|
|
99
103
|
});
|
|
100
104
|
// QR code for first-time auth
|
|
101
105
|
this.client.on('qr', (qr) => {
|
|
102
|
-
|
|
106
|
+
this.log.info('Scan this QR code with your WhatsApp mobile app:');
|
|
103
107
|
try {
|
|
104
108
|
const qrcode = require('qrcode-terminal');
|
|
105
109
|
qrcode.generate(qr, { small: true });
|
|
106
110
|
}
|
|
107
111
|
catch {
|
|
108
|
-
|
|
112
|
+
this.log.info(`QR (raw):${qr}`);
|
|
109
113
|
}
|
|
110
114
|
});
|
|
111
115
|
this.client.on('authenticated', () => {
|
|
112
|
-
|
|
116
|
+
this.log.info(`Session authenticated — session persisted at${this.sessionPath}`);
|
|
113
117
|
});
|
|
114
118
|
this.client.on('ready', () => {
|
|
115
119
|
this.healthy = true;
|
|
116
|
-
|
|
120
|
+
this.log.info('Client ready');
|
|
117
121
|
gateway_1.gateway.registerChannel('whatsapp', async (msg) => {
|
|
118
122
|
await this.send(msg.channelId, msg.text);
|
|
119
123
|
return true;
|
|
@@ -121,7 +125,7 @@ class WhatsAppAdapter {
|
|
|
121
125
|
});
|
|
122
126
|
this.client.on('disconnected', (reason) => {
|
|
123
127
|
this.healthy = false;
|
|
124
|
-
|
|
128
|
+
this.log.info(`Disconnected:${reason}`);
|
|
125
129
|
});
|
|
126
130
|
this.client.on('message', async (msg) => {
|
|
127
131
|
// Skip non-text messages and group messages (from field ends with @g.us)
|
|
@@ -133,13 +137,13 @@ class WhatsAppAdapter {
|
|
|
133
137
|
if (!this.isAllowed(senderNumber))
|
|
134
138
|
return;
|
|
135
139
|
const response = await this.processMessage(msg.from, senderNumber, msg.body);
|
|
136
|
-
await msg.reply(response).catch((e) =>
|
|
140
|
+
await msg.reply(response).catch((e) => this.log.error(`reply error:${e.message}`));
|
|
137
141
|
});
|
|
138
142
|
try {
|
|
139
143
|
await this.client.initialize();
|
|
140
144
|
}
|
|
141
145
|
catch (e) {
|
|
142
|
-
|
|
146
|
+
this.log.info(`Disabled — check WHATSAPP_SESSION_PATH:${e.message}`);
|
|
143
147
|
this.healthy = false;
|
|
144
148
|
}
|
|
145
149
|
}
|
|
@@ -150,14 +154,14 @@ class WhatsAppAdapter {
|
|
|
150
154
|
await this.client.destroy().catch(() => { });
|
|
151
155
|
this.client = null;
|
|
152
156
|
}
|
|
153
|
-
|
|
157
|
+
this.log.info('Disconnected');
|
|
154
158
|
}
|
|
155
159
|
async send(target, message) {
|
|
156
160
|
if (!this.client || !this.healthy)
|
|
157
161
|
return;
|
|
158
162
|
// Ensure target has @c.us suffix
|
|
159
163
|
const chatId = target.includes('@') ? target : `${target.replace('+', '')}@c.us`;
|
|
160
|
-
await this.client.sendMessage(chatId, message).catch((e) =>
|
|
164
|
+
await this.client.sendMessage(chatId, message).catch((e) => this.log.error(`send error:${e.message}`));
|
|
161
165
|
}
|
|
162
166
|
isHealthy() { return this.healthy; }
|
|
163
167
|
// ── Helpers ────────────────────────────────────────────────
|
|
@@ -177,7 +181,7 @@ class WhatsAppAdapter {
|
|
|
177
181
|
});
|
|
178
182
|
}
|
|
179
183
|
catch (e) {
|
|
180
|
-
|
|
184
|
+
this.log.error(`routeMessage error:${e.message}`);
|
|
181
185
|
return '❌ Something went wrong. Try again.';
|
|
182
186
|
}
|
|
183
187
|
}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// ============================================================
|
|
3
|
+
// DevOS — Autonomous AI Execution System
|
|
4
|
+
// Copyright (c) 2026 Shiva Deore. All rights reserved.
|
|
5
|
+
// ============================================================
|
|
6
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
7
|
+
exports.MAX_VOICE_BYTES = void 0;
|
|
8
|
+
exports.transcribeForChannel = transcribeForChannel;
|
|
9
|
+
exports.isHallucinatedTranscript = isHallucinatedTranscript;
|
|
10
|
+
// core/channels/whisper-transcribe.ts — Phase v4.1-3.
|
|
11
|
+
//
|
|
12
|
+
// Channel-side Whisper adapter. Wraps the canonical
|
|
13
|
+
// `core/voice/stt.ts` chain with three Telegram-specific concerns:
|
|
14
|
+
//
|
|
15
|
+
// 1. 25 MB pre-upload size cap. Telegram voice/audio messages can
|
|
16
|
+
// claim arbitrary `file_size`; if a malicious caller manages to
|
|
17
|
+
// pass through a 200 MB blob we want to refuse before burning a
|
|
18
|
+
//      Whisper API quota. The cap matches the OpenAI Whisper request
|
|
19
|
+
//      limit (25 MB), which sits above Telegram's 20 MB getFile ceiling.
|
|
20
|
+
// 2. Whisper hallucination guard. Both Groq and OpenAI Whisper
|
|
21
|
+
// sometimes return a known set of "noise transcripts" on near-
|
|
22
|
+
// silent input — "Thank you for watching", "Subtitles by …",
|
|
23
|
+
// etc. We catch the common shapes and convert them to an
|
|
24
|
+
// `isHallucination` failure so the caller can render a helpful
|
|
25
|
+
// "I couldn't make out your voice — please type instead"
|
|
26
|
+
// annotation rather than ferrying the noise to the agent.
|
|
27
|
+
// 3. Result-shape contract that matches the channel adapter's
|
|
28
|
+
// expectations:
|
|
29
|
+
// TranscriptionResult = {
|
|
30
|
+
// success, text?, avgLogprob?, error?, isHallucination?,
|
|
31
|
+
// provider?, durationMs?
|
|
32
|
+
// }
|
|
33
|
+
// `avgLogprob` is the average of segment-level Whisper
|
|
34
|
+
// `avg_logprob` — values are negative; closer to 0 = more
|
|
35
|
+
// confident. The Telegram adapter uses this against the
|
|
36
|
+
// `TELEGRAM_VOICE_CONFIDENCE_THRESHOLD` env var (default -0.5)
|
|
37
|
+
// to decide whether to echo the transcript to the user before
|
|
38
|
+
// handing it to the agent.
|
|
39
|
+
//
|
|
40
|
+
// No console.* — every diagnostic uses the injected `Logger` from
|
|
41
|
+
// `core/v4/logger`, defaulting to noop when none is wired.
|
|
42
|
+
const node_fs_1 = require("node:fs");
|
|
43
|
+
const stt_1 = require("../voice/stt");
|
|
44
|
+
const logger_1 = require("../v4/logger");
|
|
45
|
+
// 25 MiB. Telegram Bot API hard cap for attachments downloadable via
|
|
46
|
+
// getFile() is 20 MB (the *upload* limit is 50 MB), and OpenAI's
|
|
47
|
+
// Whisper request limit is 25 MB. Use the higher of the two we're
|
|
48
|
+
// sending TO so a fortunate-edge-case 22 MB OGG doesn't fail late.
|
|
49
|
+
exports.MAX_VOICE_BYTES = 25 * 1024 * 1024;
|
|
50
|
+
/**
|
|
51
|
+
* Whisper hallucination patterns. Case-insensitive, anchored loosely
|
|
52
|
+
* so common variants (capitalisation, surrounding punctuation, the
|
|
53
|
+
* "by Amara.org" credit appearing on its own line) all match. Order
|
|
54
|
+
* is irrelevant — we OR them.
|
|
55
|
+
*
|
|
56
|
+
* Sources observed on near-silent or short noise inputs from both the
|
|
57
|
+
* Groq `whisper-large-v3` model and the OpenAI `whisper-1` model.
|
|
58
|
+
*/
|
|
59
|
+
const HALLUCINATION_PATTERNS = [
|
|
60
|
+
/thank\s+you\s+for\s+watching/i,
|
|
61
|
+
/thanks\s+for\s+watching/i,
|
|
62
|
+
/subtitles?\s+by/i,
|
|
63
|
+
/amara\.org/i,
|
|
64
|
+
/¡subt[íi]tulos\s+por/i,
|
|
65
|
+
/sous-titrage/i,
|
|
66
|
+
];
|
|
67
|
+
/**
 * Transcribe an audio file via the existing Aiden Whisper chain,
 * applying channel-side guards (size cap, hallucination filter,
 * confidence surfacing).
 *
 * Never throws — every failure, including an exception raised by the
 * underlying STT chain, lands on `result.success = false` with a
 * human-readable `error`. Callers should treat any of these as a
 * "transcription failed" signal:
 *
 *   - `success === false`         (unreadable file, size cap, STT error,
 *                                  empty transcript)
 *   - `isHallucination === true`  (transcript matched a noise pattern)
 *
 * Confident vs. low-confidence is the caller's call. Use:
 *
 *   const confident = (result.avgLogprob ?? 0) >= -0.5
 *
 * (default threshold; configurable via `TELEGRAM_VOICE_CONFIDENCE_THRESHOLD`).
 *
 * @param {object} opts
 * @param {string} opts.filePath            Local path of the audio file; it is
 *                                          stat'ed here and forwarded to the
 *                                          STT chain as `audioFilePath`.
 * @param {object} [opts.logger]            Logger exposing `info`/`warn`;
 *                                          defaults to the noop logger.
 * @param {number} [opts.maxBytesOverride]  Size cap in bytes; defaults to
 *                                          `MAX_VOICE_BYTES`.
 * @param {string} [opts.language]          Forwarded to the STT chain only
 *                                          when truthy.
 * @returns {Promise<object>} `{ success, text?, avgLogprob?, error?,
 *   isHallucination?, provider?, durationMs? }`. `avgLogprob` is present only
 *   when the chain reported a numeric `confidence`.
 */
async function transcribeForChannel(opts) {
    const log = opts.logger ?? (0, logger_1.noopLogger)();
    const cap = opts.maxBytesOverride ?? exports.MAX_VOICE_BYTES;
    // ── 1. Size precheck ────────────────────────────────────────────
    // Stat the file directly rather than trusting whatever size hint
    // came down the wire — by the time this runs the file is on local
    // disk, so the on-disk size is the truth. Refusing here avoids
    // burning a Whisper API call on a payload it would reject anyway.
    let sizeBytes;
    try {
        const st = await node_fs_1.promises.stat(opts.filePath);
        sizeBytes = st.size;
    }
    catch (e) {
        log.warn('voice file not readable', { path: opts.filePath, error: e?.message });
        return {
            success: false,
            error: `audio file not readable: ${e?.message ?? 'unknown error'}`,
        };
    }
    if (sizeBytes > cap) {
        log.warn('voice file too large', { sizeBytes, cap });
        return {
            success: false,
            error: `File too large: ${(sizeBytes / (1024 * 1024)).toFixed(1)} MB ` +
                `(cap is ${(cap / (1024 * 1024)).toFixed(0)} MB).`,
        };
    }
    // ── 2. Hand off to the canonical chain ──────────────────────────
    const sttOpts = {
        audioFilePath: opts.filePath,
        logger: log,
    };
    if (opts.language)
        sttOpts.language = opts.language;
    let result;
    try {
        result = await (0, stt_1.transcribe)(sttOpts);
    }
    catch (e) {
        // Honour the "never throws" contract: a rejected STT call is
        // reported as an ordinary failure result instead of propagating.
        const reason = e?.message ?? 'unknown error';
        log.warn('transcription chain threw', { error: reason });
        return {
            success: false,
            error: `transcription failed: ${reason}`,
        };
    }
    // A nullish result is treated like an empty transcript rather than
    // dereferenced.
    if (!result || result.error || !result.text) {
        return {
            success: false,
            error: result?.error ?? 'empty transcript',
            provider: result?.provider,
            durationMs: result?.durationMs,
        };
    }
    // ── 3. Hallucination guard ──────────────────────────────────────
    if (isHallucinatedTranscript(result.text)) {
        log.info('hallucinated transcript discarded', { snippet: result.text.slice(0, 60) });
        return {
            success: false,
            isHallucination: true,
            text: result.text,
            error: 'Whisper returned a known noise pattern',
            provider: result.provider,
            durationMs: result.durationMs,
        };
    }
    return {
        success: true,
        text: result.text,
        provider: result.provider,
        durationMs: result.durationMs,
        // Surface the chain's numeric confidence under the channel-facing name.
        ...(typeof result.confidence === 'number' ? { avgLogprob: result.confidence } : {}),
    };
}
|
|
149
|
+
/**
 * True when `text` matches one of the well-known Whisper noise
 * outputs listed in `HALLUCINATION_PATTERNS`. Empty / whitespace-only
 * strings also count — both Whisper variants emit them on dead silence
 * and the channel layer wants them treated identically to noise.
 *
 * Any non-string value (null, undefined, numbers, objects) is likewise
 * reported as noise instead of raising a TypeError, since it carries no
 * usable transcript.
 *
 * @param {string} text  Transcript to inspect.
 * @returns {boolean} `true` if the transcript should be discarded.
 */
function isHallucinatedTranscript(text) {
    if (typeof text !== 'string' || text.trim() === '')
        return true;
    // Patterns are OR-ed: the first match is enough.
    return HALLUCINATION_PATTERNS.some((re) => re.test(text));
}
|