@agentprojectcontext/apx 1.13.1 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -36,6 +36,7 @@ import { stripThinking } from "../thinking.js";
|
|
|
36
36
|
import { getRecentTelegramTurnsFromFs, appendGlobalMessage } from "../../core/messages-store.js";
|
|
37
37
|
import { readAgents } from "../../core/parser.js";
|
|
38
38
|
import { buildAgentSystem } from "../../core/agent-system.js";
|
|
39
|
+
import { transcribe as transcribeAudioFile } from "../transcription.js";
|
|
39
40
|
|
|
40
41
|
const API_BASE = "https://api.telegram.org";
|
|
41
42
|
const nowIso = () => new Date().toISOString().replace(/\.\d{3}Z$/, "Z");
|
|
@@ -131,47 +132,9 @@ export async function sendAudio(token, chatId, audio, { caption, title, performe
|
|
|
131
132
|
return json.result;
|
|
132
133
|
}
|
|
133
134
|
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
* Returns the transcribed text, or throws if no key / API failure.
|
|
138
|
-
*/
|
|
139
|
-
/**
 * Send an audio file to the OpenAI Whisper endpoint and return its transcript.
 *
 * The API key is read from the OPENAI_API_KEY environment variable; when that
 * is empty, `engines.openai.api_key` from the project config is tried.
 *
 * @param {string} filePath path of the audio file to upload
 * @returns {Promise<string>} the trimmed transcript ("" when the API returns no text)
 * @throws {Error} when no API key is available or the API answers with a non-2xx status
 */
async function transcribeAudio(filePath) {
  let apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    try {
      const { readConfig: loadConfig } = await import("../../core/config.js");
      apiKey = loadConfig()?.engines?.openai?.api_key || "";
    } catch {
      // Config is optional here: an unreadable config just means "no key".
    }
  }
  if (!apiKey) {
    throw new Error("OPENAI_API_KEY not set (env or engines.openai.api_key)");
  }

  const audioBytes = fs.readFileSync(filePath);
  // Extension without the dot; files without one are treated as ogg.
  const extension = path.extname(filePath).slice(1).toLowerCase() || "ogg";
  const mimeByExtension = {
    oga: "audio/ogg",
    ogg: "audio/ogg",
    opus: "audio/ogg",
    mp3: "audio/mpeg",
    m4a: "audio/mp4",
    mp4: "audio/mp4",
    wav: "audio/wav",
    webm: "audio/webm",
  };
  const contentType = mimeByExtension[extension] || "audio/ogg";

  const body = new FormData();
  body.append("file", new Blob([audioBytes], { type: contentType }), `audio.${extension}`);
  body.append("model", "whisper-1");

  const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body,
  });
  if (!response.ok) {
    // Keep only the start of the error body so the message stays short.
    const detail = await response.text().catch(() => "");
    throw new Error(`Whisper ${response.status}: ${detail.slice(0, 200)}`);
  }

  const payload = await response.json();
  return String(payload.text || "").trim();
}
|
|
135
|
+
// Audio transcription is delegated to the central dispatcher
|
|
136
|
+
// (../transcription.js) which handles local (faster-whisper via Python) +
|
|
137
|
+
// OpenAI cloud fallback. See that module for config keys.
|
|
175
138
|
|
|
176
139
|
/**
|
|
177
140
|
* Download a file from Telegram servers.
|
|
@@ -444,6 +407,7 @@ class ChannelPoller {
|
|
|
444
407
|
let localPath = null;
|
|
445
408
|
let transcript = "";
|
|
446
409
|
let transcribeError = null;
|
|
410
|
+
let transcribeBackend = null;
|
|
447
411
|
try {
|
|
448
412
|
localPath = await downloadTelegramFile(token, incomingAudio.file_id, mediaDir);
|
|
449
413
|
this.log(`telegram[${this.channel.name}] audio saved: ${localPath}`);
|
|
@@ -452,8 +416,10 @@ class ChannelPoller {
|
|
|
452
416
|
}
|
|
453
417
|
if (localPath) {
|
|
454
418
|
try {
|
|
455
|
-
|
|
456
|
-
|
|
419
|
+
const result = await transcribeAudioFile(localPath);
|
|
420
|
+
transcript = result.text || "";
|
|
421
|
+
transcribeBackend = result.backend;
|
|
422
|
+
this.log(`telegram[${this.channel.name}] audio transcribed via ${transcribeBackend} (${transcript.length} chars, lang=${result.language || "?"})`);
|
|
457
423
|
} catch (e) {
|
|
458
424
|
transcribeError = e.message;
|
|
459
425
|
this.log(`telegram[${this.channel.name}] audio transcription failed: ${e.message}`);
|
|
@@ -480,6 +446,7 @@ class ChannelPoller {
|
|
|
480
446
|
file_id: incomingAudio.file_id,
|
|
481
447
|
duration: incomingAudio.duration,
|
|
482
448
|
mime_type: incomingAudio.mime_type,
|
|
449
|
+
transcription_backend: transcribeBackend,
|
|
483
450
|
transcription_error: transcribeError,
|
|
484
451
|
},
|
|
485
452
|
});
|
|
@@ -21,6 +21,7 @@ import setPermissionMode from "./tools/set-permission-mode.js";
|
|
|
21
21
|
import searchFiles from "./tools/search-files.js";
|
|
22
22
|
import listSkills from "./tools/list-skills.js";
|
|
23
23
|
import loadSkill from "./tools/load-skill.js";
|
|
24
|
+
import transcribeAudio from "./tools/transcribe-audio.js";
|
|
24
25
|
import { createPermissionGuard } from "./helpers.js";
|
|
25
26
|
import { buildBridgedTools, DEFAULT_CATEGORIES } from "./registry-bridge.js";
|
|
26
27
|
|
|
@@ -48,6 +49,7 @@ const NATIVE_TOOLS = [
|
|
|
48
49
|
searchFiles,
|
|
49
50
|
listSkills,
|
|
50
51
|
loadSkill,
|
|
52
|
+
transcribeAudio,
|
|
51
53
|
];
|
|
52
54
|
|
|
53
55
|
// Registry-backed bridges. Categories can be overridden per-process via env
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import crypto from "node:crypto";
|
|
5
|
+
import { transcribe } from "../../transcription.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Decode a base64 audio payload, accepting either bare base64 or a
 * `data:<mime>[;params];base64,<payload>` data URI.
 *
 * @param {string} base64 raw base64 text or a base64 data URI
 * @returns {Buffer} the decoded bytes
 * @throws {Error} when nothing could be decoded
 */
function decodeBase64Audio(base64) {
  // Strip any data-URI header up to the comma; subtypes such as "x-wav" or
  // "mp4" and extra parameters ("; codecs=opus") must not reach the decoder.
  const payload = String(base64).trim().replace(/^data:[^,]*;base64,/i, "");
  const bytes = Buffer.from(payload, "base64");
  if (bytes.length === 0) {
    throw new Error("transcribe_audio: base64 payload is empty or not valid base64");
  }
  return bytes;
}

/**
 * Turn the caller-supplied format hint into a safe file extension.
 * Anything that is not a short alphanumeric token (including path separators
 * and "..") falls back to "ogg" so the temp file cannot leave the temp dir.
 *
 * @param {unknown} format extension hint, with or without a leading dot
 * @returns {string} lower-case extension without the dot
 */
function safeAudioExtension(format) {
  const ext = String(format ?? "").trim().replace(/^\./, "").toLowerCase();
  return /^[a-z0-9]{1,10}$/.test(ext) ? ext : "ogg";
}

/**
 * Native tool definition for `transcribe_audio`.
 *
 * The handler accepts either `file_path` (used as-is) or `base64` (written to
 * a temporary file under `<os tmpdir>/apx-transcribe`, removed afterwards) and
 * forwards the optional provider/model/language/device/compute_type overrides
 * to `transcribe`. `file_path` wins when both inputs are given.
 */
const transcribeAudioTool = {
  name: "transcribe_audio",
  schema: {
    type: "function",
    function: {
      name: "transcribe_audio",
      description:
        "Transcribe an audio file to text. Default backend is local faster-whisper (model 'medium' on CPU with int8 quantization), with automatic fallback to OpenAI Whisper API if local fails. Pass file_path for a file on disk, or base64 for raw audio bytes (will be written to a temp file). Override provider/model/language as needed.",
      parameters: {
        type: "object",
        properties: {
          file_path: { type: "string", description: "absolute path to audio file (.ogg, .mp3, .m4a, .wav, .webm, .opus)" },
          base64: { type: "string", description: "alternative to file_path — raw base64 audio bytes (or 'data:audio/...;base64,...' data URI)" },
          format: { type: "string", description: "file extension hint when using base64 (default 'ogg')" },
          provider: { type: "string", description: "override the configured provider: 'auto' | 'local' | 'openai'" },
          model: { type: "string", description: "local model size: tiny | base | small | medium | large | large-v2 | large-v3 (default medium)" },
          language: { type: "string", description: "ISO 639-1 code (e.g. 'es', 'en') or 'auto' for detection" },
          device: { type: "string", description: "local device: cpu | cuda (default cpu)" },
          compute_type: { type: "string", description: "local quantization: int8 | int8_float16 | float16 | float32 (default int8)" },
        },
      },
    },
  },
  makeHandler: () => async ({ file_path, base64, format = "ogg", provider, model, language, device, compute_type } = {}) => {
    if (!file_path && !base64) throw new Error("transcribe_audio: file_path or base64 required");

    // Only forward overrides the caller actually set, so configured defaults apply otherwise.
    const overrides = {};
    if (provider) overrides.provider = provider;
    if (model) overrides.model = model;
    if (language) overrides.language = language;
    if (device) overrides.device = device;
    if (compute_type) overrides.compute_type = compute_type;

    if (file_path) return await transcribe(file_path, overrides);

    const bytes = decodeBase64Audio(base64);
    const tmpDir = path.join(os.tmpdir(), "apx-transcribe");
    fs.mkdirSync(tmpDir, { recursive: true });
    const id = crypto.randomBytes(6).toString("hex");
    const tmpPath = path.join(tmpDir, `audio-${id}.${safeAudioExtension(format)}`);

    try {
      // Owner-only permissions: the audio may be private and tmp is shared.
      fs.writeFileSync(tmpPath, bytes, { mode: 0o600 });
      return await transcribe(tmpPath, overrides);
    } finally {
      // Best-effort cleanup; a failed unlink must not mask the transcription result.
      try { fs.unlinkSync(tmpPath); } catch { /* ignore */ }
    }
  },
};

export default transcribeAudioTool;
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
// daemon/transcription.js
|
|
2
|
+
// Audio transcription dispatcher. Two backends:
|
|
3
|
+
//
|
|
4
|
+
// - LOCAL (faster-whisper via Python subprocess) — ported from Panda's
|
|
5
|
+
// transcription_service.py. Same defaults: model "medium", device "cpu",
|
|
6
|
+
// compute_type "int8", beam_size 5, auto language detection. Requires
|
|
7
|
+
// `pip3 install faster-whisper` on the host.
|
|
8
|
+
//
|
|
9
|
+
// - OPENAI (Whisper-1 cloud API) — needs OPENAI_API_KEY or
|
|
10
|
+
// engines.openai.api_key in config.
|
|
11
|
+
//
|
|
12
|
+
// Provider selection in ~/.apx/config.json:
|
|
13
|
+
// "transcription": {
|
|
14
|
+
// "provider": "auto" | "local" | "openai", // default "auto"
|
|
15
|
+
// "local": {
|
|
16
|
+
// "model": "medium", // tiny | base | small | medium | large | large-v2 | large-v3
|
|
17
|
+
// "device": "cpu", // cpu | cuda
|
|
18
|
+
// "compute_type": "int8", // int8 | int8_float16 | float16 | float32
|
|
19
|
+
// "language": "auto", // ISO 639-1 code or "auto"
|
|
20
|
+
// "beam_size": 5
|
|
21
|
+
// }
|
|
22
|
+
// }
|
|
23
|
+
//
|
|
24
|
+
// "auto" tries local first; on failure falls back to openai.
|
|
25
|
+
|
|
26
|
+
import fs from "node:fs";
|
|
27
|
+
import path from "node:path";
|
|
28
|
+
import { execFile } from "node:child_process";
|
|
29
|
+
import { fileURLToPath } from "node:url";
|
|
30
|
+
|
|
31
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
32
|
+
const __dirname = path.dirname(__filename);
|
|
33
|
+
const PYTHON_HELPER = path.join(__dirname, "whisper-transcribe.py");
|
|
34
|
+
|
|
35
|
+
const DEFAULT_LOCAL = {
|
|
36
|
+
model: "medium",
|
|
37
|
+
device: "cpu",
|
|
38
|
+
compute_type: "int8",
|
|
39
|
+
language: "auto",
|
|
40
|
+
beam_size: 5,
|
|
41
|
+
};
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Config
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
 * Resolve the effective transcription settings.
 *
 * Reads the project config through `readConfig` (loaded lazily) and layers
 * its `transcription.local` values over DEFAULT_LOCAL. The OpenAI key comes
 * from `engines.openai.api_key`, then the OPENAI_API_KEY environment
 * variable. If the config module cannot be loaded or read, pure defaults
 * (plus the environment key) are returned instead of failing.
 *
 * @returns {Promise<{provider: string, local: object, openaiKey: string}>}
 */
async function getConfig() {
  const envKey = process.env.OPENAI_API_KEY || "";
  try {
    const { readConfig: loadConfig } = await import("../core/config.js");
    const root = loadConfig() || {};
    const section = root.transcription || {};
    return {
      provider: section.provider || "auto",
      local: { ...DEFAULT_LOCAL, ...(section.local || {}) },
      openaiKey: root.engines?.openai?.api_key || envKey,
    };
  } catch {
    // No usable config: fall back to built-in defaults.
    return {
      provider: "auto",
      local: { ...DEFAULT_LOCAL },
      openaiKey: envKey,
    };
  }
}
|
|
66
|
+
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
// Local backend (Python + faster-whisper)
|
|
69
|
+
// ---------------------------------------------------------------------------
|
|
70
|
+
|
|
71
|
+
/**
 * Parse the last stdout line of the Python helper as a JSON object.
 * Earlier lines are ignored so library chatter before the result is tolerated.
 *
 * @param {unknown} stdout captured helper stdout
 * @returns {object|null} the parsed object, or null when there is no JSON object
 */
function parseHelperJson(stdout) {
  const lastLine = String(stdout ?? "").trim().split("\n").pop();
  if (!lastLine) return null;
  try {
    const value = JSON.parse(lastLine);
    return value !== null && typeof value === "object" ? value : null;
  } catch {
    return null;
  }
}

/**
 * Transcribe an audio file by running the faster-whisper Python helper.
 *
 * @param {string} filePath path of the audio file handed to the helper
 * @param {object} [opts] local options: model, language, device, compute_type,
 *   beam_size; missing/falsy values fall back to DEFAULT_LOCAL
 * @returns {Promise<object>} `{ ok, backend: "local", text, language,
 *   language_probability, duration, model, compute_type }`
 * @throws {Error} (as a rejection) when python3 cannot be run, the helper
 *   reports a failure, or its output is not a JSON object
 */
function transcribeLocal(filePath, opts = {}) {
  return new Promise((resolve, reject) => {
    const args = [
      PYTHON_HELPER,
      filePath,
      "--model", String(opts.model || DEFAULT_LOCAL.model),
      "--language", String(opts.language || DEFAULT_LOCAL.language),
      "--device", String(opts.device || DEFAULT_LOCAL.device),
      "--compute-type", String(opts.compute_type || DEFAULT_LOCAL.compute_type),
      "--beam-size", String(opts.beam_size || DEFAULT_LOCAL.beam_size),
    ];
    execFile("python3", args, { maxBuffer: 16 * 1024 * 1024, timeout: 5 * 60_000 }, (err, stdout, stderr) => {
      const parsed = parseHelperJson(stdout);

      if (err) {
        // The helper prints `{"ok": false, "error": ...}` on stdout and exits
        // non-zero, so prefer that message over the generic
        // "Command failed: python3 ..." text carried by `err`.
        if (parsed && parsed.ok === false && parsed.error) {
          return reject(new Error(`local transcription failed: ${parsed.error}`, { cause: err }));
        }
        const tail = String(stderr || err.message || "").slice(-300);
        return reject(new Error(`local transcription failed: ${tail}`, { cause: err }));
      }

      if (!parsed) {
        return reject(new Error(`could not parse helper output: ${String(stdout).slice(0, 300)}`));
      }
      if (!parsed.ok) return reject(new Error(parsed.error || "unknown local transcription error"));

      resolve({
        ok: true,
        backend: "local",
        text: parsed.text || "",
        language: parsed.language || null,
        language_probability: parsed.language_probability ?? null,
        duration: parsed.duration ?? null,
        model: parsed.model,
        compute_type: parsed.compute_type,
      });
    });
  });
}
|
|
106
|
+
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
// OpenAI backend (Whisper-1 cloud)
|
|
109
|
+
// ---------------------------------------------------------------------------
|
|
110
|
+
|
|
111
|
+
/**
 * Transcribe an audio file with the OpenAI `whisper-1` cloud model.
 *
 * @param {string} filePath path of the audio file to upload
 * @param {string} apiKey OpenAI API key used as the bearer token
 * @returns {Promise<object>} `{ ok: true, backend: "openai", text, language: null,
 *   language_probability: null, duration: null, model: "whisper-1" }`
 * @throws {Error} when the key is missing or the API answers with a non-2xx status
 */
async function transcribeOpenAI(filePath, apiKey) {
  if (!apiKey) {
    throw new Error("OPENAI_API_KEY not set (env or engines.openai.api_key)");
  }

  const audioBytes = fs.readFileSync(filePath);
  // Extension without the dot; files without one are uploaded as ogg.
  const extension = path.extname(filePath).slice(1).toLowerCase() || "ogg";
  const mimeByExtension = {
    oga: "audio/ogg",
    ogg: "audio/ogg",
    opus: "audio/ogg",
    mp3: "audio/mpeg",
    m4a: "audio/mp4",
    mp4: "audio/mp4",
    wav: "audio/wav",
    webm: "audio/webm",
  };
  const contentType = mimeByExtension[extension] || "audio/ogg";

  const body = new FormData();
  body.append("file", new Blob([audioBytes], { type: contentType }), `audio.${extension}`);
  body.append("model", "whisper-1");

  const response = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body,
  });
  if (!response.ok) {
    // Only the first 200 characters of the error body go into the message.
    const detail = await response.text().catch(() => "");
    throw new Error(`Whisper API ${response.status}: ${detail.slice(0, 200)}`);
  }

  const payload = await response.json();
  return {
    ok: true,
    backend: "openai",
    text: String(payload.text || "").trim(),
    language: null,
    language_probability: null,
    duration: null,
    model: "whisper-1",
  };
}
|
|
147
|
+
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
// Public API
|
|
150
|
+
// ---------------------------------------------------------------------------
|
|
151
|
+
|
|
152
|
+
/**
 * Transcribe an audio file using the configured backend.
 * Returns { ok, backend, text, language?, language_probability?, duration?, model? }.
 *
 * Provider resolution: `overrides.provider`, then the configured provider,
 * compared case-insensitively. "openai" and "local" use exactly that backend;
 * any other value behaves as "auto": local first, then OpenAI if a key is
 * configured. When both backends fail in auto mode the error message names
 * both failures (the OpenAI error is attached as `cause`).
 *
 * @param {string} filePath   absolute path to audio file
 * @param {object} overrides  optional: { provider, model, language, ... }
 * @returns {Promise<object>} the backend's result object
 * @throws {Error} when the file does not exist or every eligible backend fails
 */
export async function transcribe(filePath, overrides = {}) {
  if (!filePath || !fs.existsSync(filePath)) {
    throw new Error(`transcribe: file not found: ${filePath}`);
  }
  const cfg = await getConfig();

  // `provider` selects the backend; it is not an option of the local backend.
  const { provider: providerOverride, ...localOverrides } = overrides ?? {};
  const provider = String(providerOverride || cfg.provider || "auto").trim().toLowerCase();
  const localOpts = { ...cfg.local, ...localOverrides };

  if (provider === "openai") {
    return transcribeOpenAI(filePath, cfg.openaiKey);
  }
  if (provider === "local") {
    return transcribeLocal(filePath, localOpts);
  }

  // auto: local first, fall back to openai
  let localErr;
  try {
    return await transcribeLocal(filePath, localOpts);
  } catch (err) {
    localErr = err;
  }

  if (!cfg.openaiKey) {
    throw new Error(
      `local transcription failed and no OpenAI fallback available: ${localErr.message}`,
      { cause: localErr }
    );
  }

  try {
    return await transcribeOpenAI(filePath, cfg.openaiKey);
  } catch (openaiErr) {
    // Keep the local reason visible: it is usually the one the operator must fix.
    throw new Error(
      `local transcription failed (${localErr.message}); OpenAI fallback also failed: ${openaiErr.message}`,
      { cause: openaiErr }
    );
  }
}
|
|
186
|
+
|
|
187
|
+
// ---------------------------------------------------------------------------
|
|
188
|
+
// Diagnostics
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
|
|
191
|
+
export const TRANSCRIPTION_PATHS = {
|
|
192
|
+
python_helper: PYTHON_HELPER,
|
|
193
|
+
};
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Local audio transcription via faster-whisper. Mirrors the implementation in
|
|
4
|
+
the Panda project (transcription_service.py): same default model "medium",
|
|
5
|
+
device cpu, compute_type int8, beam_size 5. The model is loaded on every invocation; this script keeps no model cache between runs.
|
|
6
|
+
|
|
7
|
+
Invoked by APX daemon (Node) as a subprocess. Args:
|
|
8
|
+
whisper-transcribe.py <audio_path> [--model medium] [--language auto] [--device cpu] [--compute-type int8] [--beam-size 5]
|
|
9
|
+
|
|
10
|
+
Outputs JSON on stdout:
|
|
11
|
+
{ "ok": true, "text": "...", "language": "es", "language_probability": 0.98, "duration": 12.4 }
|
|
12
|
+
{ "ok": false, "error": "..." }
|
|
13
|
+
"""
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def main() -> int:
    """Transcribe one audio file and print a single JSON result line on stdout.

    Command-line arguments are read from ``sys.argv``: the audio path plus the
    optional ``--model``, ``--language``, ``--device``, ``--compute-type`` and
    ``--beam-size`` flags.

    Returns:
        0 after printing an ``{"ok": true, ...}`` result; 1 after printing an
        ``{"ok": false, "error": ...}`` object (missing file, faster-whisper
        not importable, model load failure, or transcription failure).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("audio_path")
    cli.add_argument("--model", default="medium")
    cli.add_argument("--language", default="auto")
    cli.add_argument("--device", default="cpu")
    cli.add_argument("--compute-type", dest="compute_type", default="int8")
    cli.add_argument("--beam-size", dest="beam_size", type=int, default=5)
    args = cli.parse_args()

    def emit(payload, status):
        # Every outcome is reported the same way: one JSON document on stdout.
        print(json.dumps(payload))
        return status

    if not os.path.exists(args.audio_path):
        return emit({"ok": False, "error": f"file not found: {args.audio_path}"}, 1)

    # Imported here so a missing dependency becomes a JSON error, not a traceback.
    try:
        from faster_whisper import WhisperModel
    except ImportError as exc:
        return emit(
            {
                "ok": False,
                "error": "faster-whisper not installed. Run: pip3 install faster-whisper",
                "import_error": str(exc),
            },
            1,
        )

    try:
        model = WhisperModel(args.model, device=args.device, compute_type=args.compute_type)
    except Exception as exc:
        return emit({"ok": False, "error": f"failed to load model '{args.model}': {exc}"}, 1)

    # "auto" is passed to the model as None.
    language = args.language if args.language != "auto" else None

    try:
        segments, info = model.transcribe(
            args.audio_path, beam_size=args.beam_size, language=language
        )
        pieces = [segment.text.strip() for segment in segments]
        result = {
            "ok": True,
            "text": " ".join(pieces).strip(),
            "language": info.language,
            "language_probability": round(info.language_probability, 4),
            "duration": round(info.duration, 2),
            "model": args.model,
            "compute_type": args.compute_type,
        }
        return emit(result, 0)
    except Exception as exc:
        return emit({"ok": False, "error": f"transcription failed: {exc}"}, 1)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
if __name__ == "__main__":
|
|
71
|
+
sys.exit(main())
|