@hasna/microservices 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/index.js +70 -0
- package/bin/mcp.js +71 -1
- package/dist/index.js +70 -0
- package/microservices/microservice-ads/package.json +27 -0
- package/microservices/microservice-ads/src/cli/index.ts +407 -0
- package/microservices/microservice-ads/src/db/campaigns.ts +493 -0
- package/microservices/microservice-ads/src/db/database.ts +93 -0
- package/microservices/microservice-ads/src/db/migrations.ts +60 -0
- package/microservices/microservice-ads/src/index.ts +39 -0
- package/microservices/microservice-ads/src/mcp/index.ts +320 -0
- package/microservices/microservice-contracts/package.json +27 -0
- package/microservices/microservice-contracts/src/cli/index.ts +383 -0
- package/microservices/microservice-contracts/src/db/contracts.ts +496 -0
- package/microservices/microservice-contracts/src/db/database.ts +93 -0
- package/microservices/microservice-contracts/src/db/migrations.ts +58 -0
- package/microservices/microservice-contracts/src/index.ts +43 -0
- package/microservices/microservice-contracts/src/mcp/index.ts +308 -0
- package/microservices/microservice-domains/package.json +27 -0
- package/microservices/microservice-domains/src/cli/index.ts +438 -0
- package/microservices/microservice-domains/src/db/database.ts +93 -0
- package/microservices/microservice-domains/src/db/domains.ts +551 -0
- package/microservices/microservice-domains/src/db/migrations.ts +60 -0
- package/microservices/microservice-domains/src/index.ts +44 -0
- package/microservices/microservice-domains/src/mcp/index.ts +368 -0
- package/microservices/microservice-hiring/package.json +27 -0
- package/microservices/microservice-hiring/src/cli/index.ts +431 -0
- package/microservices/microservice-hiring/src/db/database.ts +93 -0
- package/microservices/microservice-hiring/src/db/hiring.ts +582 -0
- package/microservices/microservice-hiring/src/db/migrations.ts +68 -0
- package/microservices/microservice-hiring/src/index.ts +51 -0
- package/microservices/microservice-hiring/src/mcp/index.ts +464 -0
- package/microservices/microservice-payments/package.json +27 -0
- package/microservices/microservice-payments/src/cli/index.ts +357 -0
- package/microservices/microservice-payments/src/db/database.ts +93 -0
- package/microservices/microservice-payments/src/db/migrations.ts +63 -0
- package/microservices/microservice-payments/src/db/payments.ts +652 -0
- package/microservices/microservice-payments/src/index.ts +51 -0
- package/microservices/microservice-payments/src/mcp/index.ts +460 -0
- package/microservices/microservice-payroll/package.json +27 -0
- package/microservices/microservice-payroll/src/cli/index.ts +374 -0
- package/microservices/microservice-payroll/src/db/database.ts +93 -0
- package/microservices/microservice-payroll/src/db/migrations.ts +69 -0
- package/microservices/microservice-payroll/src/db/payroll.ts +741 -0
- package/microservices/microservice-payroll/src/index.ts +48 -0
- package/microservices/microservice-payroll/src/mcp/index.ts +420 -0
- package/microservices/microservice-shipping/package.json +27 -0
- package/microservices/microservice-shipping/src/cli/index.ts +398 -0
- package/microservices/microservice-shipping/src/db/database.ts +93 -0
- package/microservices/microservice-shipping/src/db/migrations.ts +61 -0
- package/microservices/microservice-shipping/src/db/shipping.ts +643 -0
- package/microservices/microservice-shipping/src/index.ts +53 -0
- package/microservices/microservice-shipping/src/mcp/index.ts +385 -0
- package/microservices/microservice-social/package.json +27 -0
- package/microservices/microservice-social/src/cli/index.ts +447 -0
- package/microservices/microservice-social/src/db/database.ts +93 -0
- package/microservices/microservice-social/src/db/migrations.ts +55 -0
- package/microservices/microservice-social/src/db/social.ts +672 -0
- package/microservices/microservice-social/src/index.ts +46 -0
- package/microservices/microservice-social/src/mcp/index.ts +435 -0
- package/microservices/microservice-subscriptions/package.json +27 -0
- package/microservices/microservice-subscriptions/src/cli/index.ts +400 -0
- package/microservices/microservice-subscriptions/src/db/database.ts +93 -0
- package/microservices/microservice-subscriptions/src/db/migrations.ts +57 -0
- package/microservices/microservice-subscriptions/src/db/subscriptions.ts +692 -0
- package/microservices/microservice-subscriptions/src/index.ts +41 -0
- package/microservices/microservice-subscriptions/src/mcp/index.ts +365 -0
- package/microservices/microservice-transcriber/package.json +28 -0
- package/microservices/microservice-transcriber/src/cli/index.ts +1347 -0
- package/microservices/microservice-transcriber/src/db/annotations.ts +37 -0
- package/microservices/microservice-transcriber/src/db/database.ts +82 -0
- package/microservices/microservice-transcriber/src/db/migrations.ts +72 -0
- package/microservices/microservice-transcriber/src/db/transcripts.ts +395 -0
- package/microservices/microservice-transcriber/src/index.ts +43 -0
- package/microservices/microservice-transcriber/src/lib/config.ts +77 -0
- package/microservices/microservice-transcriber/src/lib/diff.ts +91 -0
- package/microservices/microservice-transcriber/src/lib/downloader.ts +570 -0
- package/microservices/microservice-transcriber/src/lib/feeds.ts +62 -0
- package/microservices/microservice-transcriber/src/lib/live.ts +94 -0
- package/microservices/microservice-transcriber/src/lib/notion.ts +129 -0
- package/microservices/microservice-transcriber/src/lib/providers.ts +713 -0
- package/microservices/microservice-transcriber/src/lib/summarizer.ts +147 -0
- package/microservices/microservice-transcriber/src/lib/translator.ts +75 -0
- package/microservices/microservice-transcriber/src/lib/webhook.ts +37 -0
- package/microservices/microservice-transcriber/src/mcp/index.ts +1070 -0
- package/microservices/microservice-transcriber/src/server/index.ts +199 -0
- package/package.json +1 -1
- package/microservices/microservice-invoices/dashboard/dist/assets/index-Bngq7FNM.css +0 -1
- package/microservices/microservice-invoices/dashboard/dist/assets/index-aHW4ARZR.js +0 -124
- package/microservices/microservice-invoices/dashboard/dist/index.html +0 -13
|
@@ -0,0 +1,713 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Transcription providers — ElevenLabs (default) and OpenAI Whisper.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { readFileSync, unlinkSync, existsSync, statSync } from "node:fs";
|
|
6
|
+
import { basename } from "node:path";
|
|
7
|
+
import type { TranscriptChapterSegment, TranscriptMetadata, TranscriptProvider, TranscriptSpeakerSegment, TranscriptWord } from "../db/transcripts.js";
|
|
8
|
+
import type { VideoChapter } from "./downloader.js";
|
|
9
|
+
import { getAudioDuration, splitAudioIntoChunks } from "./downloader.js";
|
|
10
|
+
|
|
11
|
+
/** Result of a single transcription run; shape is shared by all providers. */
export interface TranscriptionResult {
  text: string;                    // full transcript text (speaker-labelled when diarized)
  language: string;                // language code reported by the provider (falls back to "en")
  duration_seconds: number | null; // null when the provider reports no duration and none can be derived
  metadata: TranscriptMetadata;    // model id, word timings, speaker segments, diarization flag, …
}

/** Caller-tunable options accepted by transcribeFile(). */
export interface TranscribeOptions {
  provider?: TranscriptProvider; // which backend to use; default is "elevenlabs"
  language?: string;             // language hint forwarded to the provider
  diarize?: boolean;             // request speaker diarization where the provider supports it
  vocab?: string[]; // custom vocabulary hints
  chunkDurationSec?: number; // override auto-chunk size (default: 600 = 10 min)
}

const AUTO_CHUNK_THRESHOLD_SEC = 600; // 10 minutes
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Transcribe an audio file using the specified provider.
|
|
30
|
+
* Auto-chunks files longer than 10 minutes — splits into segments,
|
|
31
|
+
* transcribes each, then combines results with correct timestamp offsets.
|
|
32
|
+
*/
|
|
33
|
+
export async function transcribeFile(
|
|
34
|
+
filePath: string,
|
|
35
|
+
options: TranscribeOptions = {}
|
|
36
|
+
): Promise<TranscriptionResult> {
|
|
37
|
+
const provider = options.provider ?? "elevenlabs";
|
|
38
|
+
const chunkDuration = options.chunkDurationSec ?? AUTO_CHUNK_THRESHOLD_SEC;
|
|
39
|
+
|
|
40
|
+
// Check duration — auto-chunk if long
|
|
41
|
+
let duration: number;
|
|
42
|
+
try {
|
|
43
|
+
duration = await getAudioDuration(filePath);
|
|
44
|
+
} catch {
|
|
45
|
+
duration = 0; // can't probe — just send it as-is
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
if (duration > chunkDuration) {
|
|
49
|
+
return transcribeChunked(filePath, duration, chunkDuration, options);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
return transcribeSingle(filePath, options);
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
async function transcribeSingle(
|
|
56
|
+
filePath: string,
|
|
57
|
+
options: TranscribeOptions
|
|
58
|
+
): Promise<TranscriptionResult> {
|
|
59
|
+
const provider = options.provider ?? "elevenlabs";
|
|
60
|
+
|
|
61
|
+
if (provider === "elevenlabs") {
|
|
62
|
+
return transcribeWithElevenLabs(filePath, options.language, options.diarize, options.vocab);
|
|
63
|
+
} else if (provider === "openai") {
|
|
64
|
+
return transcribeWithOpenAI(filePath, options.language, options.vocab);
|
|
65
|
+
} else if (provider === "deepgram") {
|
|
66
|
+
return transcribeWithDeepGram(filePath, options.language, options.diarize, options.vocab);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
throw new Error(`Unknown provider: ${provider}`);
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
 * Chunk-based transcription: splits audio, transcribes each chunk,
 * combines text and word arrays with correct timestamp offsets.
 *
 * Chunks are transcribed sequentially and in order so the joined text and
 * the accumulated word/speaker arrays stay chronologically sorted.
 * `language` and `model` end up reflecting the LAST chunk's values.
 */
async function transcribeChunked(
  filePath: string,
  totalDuration: number,
  chunkDuration: number,
  options: TranscribeOptions
): Promise<TranscriptionResult> {
  const chunks = await splitAudioIntoChunks(filePath, chunkDuration);

  const allTexts: string[] = [];
  const allWords: TranscriptWord[] = [];
  let language = "en";
  let model: string | undefined;
  let diarized = false;
  const allSpeakers: TranscriptSpeakerSegment[] = [];

  try {
    for (let i = 0; i < chunks.length; i++) {
      const chunk = chunks[i];
      const result = await transcribeSingle(chunk.path, options);

      allTexts.push(result.text);
      language = result.language;
      model = result.metadata.model;
      if (result.metadata.diarized) diarized = true;

      // Offset word timestamps by chunk start position
      if (result.metadata.words) {
        for (const w of result.metadata.words) {
          allWords.push({
            ...w,
            start: w.start + chunk.startOffset,
            end: w.end + chunk.startOffset,
          });
        }
      }

      // Offset speaker segments
      // NOTE(review): speaker IDs are per-chunk, so "speaker_0" in chunk 1 is
      // not guaranteed to be the same person as "speaker_0" in chunk 2.
      if (result.metadata.speakers) {
        for (const s of result.metadata.speakers) {
          allSpeakers.push({
            ...s,
            start: s.start + chunk.startOffset,
            end: s.end + chunk.startOffset,
          });
        }
      }
    }
  } finally {
    // Clean up chunk temp files (don't delete original)
    // Runs even when a chunk's transcription throws, so no temp files leak.
    for (const chunk of chunks) {
      if (chunk.path !== filePath) {
        try { if (existsSync(chunk.path)) unlinkSync(chunk.path); } catch {}
      }
    }
  }

  const combinedText = allTexts.join("\n\n");

  return {
    text: combinedText,
    language,
    duration_seconds: totalDuration,
    metadata: {
      model,
      words: allWords.length > 0 ? allWords : undefined,
      speakers: allSpeakers.length > 0 ? allSpeakers : undefined,
      ...(diarized ? { diarized: true } : {}),
    },
  };
}
|
|
146
|
+
|
|
147
|
+
// ---------------------------------------------------------------------------
|
|
148
|
+
// ElevenLabs
|
|
149
|
+
// ---------------------------------------------------------------------------
|
|
150
|
+
|
|
151
|
+
/**
 * Transcribe via the ElevenLabs speech-to-text API (model "scribe_v1").
 * Requires ELEVENLABS_API_KEY in the environment.
 *
 * @throws Error when the API key is missing, the file is empty, or the API
 *   responds with a non-2xx status.
 */
async function transcribeWithElevenLabs(
  filePath: string,
  language?: string,
  diarize?: boolean,
  vocab?: string[]
): Promise<TranscriptionResult> {
  const apiKey = process.env["ELEVENLABS_API_KEY"];
  if (!apiKey) throw new Error("ELEVENLABS_API_KEY is not set");

  const fileName = basename(filePath);
  const fileSize = statSync(filePath).size;
  if (fileSize === 0) throw new Error(`Audio file is empty: ${filePath}`);

  // Use Bun.file() for proper streaming file upload (handles large files correctly)
  const audioFile = Bun.file(filePath);

  const form = new FormData();
  form.append("file", audioFile, fileName);
  form.append("model_id", "scribe_v1");
  form.append("timestamps_granularity", "word");
  if (language) form.append("language_code", language);
  if (diarize) form.append("diarize", "true");
  if (vocab && vocab.length > 0) {
    // ElevenLabs accepts custom_spelling as JSON array of {from, to} for custom vocab
    form.append("custom_spelling", JSON.stringify(vocab.map((v) => ({ from: v.toLowerCase(), to: v }))));
  }

  const res = await fetch("https://api.elevenlabs.io/v1/speech-to-text", {
    method: "POST",
    headers: { "xi-api-key": apiKey },
    body: form,
  });

  if (!res.ok) {
    const body = await res.text();
    throw new Error(`ElevenLabs API error ${res.status}: ${body}`);
  }

  const data = (await res.json()) as {
    text: string;
    language_code: string;
    language_probability: number;
    words?: Array<{ text: string; type: string; start: number; end: number; speaker_id?: string; logprob?: number }>;
  };

  // Keep only real words (drops "spacing"/other event entries by type filter).
  const words = (data.words ?? [])
    .filter((w) => w.type === "word")
    .map((w) => ({
      text: w.text,
      start: w.start,
      end: w.end,
      type: w.type,
      speaker_id: w.speaker_id,
      ...(w.logprob !== undefined ? { logprob: w.logprob } : {}),
    }));

  // The response carries no duration field; derive it from the last word's end.
  const duration = words.length > 0 ? words[words.length - 1].end : null;

  // Build speaker segments and formatted text when diarized
  let text = data.text;
  let speakers: TranscriptSpeakerSegment[] | undefined;

  if (diarize && words.some((w) => w.speaker_id)) {
    speakers = buildSpeakerSegments(words);
    text = formatDiarizedText(speakers);
  }

  return {
    text,
    language: data.language_code ?? language ?? "en",
    duration_seconds: duration,
    metadata: {
      model: "scribe_v1",
      words,
      speakers,
      language_probability: data.language_probability,
      ...(diarize ? { diarized: true } : {}),
    },
  };
}
|
|
231
|
+
|
|
232
|
+
function buildSpeakerSegments(
|
|
233
|
+
words: Array<{ text: string; start: number; end: number; speaker_id?: string }>
|
|
234
|
+
): TranscriptSpeakerSegment[] {
|
|
235
|
+
const segments: TranscriptSpeakerSegment[] = [];
|
|
236
|
+
let current: TranscriptSpeakerSegment | null = null;
|
|
237
|
+
|
|
238
|
+
for (const word of words) {
|
|
239
|
+
const speakerId = word.speaker_id ?? "speaker_unknown";
|
|
240
|
+
if (!current || current.speaker_id !== speakerId) {
|
|
241
|
+
if (current) segments.push(current);
|
|
242
|
+
current = { speaker_id: speakerId, start: word.start, end: word.end, text: word.text };
|
|
243
|
+
} else {
|
|
244
|
+
current.end = word.end;
|
|
245
|
+
current.text += " " + word.text;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
if (current) segments.push(current);
|
|
249
|
+
return segments;
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
function formatDiarizedText(segments: TranscriptSpeakerSegment[]): string {
|
|
253
|
+
return segments
|
|
254
|
+
.map((s) => {
|
|
255
|
+
// Convert "speaker_0" → "Speaker 1" (1-indexed for readability)
|
|
256
|
+
const label = s.speaker_id.replace(/speaker_(\d+)/, (_, n) => `Speaker ${parseInt(n) + 1}`);
|
|
257
|
+
return `${label}: ${s.text}`;
|
|
258
|
+
})
|
|
259
|
+
.join("\n");
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
// ---------------------------------------------------------------------------
|
|
263
|
+
// OpenAI Whisper
|
|
264
|
+
// ---------------------------------------------------------------------------
|
|
265
|
+
|
|
266
|
+
/**
 * Transcribe via the OpenAI audio transcriptions API (model "whisper-1"),
 * requesting verbose_json with word- and segment-level timestamps.
 * Requires OPENAI_API_KEY in the environment. Whisper has no diarization,
 * so this path takes no `diarize` flag.
 *
 * @throws Error when the API key is missing or the API responds non-2xx.
 */
async function transcribeWithOpenAI(
  filePath: string,
  language?: string,
  vocab?: string[]
): Promise<TranscriptionResult> {
  const apiKey = process.env["OPENAI_API_KEY"];
  if (!apiKey) throw new Error("OPENAI_API_KEY is not set");

  const fileName = basename(filePath);
  const audioFile = Bun.file(filePath);

  const form = new FormData();
  form.append("file", audioFile, fileName);
  form.append("model", "whisper-1");
  form.append("response_format", "verbose_json");
  form.append("timestamp_granularities[]", "word");
  form.append("timestamp_granularities[]", "segment");
  if (language) form.append("language", language);
  if (vocab && vocab.length > 0) {
    // OpenAI Whisper uses prompt field for vocabulary hints
    form.append("prompt", vocab.join(", "));
  }

  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${apiKey}` },
    body: form,
  });

  if (!res.ok) {
    const body = await res.text();
    throw new Error(`OpenAI API error ${res.status}: ${body}`);
  }

  const data = (await res.json()) as {
    text: string;
    language: string;
    duration: number;
    words?: Array<{ word: string; start: number; end: number }>;
    segments?: Array<{ start: number; end: number; text: string }>;
  };

  // Normalize OpenAI's { word } field to the internal { text } word shape.
  const words = (data.words ?? []).map((w) => ({
    text: w.word,
    start: w.start,
    end: w.end,
  }));

  const segments = (data.segments ?? []).map((s) => ({
    start: s.start,
    end: s.end,
    text: s.text,
  }));

  return {
    text: data.text,
    language: data.language ?? language ?? "en",
    duration_seconds: data.duration ?? null,
    metadata: {
      model: "whisper-1",
      words,
      segments,
    },
  };
}
|
|
331
|
+
|
|
332
|
+
// ---------------------------------------------------------------------------
|
|
333
|
+
// Provider availability check
|
|
334
|
+
// ---------------------------------------------------------------------------
|
|
335
|
+
|
|
336
|
+
// ---------------------------------------------------------------------------
|
|
337
|
+
// Cost calculation
|
|
338
|
+
// ---------------------------------------------------------------------------
|
|
339
|
+
|
|
340
|
+
// Rates per minute (approximate, as of 2025)
|
|
341
|
+
const COST_PER_MINUTE: Record<string, number> = {
|
|
342
|
+
elevenlabs: 0.40 / 60, // ~$0.40/hr = $0.00667/min
|
|
343
|
+
openai: 0.006, // $0.006/min for Whisper
|
|
344
|
+
};
|
|
345
|
+
|
|
346
|
+
/**
|
|
347
|
+
* Estimate transcription cost in USD based on provider and duration.
|
|
348
|
+
*/
|
|
349
|
+
export function estimateCost(provider: string, durationSeconds: number): number {
|
|
350
|
+
const rate = COST_PER_MINUTE[provider] ?? 0;
|
|
351
|
+
return Math.round(rate * (durationSeconds / 60) * 10000) / 10000; // 4 decimal places
|
|
352
|
+
}
|
|
353
|
+
|
|
354
|
+
export function checkProviders(): { elevenlabs: boolean; openai: boolean; deepgram: boolean } {
|
|
355
|
+
return {
|
|
356
|
+
elevenlabs: !!process.env["ELEVENLABS_API_KEY"],
|
|
357
|
+
openai: !!process.env["OPENAI_API_KEY"],
|
|
358
|
+
deepgram: !!process.env["DEEPGRAM_API_KEY"],
|
|
359
|
+
};
|
|
360
|
+
}
|
|
361
|
+
|
|
362
|
+
// ---------------------------------------------------------------------------
|
|
363
|
+
// DeepGram
|
|
364
|
+
// ---------------------------------------------------------------------------
|
|
365
|
+
|
|
366
|
+
/**
 * Transcribe via the Deepgram pre-recorded audio API (model "nova-3").
 * Requires DEEPGRAM_API_KEY in the environment. The whole file is buffered
 * into memory and POSTed as the raw request body (no multipart upload).
 *
 * @throws Error when the API key is missing, the API responds non-2xx,
 *   or the response contains no alternatives.
 */
async function transcribeWithDeepGram(
  filePath: string,
  language?: string,
  diarize?: boolean,
  vocab?: string[]
): Promise<TranscriptionResult> {
  const apiKey = process.env["DEEPGRAM_API_KEY"];
  if (!apiKey) throw new Error("DEEPGRAM_API_KEY is not set");

  const audioData = await Bun.file(filePath).arrayBuffer();

  const params = new URLSearchParams({
    model: "nova-3",
    smart_format: "true",
    punctuate: "true",
    utterances: "true",
  });
  if (language) params.set("language", language);
  if (diarize) params.set("diarize", "true");
  // Each vocab term gets an intensifier of :5, e.g. "a:5,b:5".
  // NOTE(review): confirm the `keywords` parameter is still honored by nova-3.
  if (vocab && vocab.length > 0) params.set("keywords", vocab.join(":5,") + ":5");

  const res = await fetch(`https://api.deepgram.com/v1/listen?${params}`, {
    method: "POST",
    headers: {
      Authorization: `Token ${apiKey}`,
      // NOTE(review): assumes the input is always MP3 — verify callers never
      // pass other container formats, or derive the MIME type from the file.
      "Content-Type": "audio/mpeg",
    },
    body: audioData,
  });

  if (!res.ok) {
    const body = await res.text();
    throw new Error(`DeepGram API error ${res.status}: ${body}`);
  }

  const data = (await res.json()) as {
    results: {
      channels: Array<{
        alternatives: Array<{
          transcript: string;
          words: Array<{ word: string; start: number; end: number; speaker?: number; confidence: number }>;
        }>;
      }>;
      utterances?: Array<{ start: number; end: number; transcript: string; speaker: number }>;
    };
    metadata: { duration: number; language?: string };
  };

  // Only the first channel's best alternative is used.
  const alt = data.results.channels[0]?.alternatives[0];
  if (!alt) throw new Error("DeepGram returned no results");

  const words = alt.words.map((w) => ({
    text: w.word,
    start: w.start,
    end: w.end,
    ...(w.speaker !== undefined ? { speaker_id: `speaker_${w.speaker}` } : {}),
    logprob: Math.log(w.confidence), // convert confidence to logprob
  }));

  let text = alt.transcript;
  let speakers: TranscriptSpeakerSegment[] | undefined;

  // When diarizing, rebuild the text from utterances with readable labels
  // ("speaker_0" → "Speaker 1"), mirroring the ElevenLabs formatting.
  if (diarize && data.results.utterances) {
    speakers = data.results.utterances.map((u) => ({
      speaker_id: `speaker_${u.speaker}`,
      start: u.start,
      end: u.end,
      text: u.transcript,
    }));
    text = speakers.map((s) => {
      const label = s.speaker_id.replace(/speaker_(\d+)/, (_, n) => `Speaker ${parseInt(n) + 1}`);
      return `${label}: ${s.text}`;
    }).join("\n");
  }

  return {
    text,
    language: data.metadata.language ?? language ?? "en",
    duration_seconds: data.metadata.duration ?? null,
    metadata: {
      model: "nova-3",
      words,
      speakers,
      ...(diarize ? { diarized: true } : {}),
    },
  };
}
|
|
453
|
+
|
|
454
|
+
// ---------------------------------------------------------------------------
|
|
455
|
+
// Markdown export helper
|
|
456
|
+
// ---------------------------------------------------------------------------
|
|
457
|
+
|
|
458
|
+
import type { Transcript } from "../db/transcripts.js";
|
|
459
|
+
|
|
460
|
+
function formatTimestamp(seconds: number): string {
|
|
461
|
+
const h = Math.floor(seconds / 3600);
|
|
462
|
+
const m = Math.floor((seconds % 3600) / 60);
|
|
463
|
+
const s = Math.floor(seconds % 60);
|
|
464
|
+
if (h > 0) return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}`;
|
|
465
|
+
return `${m}:${String(s).padStart(2, "0")}`;
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
/**
|
|
469
|
+
* Export a transcript as formatted Markdown.
|
|
470
|
+
* Chapters become ## headings, speaker labels are **bolded**, timestamps inline.
|
|
471
|
+
*/
|
|
472
|
+
export function toMarkdown(t: Transcript): string {
|
|
473
|
+
const lines: string[] = [];
|
|
474
|
+
|
|
475
|
+
// Title
|
|
476
|
+
lines.push(`# ${t.title ?? "Transcript"}`);
|
|
477
|
+
lines.push("");
|
|
478
|
+
|
|
479
|
+
// Metadata
|
|
480
|
+
if (t.source_url) lines.push(`> Source: ${t.source_url}`);
|
|
481
|
+
if (t.duration_seconds) lines.push(`> Duration: ${formatTimestamp(t.duration_seconds)}`);
|
|
482
|
+
if (t.provider) lines.push(`> Provider: ${t.provider}`);
|
|
483
|
+
lines.push("");
|
|
484
|
+
|
|
485
|
+
// Summary
|
|
486
|
+
if (t.metadata?.summary) {
|
|
487
|
+
lines.push("## Summary", "", t.metadata.summary, "");
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
// Chapters with text
|
|
491
|
+
if (t.metadata?.chapters && t.metadata.chapters.length > 0) {
|
|
492
|
+
for (const ch of t.metadata.chapters) {
|
|
493
|
+
lines.push(`## ${ch.title}`, "");
|
|
494
|
+
lines.push(`_[${formatTimestamp(ch.start_time)}]_`, "");
|
|
495
|
+
// Format text with speaker labels bolded
|
|
496
|
+
const formatted = ch.text.replace(/(Speaker \d+|[A-Z][a-z]+ [A-Z][a-z]+):/g, "\n\n**$1:**");
|
|
497
|
+
lines.push(formatted.trim(), "");
|
|
498
|
+
}
|
|
499
|
+
} else if (t.metadata?.speakers && t.metadata.speakers.length > 0) {
|
|
500
|
+
// Diarized without chapters — use speaker segments
|
|
501
|
+
for (const seg of t.metadata.speakers) {
|
|
502
|
+
const label = seg.speaker_id.replace(/speaker_(\d+)/, (_, n) => `Speaker ${parseInt(n) + 1}`);
|
|
503
|
+
const ts = formatTimestamp(seg.start);
|
|
504
|
+
lines.push(`**${label}** _[${ts}]_: ${seg.text}`, "");
|
|
505
|
+
}
|
|
506
|
+
} else if (t.transcript_text) {
|
|
507
|
+
// Plain text — bold any speaker labels
|
|
508
|
+
const formatted = t.transcript_text.replace(/(Speaker \d+|[A-Z][a-z]+ [A-Z][a-z]+):/g, "\n\n**$1:**");
|
|
509
|
+
lines.push(formatted.trim());
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
return lines.join("\n");
|
|
513
|
+
}
|
|
514
|
+
|
|
515
|
+
// ---------------------------------------------------------------------------
|
|
516
|
+
// SRT export helper
|
|
517
|
+
// ---------------------------------------------------------------------------
|
|
518
|
+
|
|
519
|
+
export function toSrt(words: Array<{ text: string; start: number; end: number }>): string {
|
|
520
|
+
if (words.length === 0) return "";
|
|
521
|
+
|
|
522
|
+
const lines: string[] = [];
|
|
523
|
+
const chunkSize = 10; // words per subtitle block
|
|
524
|
+
let idx = 1;
|
|
525
|
+
|
|
526
|
+
for (let i = 0; i < words.length; i += chunkSize) {
|
|
527
|
+
const chunk = words.slice(i, i + chunkSize);
|
|
528
|
+
const start = formatSrtTime(chunk[0].start);
|
|
529
|
+
const end = formatSrtTime(chunk[chunk.length - 1].end);
|
|
530
|
+
const text = chunk.map((w) => w.text).join(" ");
|
|
531
|
+
lines.push(`${idx}\n${start} --> ${end}\n${text}\n`);
|
|
532
|
+
idx++;
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
return lines.join("\n");
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
function formatSrtTime(seconds: number): string {
|
|
539
|
+
const h = Math.floor(seconds / 3600);
|
|
540
|
+
const m = Math.floor((seconds % 3600) / 60);
|
|
541
|
+
const s = Math.floor(seconds % 60);
|
|
542
|
+
const ms = Math.round((seconds % 1) * 1000);
|
|
543
|
+
return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")},${String(ms).padStart(3, "0")}`;
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
// ---------------------------------------------------------------------------
|
|
547
|
+
// Chapter segmentation helper
|
|
548
|
+
// ---------------------------------------------------------------------------
|
|
549
|
+
|
|
550
|
+
/**
|
|
551
|
+
* Map word-level timestamps to video chapters, producing chapter-labelled segments.
|
|
552
|
+
* Words that don't fall cleanly within any chapter are attributed to the nearest chapter.
|
|
553
|
+
*/
|
|
554
|
+
export function segmentByChapters(
|
|
555
|
+
words: Array<{ text: string; start: number; end: number }>,
|
|
556
|
+
chapters: VideoChapter[]
|
|
557
|
+
): TranscriptChapterSegment[] {
|
|
558
|
+
if (chapters.length === 0 || words.length === 0) return [];
|
|
559
|
+
|
|
560
|
+
const segments: TranscriptChapterSegment[] = chapters.map((ch) => ({
|
|
561
|
+
title: ch.title,
|
|
562
|
+
start_time: ch.start_time,
|
|
563
|
+
end_time: ch.end_time,
|
|
564
|
+
text: "",
|
|
565
|
+
}));
|
|
566
|
+
|
|
567
|
+
for (const word of words) {
|
|
568
|
+
// Find chapter that contains the word's midpoint
|
|
569
|
+
const mid = (word.start + word.end) / 2;
|
|
570
|
+
let idx = segments.findIndex((s) => mid >= s.start_time && mid < s.end_time);
|
|
571
|
+
// Fall back to last chapter if midpoint is beyond last chapter end
|
|
572
|
+
if (idx === -1) idx = segments.length - 1;
|
|
573
|
+
segments[idx].text = segments[idx].text ? segments[idx].text + " " + word.text : word.text;
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
return segments.filter((s) => s.text.length > 0);
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
// ---------------------------------------------------------------------------
|
|
580
|
+
// VTT (WebVTT) export helper
|
|
581
|
+
// ---------------------------------------------------------------------------
|
|
582
|
+
|
|
583
|
+
export function toVtt(words: Array<{ text: string; start: number; end: number }>): string {
|
|
584
|
+
if (words.length === 0) return "WEBVTT\n";
|
|
585
|
+
|
|
586
|
+
const lines: string[] = ["WEBVTT", ""];
|
|
587
|
+
const chunkSize = 10;
|
|
588
|
+
|
|
589
|
+
for (let i = 0; i < words.length; i += chunkSize) {
|
|
590
|
+
const chunk = words.slice(i, i + chunkSize);
|
|
591
|
+
const start = formatVttTime(chunk[0].start);
|
|
592
|
+
const end = formatVttTime(chunk[chunk.length - 1].end);
|
|
593
|
+
const text = chunk.map((w) => w.text).join(" ");
|
|
594
|
+
lines.push(`${start} --> ${end}`, text, "");
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
return lines.join("\n");
|
|
598
|
+
}
|
|
599
|
+
|
|
600
|
+
function formatVttTime(seconds: number): string {
|
|
601
|
+
const h = Math.floor(seconds / 3600);
|
|
602
|
+
const m = Math.floor((seconds % 3600) / 60);
|
|
603
|
+
const s = Math.floor(seconds % 60);
|
|
604
|
+
const ms = Math.round((seconds % 1) * 1000);
|
|
605
|
+
// VTT omits hours if zero, uses . not ,
|
|
606
|
+
if (h > 0) {
|
|
607
|
+
return `${String(h).padStart(2, "0")}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(ms).padStart(3, "0")}`;
|
|
608
|
+
}
|
|
609
|
+
return `${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(ms).padStart(3, "0")}`;
|
|
610
|
+
}
|
|
611
|
+
|
|
612
|
+
// ---------------------------------------------------------------------------
|
|
613
|
+
// ASS (Advanced SubStation Alpha) export helper
|
|
614
|
+
// ---------------------------------------------------------------------------
|
|
615
|
+
|
|
616
|
+
/**
 * Optional visual overrides for the single "Default" style emitted by
 * {@link toAss}. Any field left unset falls back to the default noted
 * beside it.
 */
export interface AssStyle {
  fontName?: string; // default: "Arial"
  fontSize?: number; // default: 20
  color?: string; // hex RGB e.g. "FFFFFF" or "#FFFFFF" (default: white)
  outline?: number; // default: 2
  shadow?: number; // default: 1
}
|
|
623
|
+
|
|
624
|
+
/**
|
|
625
|
+
* Convert #RRGGBB or RRGGBB hex to ASS &HAABBGGRR color (alpha=00=opaque).
|
|
626
|
+
*/
|
|
627
|
+
function hexToAssColor(hex: string): string {
|
|
628
|
+
const clean = hex.replace("#", "").padStart(6, "0");
|
|
629
|
+
const r = clean.slice(0, 2);
|
|
630
|
+
const g = clean.slice(2, 4);
|
|
631
|
+
const b = clean.slice(4, 6);
|
|
632
|
+
return `&H00${b}${g}${r}`.toUpperCase();
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
function formatAssTime(seconds: number): string {
|
|
636
|
+
const h = Math.floor(seconds / 3600);
|
|
637
|
+
const m = Math.floor((seconds % 3600) / 60);
|
|
638
|
+
const s = Math.floor(seconds % 60);
|
|
639
|
+
// ASS uses centiseconds (2 digits)
|
|
640
|
+
const cs = Math.round((seconds % 1) * 100);
|
|
641
|
+
return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
// ---------------------------------------------------------------------------
|
|
645
|
+
// Confidence filtering helper
|
|
646
|
+
// ---------------------------------------------------------------------------
|
|
647
|
+
|
|
648
|
+
/**
|
|
649
|
+
* Rebuild transcript text with low-confidence words wrapped in [?..?] markers.
|
|
650
|
+
* Only applies to ElevenLabs transcripts that have logprob on words.
|
|
651
|
+
*
|
|
652
|
+
* @param words - word array with optional logprob
|
|
653
|
+
* @param threshold - confidence threshold 0.0–1.0 (default 0.7). Words below this are flagged.
|
|
654
|
+
*/
|
|
655
|
+
export function formatWithConfidence(
|
|
656
|
+
words: Array<{ text: string; logprob?: number }>,
|
|
657
|
+
threshold = 0.7
|
|
658
|
+
): string {
|
|
659
|
+
return words
|
|
660
|
+
.map((w) => {
|
|
661
|
+
if (w.logprob === undefined) return w.text;
|
|
662
|
+
const confidence = Math.exp(w.logprob);
|
|
663
|
+
return confidence < threshold ? `[?${w.text}?]` : w.text;
|
|
664
|
+
})
|
|
665
|
+
.join(" ");
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
export function toAss(
|
|
669
|
+
words: Array<{ text: string; start: number; end: number }>,
|
|
670
|
+
style: AssStyle = {}
|
|
671
|
+
): string {
|
|
672
|
+
if (words.length === 0) return "";
|
|
673
|
+
|
|
674
|
+
const fontName = style.fontName ?? "Arial";
|
|
675
|
+
const fontSize = style.fontSize ?? 20;
|
|
676
|
+
const primaryColor = hexToAssColor(style.color ?? "FFFFFF");
|
|
677
|
+
const outline = style.outline ?? 2;
|
|
678
|
+
const shadow = style.shadow ?? 1;
|
|
679
|
+
|
|
680
|
+
const scriptInfo = [
|
|
681
|
+
"[Script Info]",
|
|
682
|
+
"ScriptType: v4.00+",
|
|
683
|
+
"PlayResX: 384",
|
|
684
|
+
"PlayResY: 288",
|
|
685
|
+
"ScaledBorderAndShadow: yes",
|
|
686
|
+
"",
|
|
687
|
+
].join("\n");
|
|
688
|
+
|
|
689
|
+
const stylesSection = [
|
|
690
|
+
"[V4+ Styles]",
|
|
691
|
+
"Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding",
|
|
692
|
+
`Style: Default,${fontName},${fontSize},${primaryColor},&H000000FF,&H00000000,&H00000000,0,0,0,0,100,100,0,0,1,${outline},${shadow},2,10,10,10,1`,
|
|
693
|
+
"",
|
|
694
|
+
].join("\n");
|
|
695
|
+
|
|
696
|
+
const eventsHeader = [
|
|
697
|
+
"[Events]",
|
|
698
|
+
"Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text",
|
|
699
|
+
].join("\n");
|
|
700
|
+
|
|
701
|
+
const chunkSize = 10;
|
|
702
|
+
const dialogues: string[] = [];
|
|
703
|
+
|
|
704
|
+
for (let i = 0; i < words.length; i += chunkSize) {
|
|
705
|
+
const chunk = words.slice(i, i + chunkSize);
|
|
706
|
+
const start = formatAssTime(chunk[0].start);
|
|
707
|
+
const end = formatAssTime(chunk[chunk.length - 1].end);
|
|
708
|
+
const text = chunk.map((w) => w.text).join(" ");
|
|
709
|
+
dialogues.push(`Dialogue: 0,${start},${end},Default,,0,0,0,,${text}`);
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
return [scriptInfo, stylesSection, eventsHeader, ...dialogues].join("\n");
|
|
713
|
+
}
|