@hasna/microservices 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/index.js +7 -0
- package/bin/mcp.js +8 -1
- package/dist/index.js +7 -0
- package/microservices/microservice-transcriber/package.json +28 -0
- package/microservices/microservice-transcriber/src/cli/index.ts +1347 -0
- package/microservices/microservice-transcriber/src/db/annotations.ts +37 -0
- package/microservices/microservice-transcriber/src/db/database.ts +82 -0
- package/microservices/microservice-transcriber/src/db/migrations.ts +72 -0
- package/microservices/microservice-transcriber/src/db/transcripts.ts +395 -0
- package/microservices/microservice-transcriber/src/index.ts +43 -0
- package/microservices/microservice-transcriber/src/lib/config.ts +77 -0
- package/microservices/microservice-transcriber/src/lib/diff.ts +91 -0
- package/microservices/microservice-transcriber/src/lib/downloader.ts +570 -0
- package/microservices/microservice-transcriber/src/lib/feeds.ts +62 -0
- package/microservices/microservice-transcriber/src/lib/live.ts +94 -0
- package/microservices/microservice-transcriber/src/lib/notion.ts +129 -0
- package/microservices/microservice-transcriber/src/lib/providers.ts +713 -0
- package/microservices/microservice-transcriber/src/lib/summarizer.ts +147 -0
- package/microservices/microservice-transcriber/src/lib/translator.ts +75 -0
- package/microservices/microservice-transcriber/src/lib/webhook.ts +37 -0
- package/microservices/microservice-transcriber/src/mcp/index.ts +1070 -0
- package/microservices/microservice-transcriber/src/server/index.ts +199 -0
- package/package.json +1 -1
- package/microservices/microservice-invoices/dashboard/dist/assets/index-Bngq7FNM.css +0 -1
- package/microservices/microservice-invoices/dashboard/dist/assets/index-aHW4ARZR.js +0 -124
- package/microservices/microservice-invoices/dashboard/dist/index.html +0 -13
|
@@ -0,0 +1,1347 @@
|
|
|
1
|
+
#!/usr/bin/env bun
|
|
2
|
+
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { writeFileSync } from "node:fs";
|
|
5
|
+
import {
|
|
6
|
+
createTranscript,
|
|
7
|
+
getTranscript,
|
|
8
|
+
updateTranscript,
|
|
9
|
+
deleteTranscript,
|
|
10
|
+
listTranscripts,
|
|
11
|
+
searchTranscripts,
|
|
12
|
+
countTranscripts,
|
|
13
|
+
renameSpeakers,
|
|
14
|
+
findBySourceUrl,
|
|
15
|
+
addTags,
|
|
16
|
+
removeTags,
|
|
17
|
+
getTags,
|
|
18
|
+
listAllTags,
|
|
19
|
+
listTranscriptsByTag,
|
|
20
|
+
searchWithContext,
|
|
21
|
+
type TranscriptProvider,
|
|
22
|
+
type TranscriptStatus,
|
|
23
|
+
type TranscriptSourceType,
|
|
24
|
+
} from "../db/transcripts.js";
|
|
25
|
+
import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, type TrimOptions } from "../lib/downloader.js";
|
|
26
|
+
import { transcribeFile, checkProviders, toSrt, toVtt, toAss, toMarkdown, segmentByChapters, formatWithConfidence, estimateCost } from "../lib/providers.js";
|
|
27
|
+
import { getConfig, setConfig, resetConfig, CONFIG_DEFAULTS, CONFIG_KEYS, type ConfigKey } from "../lib/config.js";
|
|
28
|
+
import { summarizeText, extractHighlights, generateMeetingNotes, getDefaultSummaryProvider } from "../lib/summarizer.js";
|
|
29
|
+
import { translateText } from "../lib/translator.js";
|
|
30
|
+
import { fetchFeedEpisodes } from "../lib/feeds.js";
|
|
31
|
+
import { fireWebhook } from "../lib/webhook.js";
|
|
32
|
+
import { createAnnotation, listAnnotations, deleteAnnotation, formatTimestamp as fmtAnnoTs } from "../db/annotations.js";
|
|
33
|
+
import { pushToNotion } from "../lib/notion.js";
|
|
34
|
+
import { startLiveTranscription } from "../lib/live.js";
|
|
35
|
+
import { wordDiff, formatDiff, diffStats } from "../lib/diff.js";
|
|
36
|
+
|
|
37
|
+
// Root CLI command object; the subcommands in this file are registered on it.
const program = new Command()
  .name("microservice-transcriber")
  .description("Transcribe audio and video from files and URLs using ElevenLabs or OpenAI")
  .version("0.0.1");
|
|
43
|
+
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
// transcribe
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
|
|
48
|
+
// Registers `transcribe <sources...>`: transcribes each source in turn and stores the
// result. Playlist URLs are expanded first; when more than one source remains the command
// runs in batch mode (per-item progress lines plus one summary at the end).
// The exit status is non-zero whenever at least one source failed, in single-source
// mode as well as in batch mode.
program
  .command("transcribe <sources...>")
  .description("Transcribe one or more files or URLs (YouTube, Vimeo, Wistia, etc.)")
  .option("--provider <provider>", "Provider: elevenlabs or openai (uses config default)")
  .option("--language <lang>", "Language code (e.g. en, fr, de). Auto-detected if omitted.")
  .option("--title <title>", "Title (only used when transcribing a single source)")
  .option("--start <seconds>", "Start time in seconds (trim audio before transcribing)", parseFloat)
  .option("--end <seconds>", "End time in seconds (trim audio before transcribing)", parseFloat)
  .option("--diarize", "Identify different speakers (ElevenLabs only)")
  .option("--vocab <words>", "Custom vocabulary hints (comma-separated, e.g. 'Karpathy,MicroGPT,SABR')")
  .option("--summarize", "Auto-summarize after transcription using AI")
  .option("--force", "Re-transcribe even if URL was already transcribed")
  .option("--json", "Output as JSON")
  .action(async (rawSources: string[], opts) => {
    const cfg = getConfig();
    const provider = (opts.provider ?? cfg.defaultProvider) as TranscriptProvider;
    // A configured default of "en" is not forwarded; only an explicit --language or a
    // non-"en" default is passed on.
    const language = opts.language ?? (cfg.defaultLanguage !== "en" ? cfg.defaultLanguage : undefined);
    const diarize = opts.diarize ?? cfg.diarize;
    const available = checkProviders();

    // Fail fast when the API key for the selected provider is missing.
    if (provider === "elevenlabs" && !available.elevenlabs) {
      console.error("Error: ELEVENLABS_API_KEY is not set.");
      process.exit(1);
    }
    if (provider === "openai" && !available.openai) {
      console.error("Error: OPENAI_API_KEY is not set.");
      process.exit(1);
    }
    // Mirrors the Deepgram key check performed by the `live` command.
    if (provider === "deepgram" && !available.deepgram) {
      console.error("Error: DEEPGRAM_API_KEY is not set.");
      process.exit(1);
    }

    if (diarize && provider !== "elevenlabs") {
      console.error("Warning: --diarize is only supported with ElevenLabs. Ignoring.");
    }

    const trim: TrimOptions | undefined =
      opts.start !== undefined || opts.end !== undefined
        ? { start: opts.start, end: opts.end }
        : undefined;

    // Expand playlist URLs into individual video URLs.
    const sources: string[] = [];
    for (const src of rawSources) {
      if (!isPlaylistUrl(src)) {
        sources.push(src);
        continue;
      }
      if (!opts.json) console.log(`Expanding playlist ${src}...`);
      try {
        const videos = await getPlaylistUrls(src);
        if (!opts.json) console.log(` Found ${videos.length} video(s)`);
        sources.push(...videos.map((v) => v.url));
      } catch (e) {
        console.error(`Failed to expand playlist: ${e instanceof Error ? e.message : e}`);
        sources.push(src); // fallback: try as single video
      }
    }

    const isBatch = sources.length > 1;
    const results: Array<{ source: string; id: string; success: boolean; error?: string }> = [];

    for (let i = 0; i < sources.length; i++) {
      const source = sources[i];
      if (!opts.json && isBatch) {
        console.log(`\n[${i + 1}/${sources.length}] Transcribing ${source}...`);
      }

      // Duplicate detection: an existing record for the same URL counts as a success.
      if (!opts.force) {
        const existing = findBySourceUrl(source);
        if (existing) {
          if (!opts.json) {
            console.log(` Already transcribed: ${existing.id.slice(0, 8)} "${existing.title ?? source}". Use --force to re-transcribe.`);
          }
          results.push({ source, id: existing.id, success: true });
          continue;
        }
      }

      const sourceType = detectSourceType(source);
      const record = createTranscript({
        source_url: source,
        source_type: sourceType,
        provider,
        language,
        title: !isBatch ? opts.title : undefined, // --title only applies to a single source
      });

      if (!opts.json && !isBatch) {
        const trimStr = trim ? ` [${trim.start ?? 0}s → ${trim.end ?? "end"}]` : "";
        console.log(`Transcribing ${source} [${sourceType}]${trimStr} with ${provider}...`);
      }

      updateTranscript(record.id, { status: "processing" });

      let audio: Awaited<ReturnType<typeof prepareAudio>> | null = null;
      try {
        audio = await prepareAudio(source, trim);

        // Use the detected video title unless the user supplied --title for a single source.
        if (audio.videoTitle && (isBatch || !opts.title)) {
          updateTranscript(record.id, { title: audio.videoTitle });
        }

        // Config-level vocabulary first, then the comma-separated --vocab entries.
        const vocabList = [
          ...(cfg.vocab ?? []),
          ...(opts.vocab ? opts.vocab.split(",").map((v: string) => v.trim()) : []),
        ].filter(Boolean);

        const result = await transcribeFile(audio.filePath, {
          provider,
          language,
          diarize: diarize && provider === "elevenlabs",
          vocab: vocabList.length > 0 ? vocabList : undefined,
        });

        const chapterSegments = audio.chapters.length > 0 && result.metadata.words
          ? segmentByChapters(result.metadata.words, audio.chapters)
          : undefined;

        const cost = result.duration_seconds ? estimateCost(provider, result.duration_seconds) : undefined;
        const wordCount = result.text.split(/\s+/).filter(Boolean).length;

        const updated = updateTranscript(record.id, {
          status: "completed",
          transcript_text: result.text,
          duration_seconds: result.duration_seconds ?? undefined,
          word_count: wordCount,
          metadata: {
            ...result.metadata,
            ...(trim ? { trim_start: trim.start, trim_end: trim.end } : {}),
            ...(chapterSegments ? { chapters: chapterSegments } : {}),
            ...(cost !== undefined ? { cost_usd: cost } : {}),
          },
        });

        // Auto-summarize if requested; a summarizer failure only warns and does not
        // fail the transcription.
        if (opts.summarize && result.text) {
          try {
            if (!opts.json) process.stdout.write(" Summarizing...");
            const summary = await summarizeText(result.text);
            updateTranscript(record.id, {
              metadata: { ...updated?.metadata, summary },
            });
            if (!opts.json) console.log(" done.");
          } catch (e) {
            if (!opts.json) console.error(` Warning: summarize failed — ${e instanceof Error ? e.message : e}`);
          }
        }

        // Fire webhook
        fireWebhook({
          event: "transcription.completed", id: record.id, title: getTranscript(record.id)?.title ?? null,
          status: "completed", source_url: source, provider, duration_seconds: result.duration_seconds,
          word_count: wordCount, timestamp: new Date().toISOString(),
        });

        results.push({ source, id: record.id, success: true });

        if (opts.json && !isBatch) {
          console.log(JSON.stringify(getTranscript(record.id), null, 2));
        } else if (!opts.json && !isBatch) {
          console.log(`\nTranscript ID: ${record.id}`);
          console.log(`Duration: ${result.duration_seconds ? `${result.duration_seconds.toFixed(1)}s` : "unknown"}`);
          console.log(`Language: ${result.language}`);
          console.log(`\n--- Transcript ---\n`);
          console.log(result.text);
        } else if (!opts.json) {
          console.log(` ✓ ${record.id.slice(0, 8)} — ${audio.videoTitle ?? source.slice(0, 60)}`);
        }
      } catch (error) {
        const msg = error instanceof Error ? error.message : String(error);
        updateTranscript(record.id, { status: "failed", error_message: msg });
        fireWebhook({
          event: "transcription.failed", id: record.id, title: null,
          status: "failed", source_url: source, provider, duration_seconds: null,
          word_count: null, timestamp: new Date().toISOString(),
        });
        results.push({ source, id: record.id, success: false, error: msg });
        // Batch JSON output reports errors in the final summary; in every other mode the
        // message goes to stderr so a failed single-source --json run is not silent.
        if (!opts.json || !isBatch) console.error(` ✗ ${msg}`);
      } finally {
        audio?.cleanup();
      }
    }

    const failed = results.filter((r) => !r.success).length;

    if (isBatch) {
      const succeeded = results.length - failed;
      if (opts.json) {
        console.log(JSON.stringify({ results, summary: { succeeded, failed, total: sources.length } }, null, 2));
      } else {
        console.log(`\nDone: ${succeeded} completed, ${failed} failed.`);
      }
    }

    // Signal failure to the caller in single-source mode too. Setting exitCode (rather
    // than calling process.exit) lets the process end on its own.
    // NOTE(review): fireWebhook is not awaited above; if it is asynchronous this also
    // gives a pending request the chance to finish — confirm against lib/webhook.
    if (failed > 0) process.exitCode = 1;
  });
|
|
244
|
+
|
|
245
|
+
// ---------------------------------------------------------------------------
|
|
246
|
+
// live
|
|
247
|
+
// ---------------------------------------------------------------------------
|
|
248
|
+
|
|
249
|
+
// Registers `live`: starts a chunked live-transcription session, prints every chunk as it
// arrives and, on Ctrl+C, stops the session and saves the combined text as a transcript.
program
  .command("live")
  .description("Transcribe from microphone in real-time (stop with Ctrl+C)")
  .option("--provider <provider>", "Provider (uses config default)")
  .option("--language <lang>", "Language code")
  // Explicit radix: Commander passes the previous value as a second argument to the parser.
  .option("--chunk-duration <seconds>", "Seconds per chunk (default: 30)", (value: string) => parseInt(value, 10))
  .option("--title <title>", "Title for the saved transcript")
  .action(async (opts) => {
    const cfg = getConfig();
    const provider = (opts.provider ?? cfg.defaultProvider) as TranscriptProvider;
    const available = checkProviders();

    if (provider === "elevenlabs" && !available.elevenlabs) { console.error("ELEVENLABS_API_KEY not set."); process.exit(1); }
    if (provider === "openai" && !available.openai) { console.error("OPENAI_API_KEY not set."); process.exit(1); }
    if (provider === "deepgram" && !available.deepgram) { console.error("DEEPGRAM_API_KEY not set."); process.exit(1); }

    // Reject NaN / non-positive values instead of handing them to the session.
    const chunkDurationSec: number = opts.chunkDuration ?? 30;
    if (!Number.isFinite(chunkDurationSec) || chunkDurationSec <= 0) {
      console.error("Error: --chunk-duration must be a positive number of seconds.");
      process.exit(1);
    }

    console.log(`Live transcription with ${provider}. Press Ctrl+C to stop.\n`);

    const session = startLiveTranscription({
      provider,
      language: opts.language,
      chunkDurationSec,
      onChunk: (text, idx) => {
        console.log(`[chunk ${idx + 1}] ${text}`);
      },
      onError: (err, idx) => {
        console.error(`[chunk ${idx + 1}] Error: ${err.message}`);
      },
    });

    // Handle Ctrl+C. The flag makes repeated signals a no-op so the session is stopped
    // and the transcript saved at most once.
    let stopping = false;
    process.on("SIGINT", async () => {
      if (stopping) return;
      stopping = true;
      console.log("\nStopping...");

      try {
        const result = await session.stop();

        if (result.fullText) {
          // One count shared by the stored record and the printed summary.
          const wordCount = result.fullText.split(/\s+/).filter(Boolean).length;

          // Save to DB
          const record = createTranscript({
            source_url: "live:microphone",
            source_type: "file",
            provider,
            language: opts.language,
            title: opts.title ?? `Live recording ${new Date().toISOString().slice(0, 16)}`,
          });
          updateTranscript(record.id, {
            status: "completed",
            transcript_text: result.fullText,
            word_count: wordCount,
          });
          console.log(`\nSaved transcript: ${record.id}`);
          console.log(`${result.chunks.length} chunk(s), ${wordCount} words`);
        }
      } catch (error) {
        // Report stop/save failures explicitly rather than as an unhandled rejection.
        console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
        process.exit(1);
      }
      process.exit(0);
    });
  });
|
|
304
|
+
|
|
305
|
+
// ---------------------------------------------------------------------------
|
|
306
|
+
// info
|
|
307
|
+
// ---------------------------------------------------------------------------
|
|
308
|
+
|
|
309
|
+
// Registers `info <url>`: fetches video metadata via getVideoInfo and prints it either as
// JSON or as a labelled summary. Any error is reported on stderr with exit status 1.
program
  .command("info <url>")
  .description("Fetch video metadata (title, duration, chapters) without downloading or transcribing")
  .option("--json", "Output as JSON")
  .action(async (url: string, opts) => {
    try {
      const info = await getVideoInfo(url);

      if (opts.json) {
        console.log(JSON.stringify(info, null, 2));
      } else {
        console.log(`Title: ${info.title ?? "(unknown)"}`);
        console.log(`Platform: ${info.platform ?? "(unknown)"}`);
        console.log(`Uploader: ${info.uploader ?? "(unknown)"}`);

        if (info.duration !== null) {
          const minutes = Math.floor(info.duration / 60);
          const seconds = Math.floor(info.duration % 60);
          console.log(`Duration: ${minutes}m ${seconds}s (${info.duration}s)`);
        }

        if (info.upload_date) {
          // The date string is sliced as 4 + 2 + 2 characters and joined with dashes.
          const raw = info.upload_date;
          const year = raw.slice(0, 4);
          const month = raw.slice(4, 6);
          const day = raw.slice(6, 8);
          console.log(`Uploaded: ${year}-${month}-${day}`);
        }

        if (info.view_count !== null) {
          console.log(`Views: ${info.view_count.toLocaleString()}`);
        }

        if (info.description) {
          const ellipsis = info.description.length > 120 ? "…" : "";
          console.log(`Description: ${info.description.slice(0, 120)}${ellipsis}`);
        }

        if (info.chapters.length > 0) {
          console.log(`\nChapters (${info.chapters.length}):`);
          info.chapters.forEach((chapter) => {
            const mm = String(Math.floor(chapter.start_time / 60)).padStart(2, "0");
            const ss = String(Math.floor(chapter.start_time % 60)).padStart(2, "0");
            console.log(` ${mm}:${ss} ${chapter.title}`);
          });
        }
      }
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.error(`Error: ${msg}`);
      process.exit(1);
    }
  });
|
|
354
|
+
|
|
355
|
+
// ---------------------------------------------------------------------------
|
|
356
|
+
// download
|
|
357
|
+
// ---------------------------------------------------------------------------
|
|
358
|
+
|
|
359
|
+
// Registers `download <url>`: saves the audio of a URL through downloadAudio without
// transcribing it, then reports the saved path (plus title/duration when present).
program
  .command("download <url>")
  .description("Download audio from a URL without transcribing")
  .option("--format <fmt>", "Audio format: mp3 (default), m4a, wav", "mp3")
  .option("--output <path>", "Output file path (overrides auto-naming)")
  .option("--start <seconds>", "Start time in seconds", parseFloat)
  .option("--end <seconds>", "End time in seconds", parseFloat)
  .option("--json", "Output as JSON")
  .action(async (url: string, opts) => {
    const quiet = Boolean(opts.json);
    if (!quiet) console.log(`Downloading audio from ${url}...`);

    try {
      // A trim window is only passed when at least one bound was supplied.
      const hasBounds = opts.start !== undefined || opts.end !== undefined;
      const trim = hasBounds ? { start: opts.start, end: opts.end } : undefined;

      const downloaded = await downloadAudio(url, {
        format: opts.format,
        outputPath: opts.output,
        trim,
      });

      if (quiet) {
        console.log(JSON.stringify(downloaded, null, 2));
        return;
      }

      console.log(`Saved: ${downloaded.filePath}`);
      if (downloaded.title) console.log(`Title: ${downloaded.title}`);
      if (downloaded.duration) {
        const minutes = Math.floor(downloaded.duration / 60);
        const seconds = Math.floor(downloaded.duration % 60);
        console.log(`Duration: ${minutes}m ${seconds}s`);
      }
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
      process.exit(1);
    }
  });
|
|
397
|
+
|
|
398
|
+
// ---------------------------------------------------------------------------
|
|
399
|
+
// list
|
|
400
|
+
// ---------------------------------------------------------------------------
|
|
401
|
+
|
|
402
|
+
// Registers `list`: prints stored transcripts, one line each (or JSON with --json).
// With --tag, only the tag and --limit are passed to the lookup; the status, provider,
// source-type and offset options are not applied on that path.
program
  .command("list")
  .description("List transcripts")
  .option("--status <status>", "Filter by status: pending, processing, completed, failed")
  .option("--provider <provider>", "Filter by provider: elevenlabs, openai")
  .option("--source-type <type>", "Filter by source type: file, youtube, vimeo, wistia, url")
  .option("--tag <tag>", "Filter by tag")
  .option("--limit <n>", "Max results", "20")
  .option("--offset <n>", "Offset", "0")
  .option("--json", "Output as JSON")
  .action((opts) => {
    // Parse with an explicit radix and reject non-numeric input up front rather than
    // forwarding NaN to the query layer.
    const limit = Number.parseInt(opts.limit, 10);
    const offset = Number.parseInt(opts.offset, 10);
    if (Number.isNaN(limit)) {
      console.error(`Error: --limit must be an integer (got '${opts.limit}').`);
      process.exit(1);
    }
    if (Number.isNaN(offset)) {
      console.error(`Error: --offset must be an integer (got '${opts.offset}').`);
      process.exit(1);
    }

    const transcripts = opts.tag
      ? listTranscriptsByTag(opts.tag, limit)
      : listTranscripts({
          status: opts.status as TranscriptStatus | undefined,
          provider: opts.provider as TranscriptProvider | undefined,
          source_type: opts.sourceType as TranscriptSourceType | undefined,
          limit,
          offset,
        });

    if (opts.json) {
      console.log(JSON.stringify(transcripts, null, 2));
      return;
    }

    if (transcripts.length === 0) {
      console.log("No transcripts found.");
      return;
    }

    for (const t of transcripts) {
      // Fall back to a truncated source URL when the transcript has no title.
      const title = t.title || t.source_url?.slice(0, 60) || "(no source)";
      const duration = t.duration_seconds ? ` [${t.duration_seconds.toFixed(0)}s]` : "";
      const words = t.word_count ? ` ${t.word_count}w` : "";
      console.log(`${t.id.slice(0, 8)} ${t.status.padEnd(12)} ${t.provider.padEnd(11)} ${t.source_type.padEnd(8)}${duration}${words} ${title}`);
    }
  });
|
|
440
|
+
|
|
441
|
+
// ---------------------------------------------------------------------------
|
|
442
|
+
// get
|
|
443
|
+
// ---------------------------------------------------------------------------
|
|
444
|
+
|
|
445
|
+
// Registers `get <id>`: prints one transcript, either as JSON or as a labelled report
// (header fields, annotations, error, summary, highlights, then chapters or full text).
program
  .command("get <id>")
  .description("Get a transcript by ID")
  .option("--show-confidence", "Flag low-confidence words with [?word?] markers (ElevenLabs only)")
  .option("--confidence-threshold <n>", "Confidence threshold 0-1 (default 0.7)", parseFloat)
  .option("--json", "Output as JSON")
  .action((id: string, opts) => {
    const t = getTranscript(id);
    if (!t) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }

    if (opts.json) {
      console.log(JSON.stringify(t, null, 2));
      return;
    }

    // Header fields.
    console.log(`ID: ${t.id}`);
    console.log(`Title: ${t.title ?? "(none)"}`);
    console.log(`Source: ${t.source_url ?? "(none)"} [${t.source_type}]`);
    console.log(`Provider: ${t.provider}`);
    console.log(`Language: ${t.language}`);
    console.log(`Status: ${t.status}`);
    if (t.duration_seconds) console.log(`Duration: ${t.duration_seconds.toFixed(1)}s`);
    if (t.word_count) console.log(`Words: ${t.word_count}`);
    console.log(`Created: ${t.created_at}`);
    const costUsd = t.metadata?.cost_usd;
    if (costUsd) console.log(`Cost: $${costUsd.toFixed(4)}`);

    // Annotations attached to this transcript.
    const annotations = listAnnotations(id);
    if (annotations.length > 0) {
      console.log(`\n--- Annotations (${annotations.length}) ---\n`);
      annotations.forEach((entry) => {
        console.log(` [${fmtAnnoTs(entry.timestamp_sec)}] ${entry.note}`);
      });
    }

    if (t.error_message) console.log(`Error: ${t.error_message}`);

    const summary = t.metadata?.summary;
    if (summary) {
      console.log(`\n--- Summary ---\n`);
      console.log(summary);
    }

    const highlights = t.metadata?.highlights ?? [];
    if (highlights.length > 0) {
      console.log(`\n--- Highlights (${highlights.length}) ---\n`);
      highlights.forEach((item) => {
        const attribution = item.speaker ? ` (${item.speaker})` : "";
        console.log(` "${item.quote}"${attribution}`);
      });
    }

    // Chapters take precedence: the plain/confidence transcript is only printed when
    // no chapter segments are stored.
    const chapters = t.metadata?.chapters ?? [];
    if (chapters.length > 0) {
      console.log(`\n--- Chapters (${chapters.length}) ---\n`);
      chapters.forEach((chapter) => {
        const mm = String(Math.floor(chapter.start_time / 60)).padStart(2, "0");
        const ss = String(Math.floor(chapter.start_time % 60)).padStart(2, "0");
        const ellipsis = chapter.text.length > 120 ? "…" : "";
        console.log(`${mm}:${ss} ${chapter.title}`);
        console.log(` ${chapter.text.slice(0, 120)}${ellipsis}`);
      });
    } else if (t.transcript_text) {
      console.log(`\n--- Transcript ---\n`);
      const markLowConfidence = opts.showConfidence && t.metadata?.words?.length;
      const body = markLowConfidence
        ? formatWithConfidence(t.metadata.words, opts.confidenceThreshold ?? 0.7)
        : t.transcript_text;
      console.log(body);
    }
  });
|
|
506
|
+
|
|
507
|
+
// ---------------------------------------------------------------------------
|
|
508
|
+
// search
|
|
509
|
+
// ---------------------------------------------------------------------------
|
|
510
|
+
|
|
511
|
+
// Registers `search <query>`: without --context it lists matching transcripts; with
// --context <n> it prints an excerpt (and timestamp when present) for each match.
program
  .command("search <query>")
  .description("Search transcript text, titles, and source URLs")
  // Explicit radix: Commander passes the previous value as a second argument to the parser.
  .option("--context <n>", "Show N sentences of context around each match", (value: string) => parseInt(value, 10))
  .option("--json", "Output as JSON")
  .action((query: string, opts) => {
    if (opts.context !== undefined) {
      // Reject non-numeric or negative input instead of passing it to the search.
      if (!Number.isInteger(opts.context) || opts.context < 0) {
        console.error("Error: --context must be a non-negative integer.");
        process.exit(1);
      }

      // Contextual search with excerpts + timestamps
      const matches = searchWithContext(query, opts.context);
      if (opts.json) { console.log(JSON.stringify(matches, null, 2)); return; }
      if (matches.length === 0) { console.log(`No transcripts matching '${query}'.`); return; }
      console.log(`Found ${matches.length} match(es):\n`);
      for (const m of matches) {
        const ts = m.timestamp ? ` ${m.timestamp}` : "";
        console.log(`${m.transcript_id.slice(0, 8)}${ts} ${m.title ?? "(untitled)"}`);
        console.log(` ${m.excerpt}\n`);
      }
      return;
    }

    // Plain search: one line per matching transcript.
    const results = searchTranscripts(query);
    if (opts.json) { console.log(JSON.stringify(results, null, 2)); return; }
    if (results.length === 0) { console.log(`No transcripts matching '${query}'.`); return; }
    console.log(`Found ${results.length} transcript(s):\n`);
    for (const t of results) {
      const title = t.title || t.source_url?.slice(0, 60) || "(no source)";
      console.log(`${t.id.slice(0, 8)} ${title}`);
    }
  });
|
|
540
|
+
|
|
541
|
+
// ---------------------------------------------------------------------------
|
|
542
|
+
// retry
|
|
543
|
+
// ---------------------------------------------------------------------------
|
|
544
|
+
|
|
545
|
+
// Registers `retry <id>`: re-runs transcription for an existing record from its stored
// source URL, re-applying any trim window saved in its metadata, and overwrites the
// record with the new result. Exits with status 1 on failure.
program
  .command("retry <id>")
  .description("Retry a failed or pending transcription (re-uses original source URL)")
  .option("--provider <provider>", "Override provider: elevenlabs or openai")
  .option("--diarize", "Identify different speakers (ElevenLabs only)")
  .option("--json", "Output as JSON")
  .action(async (id: string, opts) => {
    const t = getTranscript(id);
    if (!t) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }

    if (!t.source_url) {
      console.error(`Transcript '${id}' has no source URL to retry from.`);
      process.exit(1);
    }

    const provider = (opts.provider as TranscriptProvider | undefined) ?? t.provider;
    const providers = checkProviders();

    if (provider === "elevenlabs" && !providers.elevenlabs) {
      console.error("Error: ELEVENLABS_API_KEY is not set.");
      process.exit(1);
    }
    if (provider === "openai" && !providers.openai) {
      console.error("Error: OPENAI_API_KEY is not set.");
      process.exit(1);
    }
    // Mirrors the Deepgram key check performed by the `live` command.
    if (provider === "deepgram" && !providers.deepgram) {
      console.error("Error: DEEPGRAM_API_KEY is not set.");
      process.exit(1);
    }

    if (!opts.json) {
      console.log(`Retrying transcript ${id} [${t.source_url}] with ${provider}...`);
    }

    updateTranscript(id, { status: "processing", error_message: null });

    let audio: Awaited<ReturnType<typeof prepareAudio>> | null = null;
    let failed = false;
    try {
      // Re-apply the trim window recorded by the original run, if any.
      const trim = t.metadata?.trim_start !== undefined || t.metadata?.trim_end !== undefined
        ? { start: t.metadata.trim_start, end: t.metadata.trim_end }
        : undefined;

      audio = await prepareAudio(t.source_url, trim);
      const result = await transcribeFile(audio.filePath, {
        provider,
        language: t.language,
        diarize: opts.diarize && provider === "elevenlabs",
      });

      const updated = updateTranscript(id, {
        status: "completed",
        transcript_text: result.text,
        duration_seconds: result.duration_seconds ?? undefined,
        word_count: result.text.split(/\s+/).filter(Boolean).length,
        metadata: {
          ...result.metadata,
          ...(trim ? { trim_start: trim.start, trim_end: trim.end } : {}),
        },
      });

      if (opts.json) {
        console.log(JSON.stringify(updated, null, 2));
      } else {
        console.log(`\nRetry successful. Transcript ID: ${id}`);
        console.log(result.text);
      }
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      updateTranscript(id, { status: "failed", error_message: msg });
      console.error(`Error: ${msg}`);
      // Do not exit here: process.exit() terminates immediately and would skip the
      // cleanup in `finally`, leaving the prepared audio behind.
      failed = true;
    } finally {
      audio?.cleanup();
    }

    if (failed) process.exit(1);
  });
|
|
620
|
+
|
|
621
|
+
// ---------------------------------------------------------------------------
|
|
622
|
+
// delete
|
|
623
|
+
// ---------------------------------------------------------------------------
|
|
624
|
+
|
|
625
|
+
// Registers `delete <id>`: removes a transcript and confirms the deletion as text or
// JSON. A falsy result from deleteTranscript is reported as "not found" (exit status 1).
program
  .command("delete <id>")
  .description("Delete a transcript")
  .option("--json", "Output as JSON")
  .action((id: string, opts) => {
    const removed = deleteTranscript(id);
    if (!removed) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }

    const confirmation = opts.json
      ? JSON.stringify({ id, deleted: true })
      : `Deleted transcript ${id}.`;
    console.log(confirmation);
  });
|
|
642
|
+
|
|
643
|
+
// ---------------------------------------------------------------------------
|
|
644
|
+
// export
|
|
645
|
+
// ---------------------------------------------------------------------------
|
|
646
|
+
|
|
647
|
+
/**
 * export <id> — render a completed transcript as txt, srt, vtt, ass, md or json
 * and print it (or write it to --output), or push it to Notion with --to notion.
 *
 * Validation performed before any output is produced:
 *  - `--to` must be "notion" when given (previously other values were ignored).
 *  - `--format` must be one of the documented formats (previously unknown
 *    values silently fell back to txt).
 *  - ASS numeric options must parse as base-10 integers.
 *  - `--confidence-threshold` must be a number in [0, 1].
 * Every failure prints to stderr and exits with status 1.
 */
program
  .command("export <id>")
  .description("Export a transcript in txt, srt, vtt, ass, md, or json format")
  .option("--format <fmt>", "Format: txt (default), srt, vtt, ass, md, json", "txt")
  .option("--output <file>", "Write to file instead of stdout")
  .option("--font <name>", "Font name for ASS format", "Arial")
  .option("--font-size <n>", "Font size for ASS format", "20")
  .option("--color <hex>", "Text color hex for ASS format (default: FFFFFF)")
  .option("--outline <n>", "Outline size for ASS format", "2")
  .option("--shadow <n>", "Shadow size for ASS format", "1")
  .option("--show-confidence", "Flag low-confidence words in txt output (ElevenLabs only)")
  .option("--confidence-threshold <n>", "Confidence threshold 0-1 (default 0.7)", parseFloat)
  .option("--to <service>", "Push to external service: notion")
  .option("--page <id>", "Notion parent page ID (required with --to notion)")
  .action(async (id: string, opts) => {
    // Print an error and terminate; typed `never` so callers narrow correctly.
    const fail = (message: string): never => {
      console.error(message);
      process.exit(1);
    };

    // Parse an integer ASS option in base 10, rejecting non-numeric input.
    const intOption = (raw: string | undefined, fallback: string, flag: string): number => {
      const parsed = Number.parseInt(raw ?? fallback, 10);
      return Number.isNaN(parsed) ? fail(`Invalid value for ${flag}: '${raw}'.`) : parsed;
    };

    if (opts.to !== undefined && opts.to !== "notion") {
      fail(`Unsupported export target '${opts.to}'. Supported: notion.`);
    }

    // Handle Notion export
    if (opts.to === "notion") {
      if (!opts.page) fail("--page <notion-page-id> is required for Notion export.");
      const notionSource = getTranscript(id);
      if (!notionSource) { console.error(`Transcript '${id}' not found.`); process.exit(1); }
      if (notionSource.status !== "completed") { console.error(`Transcript not completed.`); process.exit(1); }
      try {
        console.log("Pushing to Notion...");
        const result = await pushToNotion(notionSource, opts.page);
        console.log(`Created Notion page: ${result.url}`);
      } catch (e) {
        fail(`Error: ${e instanceof Error ? e.message : e}`);
      }
      return;
    }

    const supportedFormats = ["txt", "srt", "vtt", "ass", "md", "json"];
    if (!supportedFormats.includes(opts.format)) {
      fail(`Unsupported format '${opts.format}'. Supported: ${supportedFormats.join(", ")}.`);
    }

    const threshold = opts.confidenceThreshold;
    // NaN fails both comparisons, so it is rejected here too.
    if (threshold !== undefined && !(threshold >= 0 && threshold <= 1)) {
      fail("--confidence-threshold must be a number between 0 and 1.");
    }

    const t = getTranscript(id);
    if (!t) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }

    if (t.status !== "completed" || !t.transcript_text) {
      console.error(`Transcript '${id}' is not completed (status: ${t.status}).`);
      process.exit(1);
    }

    let output: string;

    switch (opts.format) {
      case "json":
        output = JSON.stringify(t, null, 2);
        break;
      case "md":
        output = toMarkdown(t);
        break;
      case "srt":
      case "vtt":
      case "ass": {
        // Subtitle formats are built from word-level timestamps in metadata.
        const words = t.metadata?.words ?? [];
        if (words.length === 0) {
          console.error(`No word-level timestamps available for ${opts.format.toUpperCase()} export.`);
          process.exit(1);
        }
        if (opts.format === "vtt") {
          output = toVtt(words);
        } else if (opts.format === "ass") {
          output = toAss(words, {
            fontName: opts.font,
            fontSize: intOption(opts.fontSize, "20", "--font-size"),
            color: opts.color,
            outline: intOption(opts.outline, "2", "--outline"),
            shadow: intOption(opts.shadow, "1", "--shadow"),
          });
        } else {
          output = toSrt(words);
        }
        break;
      }
      default:
        // txt format — optionally apply confidence markers
        output =
          opts.showConfidence && t.metadata?.words?.length
            ? formatWithConfidence(t.metadata.words, threshold ?? 0.7)
            : t.transcript_text;
    }

    if (opts.output) {
      writeFileSync(opts.output, output, "utf8");
      console.log(`Exported to ${opts.output}`);
    } else {
      console.log(output);
    }
  });
|
|
723
|
+
|
|
724
|
+
// ---------------------------------------------------------------------------
|
|
725
|
+
// providers
|
|
726
|
+
// ---------------------------------------------------------------------------
|
|
727
|
+
|
|
728
|
+
// providers — report, per transcription provider, whether checkProviders()
// considers it configured. --json prints the raw result object instead.
program
  .command("providers")
  .description("Check which transcription providers are configured")
  .option("--json", "Output as JSON")
  .action((opts) => {
    const available = checkProviders();

    if (opts.json) {
      console.log(JSON.stringify(available, null, 2));
      return;
    }

    // One status line per provider, printed in a fixed order.
    const report = [
      `elevenlabs ${available.elevenlabs ? "✓ configured (ELEVENLABS_API_KEY)" : "✗ ELEVENLABS_API_KEY not set"}`,
      `openai ${available.openai ? "✓ configured (OPENAI_API_KEY)" : "✗ OPENAI_API_KEY not set"}`,
      `deepgram ${available.deepgram ? "✓ configured (DEEPGRAM_API_KEY)" : "✗ DEEPGRAM_API_KEY not set"}`,
    ];
    for (const line of report) {
      console.log(line);
    }
  });
|
|
744
|
+
|
|
745
|
+
// ---------------------------------------------------------------------------
|
|
746
|
+
// tag
|
|
747
|
+
// ---------------------------------------------------------------------------
|
|
748
|
+
|
|
749
|
+
/**
 * tag <id> — add, remove, or list tags on a transcript.
 *
 * --add and --remove may be combined: additions are applied first, then
 * removals, and the tag list returned by the last operation is printed.
 * (Previously --remove was silently ignored whenever --add was present.)
 * Empty entries such as the middle one in "a,,b" are dropped.
 * With neither flag the current tags are listed.
 */
const tagCmd = program
  .command("tag <id>")
  .description("Manage tags on a transcript")
  .option("--add <tags>", "Add comma-separated tags")
  .option("--remove <tags>", "Remove comma-separated tags")
  .option("--json", "Output as JSON")
  .action((id: string, opts) => {
    // Split a comma-separated flag value into trimmed, non-empty tag names.
    const parseTagList = (raw: string): string[] =>
      raw
        .split(",")
        .map((t: string) => t.trim())
        .filter(Boolean);

    let latest = opts.add ? addTags(id, parseTagList(opts.add)) : undefined;
    if (opts.remove) {
      latest = removeTags(id, parseTagList(opts.remove));
    }
    // No mutation requested: fall back to listing the current tags.
    const tags = latest ?? getTags(id);

    if (opts.json) {
      console.log(JSON.stringify({ id, tags }));
    } else {
      console.log(`Tags: ${tags.join(", ") || "(none)"}`);
    }
  });
|
|
770
|
+
|
|
771
|
+
// tags — list every tag returned by listAllTags() with its count.
program
  .command("tags")
  .description("List all tags with counts")
  .option("--json", "Output as JSON")
  .action((opts) => {
    const allTags = listAllTags();

    if (opts.json) {
      console.log(JSON.stringify(allTags, null, 2));
    } else if (allTags.length === 0) {
      console.log("No tags.");
    } else {
      // Tag names are padded to 20 characters so the counts line up.
      for (const entry of allTags) {
        console.log(` ${entry.tag.padEnd(20)} ${entry.count}`);
      }
    }
  });
|
|
781
|
+
|
|
782
|
+
// ---------------------------------------------------------------------------
|
|
783
|
+
// rename-speaker
|
|
784
|
+
// ---------------------------------------------------------------------------
|
|
785
|
+
|
|
786
|
+
/**
 * rename-speaker <id> — rename speaker labels in a transcript.
 *
 * The mapping comes either from --map (a JSON object of old label → new name)
 * or from a single --from/--to pair. The --map payload is validated to be a
 * non-empty plain object whose values are all strings; previously any JSON
 * value (null, arrays, numbers) was passed through unchecked.
 * Exits with status 1 on invalid input or when renameSpeakers returns a falsy
 * result for the given ID.
 */
program
  .command("rename-speaker <id>")
  .description("Rename speaker labels (e.g. 'Speaker 1' → 'Andrej Karpathy')")
  .option("--from <name>", "Original speaker label (e.g. 'Speaker 1')")
  .option("--to <name>", "New speaker name")
  .option("--map <json>", 'Bulk rename JSON: \'{"Speaker 1":"Name","Speaker 2":"Name"}\'')
  .option("--json", "Output as JSON")
  .action((id: string, opts) => {
    let mapping: Record<string, string> = {};

    if (opts.map) {
      let parsed: unknown;
      try {
        parsed = JSON.parse(opts.map);
      } catch {
        console.error("Invalid JSON for --map"); process.exit(1);
      }

      // JSON.parse accepts any JSON value; only a string → string object is usable here.
      const isStringMap =
        typeof parsed === "object" &&
        parsed !== null &&
        !Array.isArray(parsed) &&
        Object.values(parsed).every((v) => typeof v === "string");
      if (!isStringMap) {
        console.error('--map must be a JSON object mapping labels to names, e.g. {"Speaker 1":"Name"}');
        process.exit(1);
      }
      mapping = parsed as Record<string, string>;
    } else if (opts.from && opts.to) {
      mapping[opts.from] = opts.to;
    } else {
      console.error("Provide --from/--to or --map"); process.exit(1);
    }

    if (Object.keys(mapping).length === 0) {
      console.error("No speaker mappings provided."); process.exit(1);
    }

    const updated = renameSpeakers(id, mapping);
    if (!updated) { console.error(`Transcript '${id}' not found.`); process.exit(1); }

    if (opts.json) {
      console.log(JSON.stringify(updated, null, 2));
    } else {
      console.log(`Renamed ${Object.keys(mapping).length} speaker(s) in ${id}.`);
      for (const [from, to] of Object.entries(mapping)) {
        console.log(` ${from} → ${to}`);
      }
    }
  });
|
|
818
|
+
|
|
819
|
+
// ---------------------------------------------------------------------------
|
|
820
|
+
// translate
|
|
821
|
+
// ---------------------------------------------------------------------------
|
|
822
|
+
|
|
823
|
+
// translate <id> — translate a completed transcript and store the result as a
// new transcript record that points back at the original via source_transcript_id.
program
  .command("translate <id>")
  .description("Translate a completed transcript to another language, creating a new linked record")
  .requiredOption("--to <lang>", "Target language code or name (e.g. fr, de, Spanish)")
  .option("--provider <provider>", "AI provider: openai or anthropic (auto-detected from env)")
  .option("--json", "Output as JSON")
  .action(async (id: string, opts) => {
    const original = getTranscript(id);
    if (!original) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }
    const originalText = original.transcript_text;
    if (original.status !== "completed" || !originalText) {
      console.error(`Transcript '${id}' is not completed.`);
      process.exit(1);
    }

    // The resolved provider is used for the progress message and stored metadata;
    // translateText itself receives the raw --provider value.
    const provider = opts.provider ?? getDefaultSummaryProvider();
    if (!provider) {
      console.error("No AI provider configured. Set OPENAI_API_KEY or ANTHROPIC_API_KEY.");
      process.exit(1);
    }

    if (!opts.json) {
      console.log(`Translating to ${opts.to} with ${provider}...`);
    }

    try {
      const translatedText = await translateText(originalText, opts.to, opts.provider);
      const translatedWordCount = translatedText.split(/\s+/).filter(Boolean).length;

      // Create a new transcript record linked to the original
      const newRecord = createTranscript({
        source_url: original.source_url ?? `translated:${id}`,
        source_type: "translated",
        provider: original.provider,
        language: opts.to,
        title: original.title ? `${original.title} [${opts.to}]` : null,
        source_transcript_id: id,
      });

      updateTranscript(newRecord.id, {
        status: "completed",
        transcript_text: translatedText,
        word_count: translatedWordCount,
        metadata: { model: provider },
      });

      // Re-read so the JSON output reflects the stored record after the update.
      const result = getTranscript(newRecord.id)!;

      if (opts.json) {
        console.log(JSON.stringify(result, null, 2));
        return;
      }
      console.log(`\nTranslation ID: ${newRecord.id} (source: ${id})`);
      console.log(`\n--- Translation (${opts.to}) ---\n`);
      console.log(translatedText);
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`);
      process.exit(1);
    }
  });
|
|
878
|
+
|
|
879
|
+
// ---------------------------------------------------------------------------
|
|
880
|
+
// summarize
|
|
881
|
+
// ---------------------------------------------------------------------------
|
|
882
|
+
|
|
883
|
+
/**
 * summarize <id> — generate an AI summary of a completed transcript and store
 * it under `metadata.summary` on the same record.
 *
 * Exits with status 1 when the transcript is missing or not completed, when no
 * AI provider can be resolved, or when summarization throws.
 */
program
  .command("summarize <id>")
  .description("Summarize a completed transcript using AI (OpenAI or Anthropic)")
  .option("--provider <provider>", "Provider: openai or anthropic (auto-detected from env)")
  .option("--json", "Output as JSON")
  .action(async (id: string, opts) => {
    const t = getTranscript(id);
    if (!t) { console.error(`Transcript '${id}' not found.`); process.exit(1); }
    if (t.status !== "completed" || !t.transcript_text) {
      console.error(`Transcript '${id}' is not completed.`); process.exit(1);
    }

    // Resolved only for the availability check and the progress message;
    // summarizeText receives the raw --provider value.
    const provider = opts.provider ?? getDefaultSummaryProvider();
    if (!provider) {
      console.error("No AI provider configured. Set OPENAI_API_KEY or ANTHROPIC_API_KEY.");
      process.exit(1);
    }

    if (!opts.json) console.log(`Summarizing with ${provider}...`);

    try {
      const summary = await summarizeText(t.transcript_text, opts.provider);
      // Merge into existing metadata; the return value is not needed here.
      updateTranscript(id, {
        metadata: { ...t.metadata, summary },
      });

      if (opts.json) {
        console.log(JSON.stringify({ id, summary }, null, 2));
      } else {
        console.log(`\n--- Summary ---\n`);
        console.log(summary);
      }
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`);
      process.exit(1);
    }
  });
|
|
920
|
+
|
|
921
|
+
// ---------------------------------------------------------------------------
|
|
922
|
+
// diff
|
|
923
|
+
// ---------------------------------------------------------------------------
|
|
924
|
+
|
|
925
|
+
// diff <id1> <id2> — word-level comparison of two transcripts' text.
// Both records must exist and have non-empty transcript_text.
program
  .command("diff <id1> <id2>")
  .description("Compare two transcripts word-by-word")
  .option("--json", "Output as JSON")
  .action((id1: string, id2: string, opts) => {
    const left = getTranscript(id1);
    const right = getTranscript(id2);
    if (!left) {
      console.error(`Transcript '${id1}' not found.`);
      process.exit(1);
    }
    if (!right) {
      console.error(`Transcript '${id2}' not found.`);
      process.exit(1);
    }

    const leftText = left.transcript_text;
    const rightText = right.transcript_text;
    if (!leftText || !rightText) {
      console.error("Both transcripts must be completed.");
      process.exit(1);
    }

    const entries = wordDiff(leftText, rightText);
    const stats = diffStats(entries);

    if (opts.json) {
      console.log(JSON.stringify({ id1, id2, stats, diff: entries }, null, 2));
      return;
    }

    // Human-readable report: header, summary numbers, then the formatted diff.
    const reportLines = [
      `Comparing:`,
      ` A: ${left.title ?? id1} (${left.provider})`,
      ` B: ${right.title ?? id2} (${right.provider})`,
      `\nSimilarity: ${stats.similarity}%`,
      `Equal: ${stats.equal} words | Added: ${stats.added} | Removed: ${stats.removed}\n`,
      formatDiff(entries),
    ];
    for (const line of reportLines) {
      console.log(line);
    }
  });
|
|
953
|
+
|
|
954
|
+
// ---------------------------------------------------------------------------
|
|
955
|
+
// clip
|
|
956
|
+
// ---------------------------------------------------------------------------
|
|
957
|
+
|
|
958
|
+
/**
 * clip <id> — download the transcript's source video and cut out the
 * [--start, --end] range, burning in ASS subtitles built from the word-level
 * timestamps that fall entirely inside that range (unless --no-subtitles).
 *
 * Fixes over the previous version:
 *  - Failures set `process.exitCode` instead of calling `process.exit()` inside
 *    `catch`, so the `finally` block runs and the downloaded video and the
 *    temporary subtitle file are cleaned up on error too.
 *  - --start/--end are validated (finite, non-negative, start < end).
 *  - The subtitle file goes in the OS temp directory rather than a hard-coded /tmp.
 *  - `toAss` and `writeFileSync` are taken from file scope (the export command
 *    already uses them) instead of being re-imported dynamically.
 */
program
  .command("clip <id>")
  .description("Extract a video/audio clip with optional burned-in subtitles")
  .requiredOption("--start <seconds>", "Start time in seconds", parseFloat)
  .requiredOption("--end <seconds>", "End time in seconds", parseFloat)
  .option("--output <path>", "Output file path (default: clip-{id}.mp4)")
  .option("--no-subtitles", "Don't burn in subtitles")
  .action(async (id: string, opts) => {
    const t = getTranscript(id);
    if (!t) { console.error(`Transcript '${id}' not found.`); process.exit(1); }
    if (!t.source_url || t.source_type === "file") {
      console.error("Clip extraction requires a URL source."); process.exit(1);
    }

    const clipStart: number = opts.start;
    const clipEnd: number = opts.end;
    // parseFloat yields NaN for non-numeric input; reject it before downloading anything.
    if (!Number.isFinite(clipStart) || !Number.isFinite(clipEnd) || clipStart < 0 || clipEnd <= clipStart) {
      console.error("--start and --end must be numbers with 0 <= start < end.");
      process.exit(1);
    }

    const outputPath = opts.output ?? `clip-${id.slice(0, 8)}.mp4`;
    console.log(`Downloading video from ${t.source_url}...`);

    let video: Awaited<ReturnType<typeof downloadVideo>> | null = null;
    let subsFile: string | null = null;
    try {
      video = await downloadVideo(t.source_url);

      // Generate ASS subtitles for the time range
      if (opts.subtitles !== false && t.metadata?.words?.length) {
        const rangeWords = t.metadata.words.filter((w) => w.start >= clipStart && w.end <= clipEnd);
        if (rangeWords.length > 0) {
          // Offset words to start from 0 for the clip
          const offsetWords = rangeWords.map((w) => ({ ...w, start: w.start - clipStart, end: w.end - clipStart }));
          const assContent = toAss(offsetWords);
          const { tmpdir } = await import("node:os");
          const { join } = await import("node:path");
          subsFile = join(tmpdir(), `transcriber-clip-subs-${crypto.randomUUID()}.ass`);
          writeFileSync(subsFile, assContent, "utf8");
        }
      }

      console.log(`Creating clip [${clipStart}s → ${clipEnd}s]...`);
      await createClip({
        videoPath: video.path,
        start: clipStart,
        end: clipEnd,
        subtitlePath: subsFile ?? undefined,
        outputPath,
      });

      console.log(`Saved: ${outputPath}`);
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`);
      // Defer the non-zero exit so the cleanup below still runs.
      process.exitCode = 1;
    } finally {
      video?.cleanup();
      // Best-effort removal of the temporary subtitle file.
      if (subsFile) { try { const { unlinkSync } = await import("node:fs"); unlinkSync(subsFile); } catch {} }
    }
  });
|
|
1012
|
+
|
|
1013
|
+
// ---------------------------------------------------------------------------
|
|
1014
|
+
// meeting-notes
|
|
1015
|
+
// ---------------------------------------------------------------------------
|
|
1016
|
+
|
|
1017
|
+
// meeting-notes <id> — generate AI meeting notes for a completed transcript and
// store them under metadata.meeting_notes on the same record.
program
  .command("meeting-notes <id>")
  .description("Generate structured meeting notes from a transcript using AI")
  .option("--provider <provider>", "AI provider: openai or anthropic")
  .option("--json", "Output as JSON")
  .action(async (id: string, opts) => {
    const record = getTranscript(id);
    if (!record) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }
    const text = record.transcript_text;
    if (record.status !== "completed" || !text) {
      console.error(`Transcript '${id}' is not completed.`);
      process.exit(1);
    }

    const provider = opts.provider ?? getDefaultSummaryProvider();
    if (!provider) {
      console.error("No AI provider. Set OPENAI_API_KEY or ANTHROPIC_API_KEY.");
      process.exit(1);
    }

    if (!opts.json) {
      console.log(`Generating meeting notes with ${provider}...`);
    }

    try {
      const notes = await generateMeetingNotes(text, opts.provider);
      // Keep existing metadata and add/overwrite only the meeting_notes key.
      updateTranscript(id, { metadata: { ...record.metadata, meeting_notes: notes } });

      const rendered = opts.json
        ? JSON.stringify({ id, meeting_notes: notes }, null, 2)
        : `\n${notes}`;
      console.log(rendered);
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`);
      process.exit(1);
    }
  });
|
|
1048
|
+
|
|
1049
|
+
// ---------------------------------------------------------------------------
|
|
1050
|
+
// highlights
|
|
1051
|
+
// ---------------------------------------------------------------------------
|
|
1052
|
+
|
|
1053
|
+
// highlights <id> — extract key quotes from a completed transcript with AI and
// store them under metadata.highlights on the same record.
program
  .command("highlights <id>")
  .description("Extract 5-10 key moments/quotes from a transcript using AI")
  .option("--provider <provider>", "AI provider: openai or anthropic")
  .option("--json", "Output as JSON")
  .action(async (id: string, opts) => {
    const record = getTranscript(id);
    if (!record) {
      console.error(`Transcript '${id}' not found.`);
      process.exit(1);
    }
    const text = record.transcript_text;
    if (record.status !== "completed" || !text) {
      console.error(`Transcript '${id}' is not completed.`);
      process.exit(1);
    }

    const provider = opts.provider ?? getDefaultSummaryProvider();
    if (!provider) {
      console.error("No AI provider. Set OPENAI_API_KEY or ANTHROPIC_API_KEY.");
      process.exit(1);
    }

    if (!opts.json) {
      console.log(`Extracting highlights with ${provider}...`);
    }

    try {
      const highlights = await extractHighlights(text, opts.provider);
      updateTranscript(id, { metadata: { ...record.metadata, highlights } });

      if (opts.json) {
        console.log(JSON.stringify({ id, highlights }, null, 2));
        return;
      }

      console.log(`\n--- ${highlights.length} Key Moments ---\n`);
      // Numbered list: quote (with optional speaker) followed by its context.
      highlights.forEach((item, index) => {
        const attribution = item.speaker ? ` (${item.speaker})` : "";
        console.log(`${index + 1}. "${item.quote}"${attribution}`);
        console.log(` ${item.context}\n`);
      });
    } catch (error) {
      console.error(`Error: ${error instanceof Error ? error.message : error}`);
      process.exit(1);
    }
  });
|
|
1090
|
+
|
|
1091
|
+
// ---------------------------------------------------------------------------
|
|
1092
|
+
// stats
|
|
1093
|
+
// ---------------------------------------------------------------------------
|
|
1094
|
+
|
|
1095
|
+
// stats — print transcript counts by status and provider plus summed costs.
// Costs are read from metadata.cost_usd of the records returned by
// listTranscripts({ limit: 10000 }).
program
  .command("stats")
  .description("Show transcript counts by status, provider, and costs")
  .option("--json", "Output as JSON")
  .action((opts) => {
    const counts = countTranscripts();

    // Accumulate the grand total and the per-provider totals in one pass.
    const costByProvider: Record<string, number> = {};
    let totalCost = 0;
    for (const record of listTranscripts({ limit: 10000 })) {
      const cost = record.metadata?.cost_usd;
      totalCost += cost ?? 0;
      if (cost) {
        costByProvider[record.provider] = (costByProvider[record.provider] ?? 0) + cost;
      }
    }

    if (opts.json) {
      const payload = { ...counts, total_cost_usd: totalCost, cost_by_provider: costByProvider };
      console.log(JSON.stringify(payload, null, 2));
      return;
    }

    console.log(`Total: ${counts.total}`);

    console.log("\nBy status:");
    for (const [status, n] of Object.entries(counts.by_status)) {
      console.log(` ${status.padEnd(12)} ${n}`);
    }

    console.log("\nBy provider:");
    for (const [provider, n] of Object.entries(counts.by_provider)) {
      const providerCost = costByProvider[provider];
      // The cost suffix is shown only for providers with a non-zero recorded cost.
      const suffix = providerCost ? ` ($${providerCost.toFixed(4)})` : "";
      console.log(` ${provider.padEnd(12)} ${n}${suffix}`);
    }

    if (totalCost > 0) {
      console.log(`\nTotal cost: $${totalCost.toFixed(4)}`);
    }
  });
|
|
1132
|
+
|
|
1133
|
+
// ---------------------------------------------------------------------------
|
|
1134
|
+
// annotate
|
|
1135
|
+
// ---------------------------------------------------------------------------
|
|
1136
|
+
|
|
1137
|
+
// Parent command for the annotation subcommands (add / list / remove).
const annoCmd = program.command("annotate");
annoCmd.description("Manage timestamped annotations/bookmarks on transcripts");
|
|
1140
|
+
|
|
1141
|
+
/**
 * annotate add <transcript-id> — attach a note to a timestamp of a transcript.
 *
 * --at is parsed with parseFloat, which yields NaN for non-numeric input; the
 * value is now validated (finite and non-negative) before anything is stored.
 * Exits with status 1 on an invalid timestamp.
 */
annoCmd
  .command("add <transcript-id>")
  .description("Add an annotation at a timestamp")
  .requiredOption("--at <seconds>", "Timestamp in seconds", parseFloat)
  .requiredOption("--note <text>", "Annotation note")
  .option("--json", "Output as JSON")
  .action((transcriptId: string, opts) => {
    if (!Number.isFinite(opts.at) || opts.at < 0) {
      console.error("--at must be a non-negative number of seconds.");
      process.exit(1);
    }

    const anno = createAnnotation(transcriptId, opts.at, opts.note);
    if (opts.json) {
      console.log(JSON.stringify(anno, null, 2));
    } else {
      // The first 8 characters of the annotation ID are shown as a short handle.
      console.log(`Added annotation at ${fmtAnnoTs(opts.at)}: ${opts.note} (${anno.id.slice(0, 8)})`);
    }
  });
|
|
1152
|
+
|
|
1153
|
+
// annotate list <transcript-id> — print every annotation of a transcript.
annoCmd
  .command("list <transcript-id>")
  .description("List all annotations for a transcript")
  .option("--json", "Output as JSON")
  .action((transcriptId: string, opts) => {
    const annotations = listAnnotations(transcriptId);

    if (opts.json) {
      console.log(JSON.stringify(annotations, null, 2));
    } else if (annotations.length === 0) {
      console.log("No annotations.");
    } else {
      // One line per annotation: formatted timestamp, note, shortened ID.
      for (const entry of annotations) {
        console.log(` [${fmtAnnoTs(entry.timestamp_sec)}] ${entry.note} (${entry.id.slice(0, 8)})`);
      }
    }
  });
|
|
1165
|
+
|
|
1166
|
+
// annotate remove <id> — delete one annotation; exits with status 1 when
// deleteAnnotation returns a falsy result.
annoCmd
  .command("remove <id>")
  .description("Delete an annotation by ID")
  .action((id: string) => {
    if (!deleteAnnotation(id)) {
      console.error("Annotation not found.");
      process.exit(1);
    } else {
      console.log(`Deleted annotation ${id}.`);
    }
  });
|
|
1173
|
+
|
|
1174
|
+
// ---------------------------------------------------------------------------
|
|
1175
|
+
// feed
|
|
1176
|
+
// ---------------------------------------------------------------------------
|
|
1177
|
+
|
|
1178
|
+
// Parent command for the podcast feed subcommands (add / list / check / remove).
const feedCmd = program.command("feed");
feedCmd.description("Manage podcast RSS feeds for auto-transcription");
|
|
1181
|
+
|
|
1182
|
+
// feed add <url> — fetch the feed once (which also yields its title), then
// append it to the stored feed list unless the same URL is already present.
feedCmd
  .command("add <url>")
  .description("Add a podcast RSS feed to watch")
  .action(async (url: string) => {
    try {
      const fetched = await fetchFeedEpisodes(url);
      const label = fetched.feedTitle ?? url;
      const cfg = getConfig();

      const alreadyWatched = cfg.feeds.some((existing) => existing.url === url);
      if (alreadyWatched) {
        console.log(`Feed already added: ${label}`);
        return;
      }

      cfg.feeds.push({ url, title: fetched.feedTitle, lastChecked: null });
      setConfig({ feeds: cfg.feeds });
      console.log(`Added feed: ${label}`);
    } catch (e) {
      const reason = e instanceof Error ? e.message : e;
      console.error(`Failed to add feed: ${reason}`);
      process.exit(1);
    }
  });
|
|
1201
|
+
|
|
1202
|
+
// feed list — show the feeds stored in the config together with the time each
// was last checked.
feedCmd
  .command("list")
  .description("List watched feeds")
  .option("--json", "Output as JSON")
  .action((opts) => {
    const { feeds } = getConfig();

    if (opts.json) {
      console.log(JSON.stringify(feeds, null, 2));
      return;
    }
    if (feeds.length === 0) {
      console.log("No feeds.");
      return;
    }

    // Two lines per feed: title (or URL) with check status, then the URL.
    for (const feed of feeds) {
      const status = feed.lastChecked ? ` (last checked: ${feed.lastChecked})` : " (never checked)";
      console.log(` ${feed.title ?? feed.url}${status}`);
      console.log(` ${feed.url}`);
    }
  });
|
|
1216
|
+
|
|
1217
|
+
// feed check — for every watched feed, find episodes whose URL has no
// transcript yet (per findBySourceUrl) and, unless --dry-run is set,
// transcribe them one after another. A feed's lastChecked is updated only
// when that feed was processed without its fetch throwing.
// Note: --json is accepted but this action does not read it.
feedCmd
  .command("check")
  .description("Check all feeds for new episodes and transcribe them")
  .option("--provider <provider>", "Provider override")
  .option("--dry-run", "List new episodes without transcribing")
  .option("--json", "Output as JSON")
  .action(async (opts) => {
    const cfg = getConfig();
    if (cfg.feeds.length === 0) {
      console.log("No feeds to check.");
      return;
    }

    const provider = (opts.provider ?? cfg.defaultProvider) as TranscriptProvider;
    let totalNew = 0;

    for (const feed of cfg.feeds) {
      console.log(`Checking ${feed.title ?? feed.url}...`);
      try {
        const { episodes } = await fetchFeedEpisodes(feed.url);
        const unseen = episodes.filter((ep) => !findBySourceUrl(ep.url));

        if (unseen.length === 0) {
          console.log(" No new episodes.");
        } else {
          console.log(` ${unseen.length} new episode(s)`);
          totalNew += unseen.length;
        }

        // In dry-run mode nothing is queued for transcription.
        const queue = opts.dryRun ? [] : unseen;
        for (const episode of queue) {
          console.log(` Transcribing: ${episode.title ?? episode.url}`);
          const record = createTranscript({
            source_url: episode.url,
            source_type: "url",
            provider,
            title: episode.title,
          });
          updateTranscript(record.id, { status: "processing" });

          let audio: Awaited<ReturnType<typeof prepareAudio>> | null = null;
          try {
            audio = await prepareAudio(episode.url);
            const result = await transcribeFile(audio.filePath, { provider });
            const wordCount = result.text.split(/\s+/).filter(Boolean).length;
            updateTranscript(record.id, {
              status: "completed",
              transcript_text: result.text,
              duration_seconds: result.duration_seconds ?? undefined,
              word_count: wordCount,
              metadata: result.metadata,
            });
            console.log(` ✓ ${record.id.slice(0, 8)}`);
          } catch (e) {
            // A failed episode is recorded and reported; the loop moves on to the next one.
            const msg = e instanceof Error ? e.message : String(e);
            updateTranscript(record.id, { status: "failed", error_message: msg });
            console.error(` ✗ ${msg}`);
          } finally {
            audio?.cleanup();
          }
        }

        // Update lastChecked
        feed.lastChecked = new Date().toISOString();
      } catch (e) {
        console.error(` Error: ${e instanceof Error ? e.message : e}`);
      }
    }

    // Persist the updated lastChecked values for all feeds at once.
    setConfig({ feeds: cfg.feeds });
    console.log(`\nDone. ${totalNew} new episode(s) found.`);
  });
|
|
1285
|
+
|
|
1286
|
+
/**
 * feed remove <url> — stop watching a feed.
 *
 * Exits with status 1 when the URL is not in the stored feed list; previously
 * the command printed "Removed feed" even when nothing matched.
 */
feedCmd
  .command("remove <url>")
  .description("Remove a feed")
  .action((url: string) => {
    const cfg = getConfig();
    const remaining = cfg.feeds.filter((f) => f.url !== url);

    // Same length after filtering means no stored feed had this URL.
    if (remaining.length === cfg.feeds.length) {
      console.error(`Feed not found: ${url}`);
      process.exit(1);
    }

    setConfig({ feeds: remaining });
    console.log(`Removed feed: ${url}`);
  });
|
|
1295
|
+
|
|
1296
|
+
// ---------------------------------------------------------------------------
|
|
1297
|
+
// config
|
|
1298
|
+
// ---------------------------------------------------------------------------
|
|
1299
|
+
|
|
1300
|
+
// Parent command for the configuration subcommands (view / set / reset).
const configCmd = program.command("config");
configCmd.description("View or change persistent configuration defaults");
|
|
1303
|
+
|
|
1304
|
+
// config view — print the current configuration, either as JSON or as one
// "key value" line per displayed setting.
configCmd
  .command("view")
  .description("Show current config")
  .option("--json", "Output as JSON")
  .action((opts) => {
    const cfg = getConfig();

    if (opts.json) {
      console.log(JSON.stringify(cfg, null, 2));
      return;
    }

    const vocabText = cfg.vocab?.length ? cfg.vocab.join(", ") : "(none)";
    const rows = [
      `defaultProvider ${cfg.defaultProvider}`,
      `defaultLanguage ${cfg.defaultLanguage}`,
      `defaultFormat ${cfg.defaultFormat}`,
      `diarize ${cfg.diarize}`,
      `vocab ${vocabText}`,
    ];
    for (const row of rows) {
      console.log(row);
    }
  });
|
|
1320
|
+
|
|
1321
|
+
/**
 * config set <key> <value> — persist a single configuration value.
 *
 * The key must be one of CONFIG_KEYS. Value handling:
 *  - diarize: must be "true" or "false" (case-insensitive). Previously any
 *    other input, e.g. "yes" or "1", was silently stored as false.
 *  - vocab: comma-separated list, trimmed, empty entries dropped.
 *  - every other key: stored as the raw string.
 * Exits with status 1 on an unknown key or an invalid diarize value.
 */
configCmd
  .command("set <key> <value>")
  .description(`Set a config value. Keys: ${CONFIG_KEYS.join(", ")}`)
  .action((key: string, value: string) => {
    if (!(CONFIG_KEYS as readonly string[]).includes(key)) {
      console.error(`Unknown config key: '${key}'. Valid keys: ${CONFIG_KEYS.join(", ")}`);
      process.exit(1);
    }

    const k = key as ConfigKey;
    let parsed: unknown = value;
    if (k === "diarize") {
      const normalized = value.trim().toLowerCase();
      if (normalized !== "true" && normalized !== "false") {
        console.error(`Invalid value for diarize: '${value}'. Use true or false.`);
        process.exit(1);
      }
      parsed = normalized === "true";
    } else if (k === "vocab") {
      parsed = value.split(",").map((v: string) => v.trim()).filter(Boolean);
    }

    const updated = setConfig({ [k]: parsed } as Partial<typeof CONFIG_DEFAULTS>);
    console.log(`Set ${key} = ${updated[k]}`);
  });
|
|
1338
|
+
|
|
1339
|
+
// config reset — call resetConfig() and confirm on stdout.
const resetCmd = configCmd.command("reset");
resetCmd.description("Reset all config to defaults");
resetCmd.action(() => {
  resetConfig();
  console.log("Config reset to defaults.");
});
|
|
1346
|
+
|
|
1347
|
+
// Entry point: parse the command line and dispatch to the matching command
// registered above. Must stay last so every command is registered first.
program.parse();
|