@hasna/microservices 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/microservices/microservice-transcriber/src/cli/index.ts +247 -1
- package/microservices/microservice-transcriber/src/db/comments.ts +166 -0
- package/microservices/microservice-transcriber/src/db/migrations.ts +46 -0
- package/microservices/microservice-transcriber/src/db/proofread.ts +119 -0
- package/microservices/microservice-transcriber/src/lib/downloader.ts +68 -0
- package/microservices/microservice-transcriber/src/lib/proofread.ts +296 -0
- package/microservices/microservice-transcriber/src/mcp/index.ts +263 -3
- package/package.json +1 -1
|
@@ -22,7 +22,8 @@ import {
|
|
|
22
22
|
type TranscriptStatus,
|
|
23
23
|
type TranscriptSourceType,
|
|
24
24
|
} from "../db/transcripts.js";
|
|
25
|
-
import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, type TrimOptions } from "../lib/downloader.js";
|
|
25
|
+
import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, fetchComments, type TrimOptions } from "../lib/downloader.js";
|
|
26
|
+
import { createComment, listComments, searchComments, getCommentStats, getTopComments, importComments } from "../db/comments.js";
|
|
26
27
|
import { transcribeFile, checkProviders, toSrt, toVtt, toAss, toMarkdown, segmentByChapters, formatWithConfidence, estimateCost } from "../lib/providers.js";
|
|
27
28
|
import { getConfig, setConfig, resetConfig, CONFIG_DEFAULTS, CONFIG_KEYS, type ConfigKey } from "../lib/config.js";
|
|
28
29
|
import { summarizeText, extractHighlights, generateMeetingNotes, getDefaultSummaryProvider } from "../lib/summarizer.js";
|
|
@@ -33,6 +34,7 @@ import { createAnnotation, listAnnotations, deleteAnnotation, formatTimestamp as
|
|
|
33
34
|
import { pushToNotion } from "../lib/notion.js";
|
|
34
35
|
import { startLiveTranscription } from "../lib/live.js";
|
|
35
36
|
import { wordDiff, formatDiff, diffStats } from "../lib/diff.js";
|
|
37
|
+
import { proofreadTranscript, listIssues, applySuggestion, dismissIssue, getProofreadStats, exportAnnotated, type IssueType } from "../lib/proofread.js";
|
|
36
38
|
|
|
37
39
|
const program = new Command();
|
|
38
40
|
|
|
@@ -56,6 +58,7 @@ program
|
|
|
56
58
|
.option("--diarize", "Identify different speakers (ElevenLabs only)")
|
|
57
59
|
.option("--vocab <words>", "Custom vocabulary hints (comma-separated, e.g. 'Karpathy,MicroGPT,SABR')")
|
|
58
60
|
.option("--summarize", "Auto-summarize after transcription using AI")
|
|
61
|
+
.option("--comments", "Also fetch and store YouTube/Vimeo comments")
|
|
59
62
|
.option("--force", "Re-transcribe even if URL was already transcribed")
|
|
60
63
|
.option("--json", "Output as JSON")
|
|
61
64
|
.action(async (rawSources: string[], opts) => {
|
|
@@ -202,6 +205,33 @@ program
|
|
|
202
205
|
word_count: result.text.split(/\s+/).filter(Boolean).length, timestamp: new Date().toISOString(),
|
|
203
206
|
});
|
|
204
207
|
|
|
208
|
+
// Fetch comments if requested
|
|
209
|
+
if (opts.comments && (sourceType === "youtube" || sourceType === "vimeo")) {
|
|
210
|
+
try {
|
|
211
|
+
if (!opts.json) process.stdout.write(" Fetching comments...");
|
|
212
|
+
const rawComments = await fetchComments(source);
|
|
213
|
+
if (rawComments.length > 0) {
|
|
214
|
+
const mapped = rawComments.map((c) => ({
|
|
215
|
+
platform: sourceType,
|
|
216
|
+
author: c.author,
|
|
217
|
+
author_handle: c.author_id,
|
|
218
|
+
comment_text: c.text,
|
|
219
|
+
likes: c.like_count,
|
|
220
|
+
reply_count: 0,
|
|
221
|
+
is_reply: c.parent !== null,
|
|
222
|
+
parent_comment_id: c.parent,
|
|
223
|
+
published_at: c.timestamp ? new Date(c.timestamp * 1000).toISOString() : null,
|
|
224
|
+
}));
|
|
225
|
+
importComments(record.id, mapped);
|
|
226
|
+
if (!opts.json) console.log(` ${rawComments.length} comment(s) imported.`);
|
|
227
|
+
} else {
|
|
228
|
+
if (!opts.json) console.log(" no comments found.");
|
|
229
|
+
}
|
|
230
|
+
} catch (e) {
|
|
231
|
+
if (!opts.json) console.error(` Warning: comment fetch failed — ${e instanceof Error ? e.message : e}`);
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
|
|
205
235
|
results.push({ source, id: record.id, success: true });
|
|
206
236
|
|
|
207
237
|
if (opts.json && !isBatch) {
|
|
@@ -1171,6 +1201,92 @@ annoCmd
|
|
|
1171
1201
|
else { console.error("Annotation not found."); process.exit(1); }
|
|
1172
1202
|
});
|
|
1173
1203
|
|
|
1204
|
+
// ---------------------------------------------------------------------------
|
|
1205
|
+
// comments
|
|
1206
|
+
// ---------------------------------------------------------------------------
|
|
1207
|
+
|
|
1208
|
+
const commentsCmd = program
|
|
1209
|
+
.command("comments")
|
|
1210
|
+
.description("Manage video comments extracted from YouTube/Vimeo");
|
|
1211
|
+
|
|
1212
|
+
commentsCmd
|
|
1213
|
+
.command("list <transcript-id>")
|
|
1214
|
+
.description("List comments for a transcript")
|
|
1215
|
+
.option("--top", "Sort by most liked")
|
|
1216
|
+
.option("--limit <n>", "Max results", "20")
|
|
1217
|
+
.option("--json", "Output as JSON")
|
|
1218
|
+
.action((transcriptId: string, opts) => {
|
|
1219
|
+
const comments = listComments(transcriptId, {
|
|
1220
|
+
limit: parseInt(opts.limit),
|
|
1221
|
+
top: opts.top,
|
|
1222
|
+
});
|
|
1223
|
+
|
|
1224
|
+
if (opts.json) {
|
|
1225
|
+
console.log(JSON.stringify(comments, null, 2));
|
|
1226
|
+
return;
|
|
1227
|
+
}
|
|
1228
|
+
|
|
1229
|
+
if (comments.length === 0) {
|
|
1230
|
+
console.log("No comments found.");
|
|
1231
|
+
return;
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1234
|
+
for (const c of comments) {
|
|
1235
|
+
const likesStr = c.likes > 0 ? ` [${c.likes} likes]` : "";
|
|
1236
|
+
const replyStr = c.is_reply ? " (reply)" : "";
|
|
1237
|
+
console.log(`${c.author ?? "Anonymous"}${replyStr}${likesStr}`);
|
|
1238
|
+
console.log(` ${c.comment_text.slice(0, 200)}${c.comment_text.length > 200 ? "..." : ""}`);
|
|
1239
|
+
console.log();
|
|
1240
|
+
}
|
|
1241
|
+
});
|
|
1242
|
+
|
|
1243
|
+
commentsCmd
|
|
1244
|
+
.command("search <query>")
|
|
1245
|
+
.description("Search comment text across all transcripts")
|
|
1246
|
+
.option("--json", "Output as JSON")
|
|
1247
|
+
.action((query: string, opts) => {
|
|
1248
|
+
const results = searchComments(query);
|
|
1249
|
+
|
|
1250
|
+
if (opts.json) {
|
|
1251
|
+
console.log(JSON.stringify(results, null, 2));
|
|
1252
|
+
return;
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
if (results.length === 0) {
|
|
1256
|
+
console.log(`No comments matching '${query}'.`);
|
|
1257
|
+
return;
|
|
1258
|
+
}
|
|
1259
|
+
|
|
1260
|
+
console.log(`Found ${results.length} comment(s):\n`);
|
|
1261
|
+
for (const c of results) {
|
|
1262
|
+
const likesStr = c.likes > 0 ? ` [${c.likes} likes]` : "";
|
|
1263
|
+
console.log(`${c.author ?? "Anonymous"}${likesStr} (transcript: ${c.transcript_id.slice(0, 8)})`);
|
|
1264
|
+
console.log(` ${c.comment_text.slice(0, 200)}${c.comment_text.length > 200 ? "..." : ""}`);
|
|
1265
|
+
console.log();
|
|
1266
|
+
}
|
|
1267
|
+
});
|
|
1268
|
+
|
|
1269
|
+
commentsCmd
|
|
1270
|
+
.command("stats <transcript-id>")
|
|
1271
|
+
.description("Show comment statistics for a transcript")
|
|
1272
|
+
.option("--json", "Output as JSON")
|
|
1273
|
+
.action((transcriptId: string, opts) => {
|
|
1274
|
+
const stats = getCommentStats(transcriptId);
|
|
1275
|
+
|
|
1276
|
+
if (opts.json) {
|
|
1277
|
+
console.log(JSON.stringify(stats, null, 2));
|
|
1278
|
+
return;
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
console.log(`Total comments: ${stats.total}`);
|
|
1282
|
+
console.log(`Replies: ${stats.replies}`);
|
|
1283
|
+
console.log(`Unique authors: ${stats.unique_authors}`);
|
|
1284
|
+
console.log(`Avg likes: ${stats.avg_likes}`);
|
|
1285
|
+
if (stats.top_commenter) {
|
|
1286
|
+
console.log(`Top commenter: ${stats.top_commenter}`);
|
|
1287
|
+
}
|
|
1288
|
+
});
|
|
1289
|
+
|
|
1174
1290
|
// ---------------------------------------------------------------------------
|
|
1175
1291
|
// watch-feed
|
|
1176
1292
|
// ---------------------------------------------------------------------------
|
|
@@ -1344,4 +1460,134 @@ configCmd
|
|
|
1344
1460
|
console.log("Config reset to defaults.");
|
|
1345
1461
|
});
|
|
1346
1462
|
|
|
1463
|
+
// ---------------------------------------------------------------------------
|
|
1464
|
+
// proofread
|
|
1465
|
+
// ---------------------------------------------------------------------------
|
|
1466
|
+
|
|
1467
|
+
const proofreadCmd = program
|
|
1468
|
+
.command("proofread")
|
|
1469
|
+
.description("AI-powered spellcheck and proofreading for transcripts");
|
|
1470
|
+
|
|
1471
|
+
proofreadCmd
|
|
1472
|
+
.command("run <transcript-id>")
|
|
1473
|
+
.description("Run AI proofreading on a transcript (non-destructive)")
|
|
1474
|
+
.option("--types <types>", "Comma-separated issue types: spelling,grammar,punctuation,clarity")
|
|
1475
|
+
.option("--confidence <n>", "Minimum confidence threshold 0-1 (default 0.7)", parseFloat)
|
|
1476
|
+
.option("--provider <provider>", "AI provider: openai or anthropic")
|
|
1477
|
+
.option("--json", "Output as JSON")
|
|
1478
|
+
.action(async (transcriptId: string, opts) => {
|
|
1479
|
+
const types = opts.types ? opts.types.split(",").map((t: string) => t.trim()) as IssueType[] : undefined;
|
|
1480
|
+
const confidence = opts.confidence ?? 0.7;
|
|
1481
|
+
|
|
1482
|
+
if (!opts.json) console.log(`Proofreading transcript ${transcriptId}...`);
|
|
1483
|
+
|
|
1484
|
+
try {
|
|
1485
|
+
const issues = await proofreadTranscript(transcriptId, { types, confidence_threshold: confidence, provider: opts.provider });
|
|
1486
|
+
|
|
1487
|
+
if (opts.json) {
|
|
1488
|
+
console.log(JSON.stringify(issues, null, 2));
|
|
1489
|
+
} else {
|
|
1490
|
+
console.log(`Found ${issues.length} issue(s):\n`);
|
|
1491
|
+
for (const issue of issues) {
|
|
1492
|
+
console.log(` [${issue.issue_type}] "${issue.original_text}" -> "${issue.suggestion ?? "(no suggestion)"}" (${((issue.confidence ?? 0) * 100).toFixed(0)}%)`);
|
|
1493
|
+
if (issue.explanation) console.log(` ${issue.explanation}`);
|
|
1494
|
+
}
|
|
1495
|
+
}
|
|
1496
|
+
} catch (error) {
|
|
1497
|
+
console.error(`Error: ${error instanceof Error ? error.message : error}`);
|
|
1498
|
+
process.exit(1);
|
|
1499
|
+
}
|
|
1500
|
+
});
|
|
1501
|
+
|
|
1502
|
+
proofreadCmd
|
|
1503
|
+
.command("issues <transcript-id>")
|
|
1504
|
+
.description("List proofread issues for a transcript")
|
|
1505
|
+
.option("--type <type>", "Filter by issue type: spelling, grammar, punctuation, clarity")
|
|
1506
|
+
.option("--pending", "Show only pending issues")
|
|
1507
|
+
.option("--json", "Output as JSON")
|
|
1508
|
+
.action((transcriptId: string, opts) => {
|
|
1509
|
+
const filters: { issue_type?: IssueType; status?: "pending" } = {};
|
|
1510
|
+
if (opts.type) filters.issue_type = opts.type as IssueType;
|
|
1511
|
+
if (opts.pending) filters.status = "pending";
|
|
1512
|
+
|
|
1513
|
+
const issues = listIssues(transcriptId, filters);
|
|
1514
|
+
|
|
1515
|
+
if (opts.json) {
|
|
1516
|
+
console.log(JSON.stringify(issues, null, 2));
|
|
1517
|
+
return;
|
|
1518
|
+
}
|
|
1519
|
+
|
|
1520
|
+
if (issues.length === 0) { console.log("No issues found."); return; }
|
|
1521
|
+
|
|
1522
|
+
for (const issue of issues) {
|
|
1523
|
+
const conf = issue.confidence !== null ? ` ${(issue.confidence * 100).toFixed(0)}%` : "";
|
|
1524
|
+
console.log(`${issue.id.slice(0, 8)} [${issue.status.padEnd(9)}] [${issue.issue_type.padEnd(11)}]${conf} "${issue.original_text}" -> "${issue.suggestion ?? "-"}"`);
|
|
1525
|
+
}
|
|
1526
|
+
});
|
|
1527
|
+
|
|
1528
|
+
proofreadCmd
|
|
1529
|
+
.command("apply <issue-id>")
|
|
1530
|
+
.description("Apply a proofread suggestion (modifies transcript text)")
|
|
1531
|
+
.option("--json", "Output as JSON")
|
|
1532
|
+
.action((issueId: string, opts) => {
|
|
1533
|
+
const updated = applySuggestion(issueId);
|
|
1534
|
+
if (!updated) { console.error(`Issue '${issueId}' not found.`); process.exit(1); }
|
|
1535
|
+
|
|
1536
|
+
if (opts.json) {
|
|
1537
|
+
console.log(JSON.stringify(updated, null, 2));
|
|
1538
|
+
} else {
|
|
1539
|
+
console.log(`Applied: "${updated.original_text}" -> "${updated.suggestion}"`);
|
|
1540
|
+
}
|
|
1541
|
+
});
|
|
1542
|
+
|
|
1543
|
+
proofreadCmd
|
|
1544
|
+
.command("dismiss <issue-id>")
|
|
1545
|
+
.description("Dismiss a proofread issue without changing text")
|
|
1546
|
+
.option("--json", "Output as JSON")
|
|
1547
|
+
.action((issueId: string, opts) => {
|
|
1548
|
+
const updated = dismissIssue(issueId);
|
|
1549
|
+
if (!updated) { console.error(`Issue '${issueId}' not found.`); process.exit(1); }
|
|
1550
|
+
|
|
1551
|
+
if (opts.json) {
|
|
1552
|
+
console.log(JSON.stringify(updated, null, 2));
|
|
1553
|
+
} else {
|
|
1554
|
+
console.log(`Dismissed: "${updated.original_text}"`);
|
|
1555
|
+
}
|
|
1556
|
+
});
|
|
1557
|
+
|
|
1558
|
+
proofreadCmd
|
|
1559
|
+
.command("export <transcript-id>")
|
|
1560
|
+
.description("Export transcript with inline proofread annotations")
|
|
1561
|
+
.action((transcriptId: string) => {
|
|
1562
|
+
try {
|
|
1563
|
+
const annotated = exportAnnotated(transcriptId);
|
|
1564
|
+
console.log(annotated);
|
|
1565
|
+
} catch (error) {
|
|
1566
|
+
console.error(`Error: ${error instanceof Error ? error.message : error}`);
|
|
1567
|
+
process.exit(1);
|
|
1568
|
+
}
|
|
1569
|
+
});
|
|
1570
|
+
|
|
1571
|
+
proofreadCmd
|
|
1572
|
+
.command("stats <transcript-id>")
|
|
1573
|
+
.description("Show proofread issue statistics")
|
|
1574
|
+
.option("--json", "Output as JSON")
|
|
1575
|
+
.action((transcriptId: string, opts) => {
|
|
1576
|
+
const stats = getProofreadStats(transcriptId);
|
|
1577
|
+
|
|
1578
|
+
if (opts.json) {
|
|
1579
|
+
console.log(JSON.stringify(stats, null, 2));
|
|
1580
|
+
return;
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
console.log(`Total issues: ${stats.total}`);
|
|
1584
|
+
console.log(`Pending: ${stats.pending} | Applied: ${stats.applied} | Dismissed: ${stats.dismissed}`);
|
|
1585
|
+
if (Object.keys(stats.by_type).length > 0) {
|
|
1586
|
+
console.log("\nBy type:");
|
|
1587
|
+
for (const [type, count] of Object.entries(stats.by_type)) {
|
|
1588
|
+
console.log(` ${type.padEnd(12)} ${count}`);
|
|
1589
|
+
}
|
|
1590
|
+
}
|
|
1591
|
+
});
|
|
1592
|
+
|
|
1347
1593
|
program.parse();
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { getDatabase } from "./database.js";
|
|
2
|
+
|
|
3
|
+
export interface Comment {
|
|
4
|
+
id: string;
|
|
5
|
+
transcript_id: string;
|
|
6
|
+
platform: string;
|
|
7
|
+
author: string | null;
|
|
8
|
+
author_handle: string | null;
|
|
9
|
+
comment_text: string;
|
|
10
|
+
likes: number;
|
|
11
|
+
reply_count: number;
|
|
12
|
+
is_reply: number;
|
|
13
|
+
parent_comment_id: string | null;
|
|
14
|
+
published_at: string | null;
|
|
15
|
+
created_at: string;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export interface CreateCommentInput {
|
|
19
|
+
transcript_id: string;
|
|
20
|
+
platform?: string;
|
|
21
|
+
author?: string | null;
|
|
22
|
+
author_handle?: string | null;
|
|
23
|
+
comment_text: string;
|
|
24
|
+
likes?: number;
|
|
25
|
+
reply_count?: number;
|
|
26
|
+
is_reply?: boolean;
|
|
27
|
+
parent_comment_id?: string | null;
|
|
28
|
+
published_at?: string | null;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export interface ListCommentsOptions {
|
|
32
|
+
limit?: number;
|
|
33
|
+
offset?: number;
|
|
34
|
+
top?: boolean;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export interface CommentStats {
|
|
38
|
+
total: number;
|
|
39
|
+
replies: number;
|
|
40
|
+
unique_authors: number;
|
|
41
|
+
avg_likes: number;
|
|
42
|
+
top_commenter: string | null;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
export function createComment(data: CreateCommentInput): Comment {
|
|
46
|
+
const db = getDatabase();
|
|
47
|
+
const id = crypto.randomUUID();
|
|
48
|
+
|
|
49
|
+
db.prepare(`
|
|
50
|
+
INSERT INTO transcript_comments (id, transcript_id, platform, author, author_handle, comment_text, likes, reply_count, is_reply, parent_comment_id, published_at)
|
|
51
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
52
|
+
`).run(
|
|
53
|
+
id,
|
|
54
|
+
data.transcript_id,
|
|
55
|
+
data.platform ?? "youtube",
|
|
56
|
+
data.author ?? null,
|
|
57
|
+
data.author_handle ?? null,
|
|
58
|
+
data.comment_text,
|
|
59
|
+
data.likes ?? 0,
|
|
60
|
+
data.reply_count ?? 0,
|
|
61
|
+
data.is_reply ? 1 : 0,
|
|
62
|
+
data.parent_comment_id ?? null,
|
|
63
|
+
data.published_at ?? null,
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
return getComment(id)!;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
export function getComment(id: string): Comment | null {
|
|
70
|
+
const db = getDatabase();
|
|
71
|
+
return db.prepare("SELECT * FROM transcript_comments WHERE id = ?").get(id) as Comment | null;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
export function listComments(transcriptId: string, options: ListCommentsOptions = {}): Comment[] {
|
|
75
|
+
const db = getDatabase();
|
|
76
|
+
const limit = options.limit ?? 50;
|
|
77
|
+
const offset = options.offset ?? 0;
|
|
78
|
+
const orderBy = options.top ? "likes DESC" : "created_at ASC";
|
|
79
|
+
|
|
80
|
+
return db
|
|
81
|
+
.prepare(`SELECT * FROM transcript_comments WHERE transcript_id = ? ORDER BY ${orderBy} LIMIT ? OFFSET ?`)
|
|
82
|
+
.all(transcriptId, limit, offset) as Comment[];
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
export function deleteComment(id: string): boolean {
|
|
86
|
+
const db = getDatabase();
|
|
87
|
+
return db.prepare("DELETE FROM transcript_comments WHERE id = ?").run(id).changes > 0;
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
export function getTopComments(transcriptId: string, limit = 10): Comment[] {
|
|
91
|
+
const db = getDatabase();
|
|
92
|
+
return db
|
|
93
|
+
.prepare("SELECT * FROM transcript_comments WHERE transcript_id = ? ORDER BY likes DESC LIMIT ?")
|
|
94
|
+
.all(transcriptId, limit) as Comment[];
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export function searchComments(query: string): Comment[] {
|
|
98
|
+
const db = getDatabase();
|
|
99
|
+
const q = `%${query}%`;
|
|
100
|
+
return db
|
|
101
|
+
.prepare("SELECT * FROM transcript_comments WHERE comment_text LIKE ? ORDER BY likes DESC LIMIT 50")
|
|
102
|
+
.all(q) as Comment[];
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
export function getCommentStats(transcriptId: string): CommentStats {
|
|
106
|
+
const db = getDatabase();
|
|
107
|
+
|
|
108
|
+
const total = (
|
|
109
|
+
db.prepare("SELECT COUNT(*) as n FROM transcript_comments WHERE transcript_id = ?").get(transcriptId) as { n: number }
|
|
110
|
+
).n;
|
|
111
|
+
|
|
112
|
+
const replies = (
|
|
113
|
+
db.prepare("SELECT COUNT(*) as n FROM transcript_comments WHERE transcript_id = ? AND is_reply = 1").get(transcriptId) as { n: number }
|
|
114
|
+
).n;
|
|
115
|
+
|
|
116
|
+
const uniqueAuthors = (
|
|
117
|
+
db.prepare("SELECT COUNT(DISTINCT author) as n FROM transcript_comments WHERE transcript_id = ? AND author IS NOT NULL").get(transcriptId) as { n: number }
|
|
118
|
+
).n;
|
|
119
|
+
|
|
120
|
+
const avgLikes = (
|
|
121
|
+
db.prepare("SELECT AVG(likes) as avg FROM transcript_comments WHERE transcript_id = ?").get(transcriptId) as { avg: number | null }
|
|
122
|
+
).avg ?? 0;
|
|
123
|
+
|
|
124
|
+
const topRow = db
|
|
125
|
+
.prepare("SELECT author, COUNT(*) as cnt FROM transcript_comments WHERE transcript_id = ? AND author IS NOT NULL GROUP BY author ORDER BY cnt DESC LIMIT 1")
|
|
126
|
+
.get(transcriptId) as { author: string; cnt: number } | null;
|
|
127
|
+
|
|
128
|
+
return {
|
|
129
|
+
total,
|
|
130
|
+
replies,
|
|
131
|
+
unique_authors: uniqueAuthors,
|
|
132
|
+
avg_likes: Math.round(avgLikes * 100) / 100,
|
|
133
|
+
top_commenter: topRow?.author ?? null,
|
|
134
|
+
};
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
export function importComments(transcriptId: string, comments: Array<Omit<CreateCommentInput, "transcript_id">>): number {
|
|
138
|
+
const db = getDatabase();
|
|
139
|
+
const stmt = db.prepare(`
|
|
140
|
+
INSERT INTO transcript_comments (id, transcript_id, platform, author, author_handle, comment_text, likes, reply_count, is_reply, parent_comment_id, published_at)
|
|
141
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
142
|
+
`);
|
|
143
|
+
|
|
144
|
+
let count = 0;
|
|
145
|
+
const transaction = db.transaction(() => {
|
|
146
|
+
for (const c of comments) {
|
|
147
|
+
stmt.run(
|
|
148
|
+
crypto.randomUUID(),
|
|
149
|
+
transcriptId,
|
|
150
|
+
c.platform ?? "youtube",
|
|
151
|
+
c.author ?? null,
|
|
152
|
+
c.author_handle ?? null,
|
|
153
|
+
c.comment_text,
|
|
154
|
+
c.likes ?? 0,
|
|
155
|
+
c.reply_count ?? 0,
|
|
156
|
+
c.is_reply ? 1 : 0,
|
|
157
|
+
c.parent_comment_id ?? null,
|
|
158
|
+
c.published_at ?? null,
|
|
159
|
+
);
|
|
160
|
+
count++;
|
|
161
|
+
}
|
|
162
|
+
});
|
|
163
|
+
transaction();
|
|
164
|
+
|
|
165
|
+
return count;
|
|
166
|
+
}
|
|
@@ -69,4 +69,50 @@ export const MIGRATIONS: MigrationEntry[] = [
|
|
|
69
69
|
CREATE INDEX IF NOT EXISTS idx_annotations_transcript ON annotations(transcript_id);
|
|
70
70
|
`,
|
|
71
71
|
},
|
|
72
|
+
{
|
|
73
|
+
id: 5,
|
|
74
|
+
name: "add_transcript_comments",
|
|
75
|
+
sql: `
|
|
76
|
+
CREATE TABLE IF NOT EXISTS transcript_comments (
|
|
77
|
+
id TEXT PRIMARY KEY,
|
|
78
|
+
transcript_id TEXT NOT NULL,
|
|
79
|
+
platform TEXT NOT NULL DEFAULT 'youtube',
|
|
80
|
+
author TEXT,
|
|
81
|
+
author_handle TEXT,
|
|
82
|
+
comment_text TEXT NOT NULL,
|
|
83
|
+
likes INTEGER DEFAULT 0,
|
|
84
|
+
reply_count INTEGER DEFAULT 0,
|
|
85
|
+
is_reply INTEGER DEFAULT 0,
|
|
86
|
+
parent_comment_id TEXT,
|
|
87
|
+
published_at TEXT,
|
|
88
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
89
|
+
FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
|
|
90
|
+
);
|
|
91
|
+
CREATE INDEX IF NOT EXISTS idx_comments_transcript ON transcript_comments(transcript_id);
|
|
92
|
+
CREATE INDEX IF NOT EXISTS idx_comments_likes ON transcript_comments(likes DESC);
|
|
93
|
+
`,
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
id: 6,
|
|
97
|
+
name: "add_proofread_issues",
|
|
98
|
+
sql: `
|
|
99
|
+
CREATE TABLE proofread_issues (
|
|
100
|
+
id TEXT PRIMARY KEY,
|
|
101
|
+
transcript_id TEXT NOT NULL,
|
|
102
|
+
issue_type TEXT NOT NULL CHECK(issue_type IN ('spelling','grammar','punctuation','clarity')),
|
|
103
|
+
position_start INTEGER,
|
|
104
|
+
position_end INTEGER,
|
|
105
|
+
original_text TEXT NOT NULL,
|
|
106
|
+
suggestion TEXT,
|
|
107
|
+
confidence REAL,
|
|
108
|
+
explanation TEXT,
|
|
109
|
+
status TEXT DEFAULT 'pending' CHECK(status IN ('pending','applied','dismissed')),
|
|
110
|
+
created_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
111
|
+
FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
|
|
112
|
+
);
|
|
113
|
+
CREATE INDEX idx_proofread_transcript ON proofread_issues(transcript_id);
|
|
114
|
+
CREATE INDEX idx_proofread_type ON proofread_issues(issue_type);
|
|
115
|
+
CREATE INDEX idx_proofread_status ON proofread_issues(status);
|
|
116
|
+
`,
|
|
117
|
+
},
|
|
72
118
|
];
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { getDatabase } from "./database.js";
|
|
2
|
+
|
|
3
|
+
export type IssueType = "spelling" | "grammar" | "punctuation" | "clarity";
|
|
4
|
+
export type IssueStatus = "pending" | "applied" | "dismissed";
|
|
5
|
+
|
|
6
|
+
export interface ProofreadIssue {
|
|
7
|
+
id: string;
|
|
8
|
+
transcript_id: string;
|
|
9
|
+
issue_type: IssueType;
|
|
10
|
+
position_start: number | null;
|
|
11
|
+
position_end: number | null;
|
|
12
|
+
original_text: string;
|
|
13
|
+
suggestion: string | null;
|
|
14
|
+
confidence: number | null;
|
|
15
|
+
explanation: string | null;
|
|
16
|
+
status: IssueStatus;
|
|
17
|
+
created_at: string;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export interface CreateProofreadIssueInput {
|
|
21
|
+
transcript_id: string;
|
|
22
|
+
issue_type: IssueType;
|
|
23
|
+
position_start?: number;
|
|
24
|
+
position_end?: number;
|
|
25
|
+
original_text: string;
|
|
26
|
+
suggestion?: string;
|
|
27
|
+
confidence?: number;
|
|
28
|
+
explanation?: string;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export interface ListProofreadIssuesOptions {
|
|
32
|
+
issue_type?: IssueType;
|
|
33
|
+
status?: IssueStatus;
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
export function createProofreadIssue(input: CreateProofreadIssueInput): ProofreadIssue {
|
|
37
|
+
const db = getDatabase();
|
|
38
|
+
const id = crypto.randomUUID();
|
|
39
|
+
db.prepare(`
|
|
40
|
+
INSERT INTO proofread_issues (id, transcript_id, issue_type, position_start, position_end, original_text, suggestion, confidence, explanation)
|
|
41
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
42
|
+
`).run(
|
|
43
|
+
id,
|
|
44
|
+
input.transcript_id,
|
|
45
|
+
input.issue_type,
|
|
46
|
+
input.position_start ?? null,
|
|
47
|
+
input.position_end ?? null,
|
|
48
|
+
input.original_text,
|
|
49
|
+
input.suggestion ?? null,
|
|
50
|
+
input.confidence ?? null,
|
|
51
|
+
input.explanation ?? null
|
|
52
|
+
);
|
|
53
|
+
return getProofreadIssue(id)!;
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
export function getProofreadIssue(id: string): ProofreadIssue | null {
|
|
57
|
+
const db = getDatabase();
|
|
58
|
+
const row = db.prepare("SELECT * FROM proofread_issues WHERE id = ?").get(id) as ProofreadIssue | null;
|
|
59
|
+
return row ?? null;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
export function listProofreadIssues(transcriptId: string, options: ListProofreadIssuesOptions = {}): ProofreadIssue[] {
|
|
63
|
+
const db = getDatabase();
|
|
64
|
+
const conditions: string[] = ["transcript_id = ?"];
|
|
65
|
+
const values: unknown[] = [transcriptId];
|
|
66
|
+
|
|
67
|
+
if (options.issue_type) { conditions.push("issue_type = ?"); values.push(options.issue_type); }
|
|
68
|
+
if (options.status) { conditions.push("status = ?"); values.push(options.status); }
|
|
69
|
+
|
|
70
|
+
const where = conditions.join(" AND ");
|
|
71
|
+
return db
|
|
72
|
+
.prepare(`SELECT * FROM proofread_issues WHERE ${where} ORDER BY position_start ASC, created_at ASC`)
|
|
73
|
+
.all(...values) as ProofreadIssue[];
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
export function updateIssueStatus(id: string, status: IssueStatus): ProofreadIssue | null {
|
|
77
|
+
const db = getDatabase();
|
|
78
|
+
const existing = getProofreadIssue(id);
|
|
79
|
+
if (!existing) return null;
|
|
80
|
+
db.prepare("UPDATE proofread_issues SET status = ? WHERE id = ?").run(status, id);
|
|
81
|
+
return getProofreadIssue(id);
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
export function deleteProofreadIssuesByTranscript(transcriptId: string): number {
|
|
85
|
+
const db = getDatabase();
|
|
86
|
+
return db.prepare("DELETE FROM proofread_issues WHERE transcript_id = ?").run(transcriptId).changes;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
export interface ProofreadStats {
|
|
90
|
+
total: number;
|
|
91
|
+
by_type: Record<string, number>;
|
|
92
|
+
pending: number;
|
|
93
|
+
applied: number;
|
|
94
|
+
dismissed: number;
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
export function getProofreadStats(transcriptId: string): ProofreadStats {
|
|
98
|
+
const db = getDatabase();
|
|
99
|
+
|
|
100
|
+
const total = (db.prepare("SELECT COUNT(*) as n FROM proofread_issues WHERE transcript_id = ?").get(transcriptId) as { n: number }).n;
|
|
101
|
+
|
|
102
|
+
const byType = db
|
|
103
|
+
.prepare("SELECT issue_type, COUNT(*) as n FROM proofread_issues WHERE transcript_id = ? GROUP BY issue_type")
|
|
104
|
+
.all(transcriptId) as { issue_type: string; n: number }[];
|
|
105
|
+
|
|
106
|
+
const byStatus = db
|
|
107
|
+
.prepare("SELECT status, COUNT(*) as n FROM proofread_issues WHERE transcript_id = ? GROUP BY status")
|
|
108
|
+
.all(transcriptId) as { status: string; n: number }[];
|
|
109
|
+
|
|
110
|
+
const statusMap = Object.fromEntries(byStatus.map((r) => [r.status, r.n]));
|
|
111
|
+
|
|
112
|
+
return {
|
|
113
|
+
total,
|
|
114
|
+
by_type: Object.fromEntries(byType.map((r) => [r.issue_type, r.n])),
|
|
115
|
+
pending: statusMap["pending"] ?? 0,
|
|
116
|
+
applied: statusMap["applied"] ?? 0,
|
|
117
|
+
dismissed: statusMap["dismissed"] ?? 0,
|
|
118
|
+
};
|
|
119
|
+
}
|
|
@@ -556,6 +556,74 @@ export async function splitAudioIntoChunks(
|
|
|
556
556
|
return chunks;
|
|
557
557
|
}
|
|
558
558
|
|
|
559
|
+
/**
|
|
560
|
+
* Raw comment from yt-dlp .info.json comments array.
|
|
561
|
+
*/
|
|
562
|
+
export interface RawComment {
|
|
563
|
+
author: string | null;
|
|
564
|
+
author_id: string | null;
|
|
565
|
+
text: string;
|
|
566
|
+
like_count: number;
|
|
567
|
+
timestamp: number | null;
|
|
568
|
+
parent: string | null; // "root" for top-level, comment id for replies
|
|
569
|
+
id: string;
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
/**
|
|
573
|
+
* Fetch comments for a video URL using yt-dlp --write-comments.
|
|
574
|
+
* Downloads only the .info.json (no media) and parses the comments array.
|
|
575
|
+
*/
|
|
576
|
+
export async function fetchComments(url: string): Promise<RawComment[]> {
|
|
577
|
+
const tempId = crypto.randomUUID();
|
|
578
|
+
const outputTemplate = join(tmpdir(), `comments-${tempId}`);
|
|
579
|
+
|
|
580
|
+
const proc = Bun.spawn(
|
|
581
|
+
[ytdlp(), "--write-comments", "--skip-download", "--no-write-thumbnail", "-o", outputTemplate, url],
|
|
582
|
+
{ stdout: "pipe", stderr: "pipe" }
|
|
583
|
+
);
|
|
584
|
+
|
|
585
|
+
const [exitCode, , stderr] = await Promise.all([
|
|
586
|
+
proc.exited,
|
|
587
|
+
new Response(proc.stdout).text(),
|
|
588
|
+
new Response(proc.stderr).text(),
|
|
589
|
+
]);
|
|
590
|
+
|
|
591
|
+
if (exitCode !== 0) {
|
|
592
|
+
throw new Error(`yt-dlp comment fetch failed (exit ${exitCode}): ${stderr.trim()}`);
|
|
593
|
+
}
|
|
594
|
+
|
|
595
|
+
// yt-dlp writes <output>.info.json
|
|
596
|
+
const infoPath = `${outputTemplate}.info.json`;
|
|
597
|
+
const { readFileSync, unlinkSync: unlinkFile, existsSync: fileExists } = await import("node:fs");
|
|
598
|
+
|
|
599
|
+
if (!fileExists(infoPath)) {
|
|
600
|
+
throw new Error("yt-dlp did not produce an info.json file for comments");
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
try {
|
|
604
|
+
const raw = JSON.parse(readFileSync(infoPath, "utf8"));
|
|
605
|
+
const comments: RawComment[] = [];
|
|
606
|
+
|
|
607
|
+
if (Array.isArray(raw.comments)) {
|
|
608
|
+
for (const c of raw.comments) {
|
|
609
|
+
comments.push({
|
|
610
|
+
author: c.author ?? null,
|
|
611
|
+
author_id: c.author_id ?? null,
|
|
612
|
+
text: typeof c.text === "string" ? c.text : String(c.text ?? ""),
|
|
613
|
+
like_count: typeof c.like_count === "number" ? c.like_count : 0,
|
|
614
|
+
timestamp: typeof c.timestamp === "number" ? c.timestamp : null,
|
|
615
|
+
parent: c.parent === "root" ? null : (c.parent ?? null),
|
|
616
|
+
id: c.id ?? crypto.randomUUID(),
|
|
617
|
+
});
|
|
618
|
+
}
|
|
619
|
+
}
|
|
620
|
+
|
|
621
|
+
return comments;
|
|
622
|
+
} finally {
|
|
623
|
+
try { unlinkFile(infoPath); } catch {}
|
|
624
|
+
}
|
|
625
|
+
}
|
|
626
|
+
|
|
559
627
|
/**
|
|
560
628
|
* Check whether yt-dlp is available on the system.
|
|
561
629
|
*/
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AI-powered non-destructive spellcheck/proofread for transcripts.
|
|
3
|
+
* Uses OpenAI or Anthropic to find spelling, grammar, punctuation, and clarity issues.
|
|
4
|
+
* NEVER modifies transcript_text directly — issues are stored in proofread_issues table
|
|
5
|
+
* and must be explicitly applied one by one.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getTranscript, updateTranscript } from "../db/transcripts.js";
|
|
9
|
+
import {
|
|
10
|
+
createProofreadIssue,
|
|
11
|
+
listProofreadIssues,
|
|
12
|
+
getProofreadIssue,
|
|
13
|
+
updateIssueStatus,
|
|
14
|
+
getProofreadStats as getDbProofreadStats,
|
|
15
|
+
type ProofreadIssue,
|
|
16
|
+
type IssueType,
|
|
17
|
+
type IssueStatus,
|
|
18
|
+
type ListProofreadIssuesOptions,
|
|
19
|
+
type ProofreadStats,
|
|
20
|
+
} from "../db/proofread.js";
|
|
21
|
+
import { getDefaultSummaryProvider, type SummaryProvider } from "./summarizer.js";
|
|
22
|
+
|
|
23
|
+
export type { ProofreadIssue, ProofreadStats, IssueType, IssueStatus };
|
|
24
|
+
|
|
25
|
+
/** Options controlling an AI proofread run (consumed by proofreadTranscript). */
export interface ProofreadOptions {
  /** Issue categories to look for; when omitted, all categories are checked. */
  types?: IssueType[];
  /** Minimum confidence (0-1) an issue must meet to be stored; defaults to 0.7. */
  confidence_threshold?: number;
  /** AI backend to use; when omitted, falls back to getDefaultSummaryProvider(). */
  provider?: SummaryProvider;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Shape of one issue object as the AI provider is asked to return it
 * (see PROOFREAD_PROMPT). Values are unvalidated model output — every field
 * is re-checked in proofreadTranscript before being persisted.
 */
interface RawProofreadIssue {
  // Expected to be one of "spelling" | "grammar" | "punctuation" | "clarity".
  issue_type: string;
  // Character offset where the issue starts in the original transcript text.
  position_start: number;
  // Character offset where the issue ends in the original transcript text.
  position_end: number;
  // The exact text span that has the issue.
  original_text: string;
  // The corrected replacement text.
  suggestion: string;
  // Model self-reported confidence in [0, 1].
  confidence: number;
  // Brief human-readable explanation of the issue.
  explanation: string;
}
|
|
40
|
+
|
|
41
|
+
const PROOFREAD_PROMPT = (text: string, types?: IssueType[]) => {
|
|
42
|
+
const typeFilter = types && types.length > 0
|
|
43
|
+
? `Only check for these issue types: ${types.join(", ")}.`
|
|
44
|
+
: "Check for all issue types: spelling, grammar, punctuation, clarity.";
|
|
45
|
+
|
|
46
|
+
return `You are a professional proofreader. Analyze the following transcript text and find all issues.
|
|
47
|
+
|
|
48
|
+
${typeFilter}
|
|
49
|
+
|
|
50
|
+
For each issue found, return a JSON object with:
|
|
51
|
+
- "issue_type": one of "spelling", "grammar", "punctuation", "clarity"
|
|
52
|
+
- "position_start": character index where the issue starts in the original text
|
|
53
|
+
- "position_end": character index where the issue ends in the original text
|
|
54
|
+
- "original_text": the exact text that has the issue
|
|
55
|
+
- "suggestion": the corrected text
|
|
56
|
+
- "confidence": a number 0-1 indicating how confident you are this is an issue
|
|
57
|
+
- "explanation": brief explanation of the issue
|
|
58
|
+
|
|
59
|
+
Return ONLY a valid JSON array of issue objects. If no issues found, return [].
|
|
60
|
+
Do not wrap in markdown code fences.
|
|
61
|
+
|
|
62
|
+
Transcript text:
|
|
63
|
+
${text.slice(0, 15000)}`;
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
async function callOpenAI(prompt: string, maxTokens: number): Promise<string> {
|
|
67
|
+
const apiKey = process.env["OPENAI_API_KEY"];
|
|
68
|
+
if (!apiKey) throw new Error("OPENAI_API_KEY is not set");
|
|
69
|
+
|
|
70
|
+
const res = await fetch("https://api.openai.com/v1/chat/completions", {
|
|
71
|
+
method: "POST",
|
|
72
|
+
headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
|
|
73
|
+
body: JSON.stringify({
|
|
74
|
+
model: "gpt-4o-mini",
|
|
75
|
+
messages: [{ role: "user", content: prompt }],
|
|
76
|
+
max_tokens: maxTokens,
|
|
77
|
+
temperature: 0.2,
|
|
78
|
+
}),
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
if (!res.ok) { const body = await res.text(); throw new Error(`OpenAI API error ${res.status}: ${body}`); }
|
|
82
|
+
const data = (await res.json()) as { choices: Array<{ message: { content: string } }> };
|
|
83
|
+
return data.choices[0]?.message?.content?.trim() ?? "";
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
async function callAnthropic(prompt: string, maxTokens: number): Promise<string> {
|
|
87
|
+
const apiKey = process.env["ANTHROPIC_API_KEY"];
|
|
88
|
+
if (!apiKey) throw new Error("ANTHROPIC_API_KEY is not set");
|
|
89
|
+
|
|
90
|
+
const res = await fetch("https://api.anthropic.com/v1/messages", {
|
|
91
|
+
method: "POST",
|
|
92
|
+
headers: { "x-api-key": apiKey, "anthropic-version": "2023-06-01", "Content-Type": "application/json" },
|
|
93
|
+
body: JSON.stringify({
|
|
94
|
+
model: "claude-haiku-4-5-20251001",
|
|
95
|
+
max_tokens: maxTokens,
|
|
96
|
+
messages: [{ role: "user", content: prompt }],
|
|
97
|
+
}),
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
if (!res.ok) { const body = await res.text(); throw new Error(`Anthropic API error ${res.status}: ${body}`); }
|
|
101
|
+
const data = (await res.json()) as { content: Array<{ type: string; text: string }> };
|
|
102
|
+
return data.content.find((b) => b.type === "text")?.text?.trim() ?? "";
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
function parseAIResponse(raw: string): RawProofreadIssue[] {
|
|
106
|
+
const cleaned = raw.replace(/```json\n?/g, "").replace(/```\n?/g, "").trim();
|
|
107
|
+
try {
|
|
108
|
+
const parsed = JSON.parse(cleaned);
|
|
109
|
+
if (!Array.isArray(parsed)) return [];
|
|
110
|
+
return parsed.filter(
|
|
111
|
+
(item: unknown) =>
|
|
112
|
+
typeof item === "object" &&
|
|
113
|
+
item !== null &&
|
|
114
|
+
"issue_type" in item &&
|
|
115
|
+
"original_text" in item
|
|
116
|
+
);
|
|
117
|
+
} catch {
|
|
118
|
+
return [];
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
const VALID_ISSUE_TYPES: Set<string> = new Set(["spelling", "grammar", "punctuation", "clarity"]);
|
|
123
|
+
|
|
124
|
+
/**
|
|
125
|
+
* Run AI proofreading on a transcript. Stores issues in DB. Never changes transcript_text.
|
|
126
|
+
*/
|
|
127
|
+
export async function proofreadTranscript(
|
|
128
|
+
transcriptId: string,
|
|
129
|
+
options: ProofreadOptions = {}
|
|
130
|
+
): Promise<ProofreadIssue[]> {
|
|
131
|
+
const transcript = getTranscript(transcriptId);
|
|
132
|
+
if (!transcript) throw new Error(`Transcript '${transcriptId}' not found.`);
|
|
133
|
+
if (!transcript.transcript_text) throw new Error(`Transcript '${transcriptId}' has no text.`);
|
|
134
|
+
|
|
135
|
+
const provider = options.provider ?? getDefaultSummaryProvider();
|
|
136
|
+
if (!provider) throw new Error("No AI provider configured. Set OPENAI_API_KEY or ANTHROPIC_API_KEY.");
|
|
137
|
+
|
|
138
|
+
const prompt = PROOFREAD_PROMPT(transcript.transcript_text, options.types);
|
|
139
|
+
const confidenceThreshold = options.confidence_threshold ?? 0.7;
|
|
140
|
+
|
|
141
|
+
let raw: string;
|
|
142
|
+
if (provider === "openai") {
|
|
143
|
+
raw = await callOpenAI(prompt, 3000);
|
|
144
|
+
} else {
|
|
145
|
+
raw = await callAnthropic(prompt, 3000);
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
const rawIssues = parseAIResponse(raw);
|
|
149
|
+
const created: ProofreadIssue[] = [];
|
|
150
|
+
|
|
151
|
+
for (const issue of rawIssues) {
|
|
152
|
+
// Validate issue_type
|
|
153
|
+
if (!VALID_ISSUE_TYPES.has(issue.issue_type)) continue;
|
|
154
|
+
|
|
155
|
+
// Filter by confidence threshold
|
|
156
|
+
const confidence = typeof issue.confidence === "number" ? issue.confidence : 0.8;
|
|
157
|
+
if (confidence < confidenceThreshold) continue;
|
|
158
|
+
|
|
159
|
+
// Filter by types if specified
|
|
160
|
+
if (options.types && options.types.length > 0 && !options.types.includes(issue.issue_type as IssueType)) continue;
|
|
161
|
+
|
|
162
|
+
const created_issue = createProofreadIssue({
|
|
163
|
+
transcript_id: transcriptId,
|
|
164
|
+
issue_type: issue.issue_type as IssueType,
|
|
165
|
+
position_start: typeof issue.position_start === "number" ? issue.position_start : undefined,
|
|
166
|
+
position_end: typeof issue.position_end === "number" ? issue.position_end : undefined,
|
|
167
|
+
original_text: String(issue.original_text),
|
|
168
|
+
suggestion: issue.suggestion ? String(issue.suggestion) : undefined,
|
|
169
|
+
confidence,
|
|
170
|
+
explanation: issue.explanation ? String(issue.explanation) : undefined,
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
created.push(created_issue);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
return created;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* List proofread issues for a transcript with optional filters.
|
|
181
|
+
*/
|
|
182
|
+
export function listIssues(
|
|
183
|
+
transcriptId: string,
|
|
184
|
+
filters?: ListProofreadIssuesOptions
|
|
185
|
+
): ProofreadIssue[] {
|
|
186
|
+
return listProofreadIssues(transcriptId, filters);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
/**
|
|
190
|
+
* Apply a suggestion: replaces the original_text in transcript_text at the
|
|
191
|
+
* specified position with the suggestion, and marks the issue as 'applied'.
|
|
192
|
+
*/
|
|
193
|
+
export function applySuggestion(issueId: string): ProofreadIssue | null {
|
|
194
|
+
const issue = getProofreadIssue(issueId);
|
|
195
|
+
if (!issue) return null;
|
|
196
|
+
if (issue.status !== "pending") return issue; // already handled
|
|
197
|
+
|
|
198
|
+
if (!issue.suggestion) {
|
|
199
|
+
// No suggestion to apply, just dismiss
|
|
200
|
+
return updateIssueStatus(issueId, "dismissed");
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
const transcript = getTranscript(issue.transcript_id);
|
|
204
|
+
if (!transcript || !transcript.transcript_text) return null;
|
|
205
|
+
|
|
206
|
+
let newText: string;
|
|
207
|
+
|
|
208
|
+
if (issue.position_start !== null && issue.position_end !== null) {
|
|
209
|
+
// Apply at exact position if the text at that position matches
|
|
210
|
+
const textAtPosition = transcript.transcript_text.slice(issue.position_start, issue.position_end);
|
|
211
|
+
if (textAtPosition === issue.original_text) {
|
|
212
|
+
newText =
|
|
213
|
+
transcript.transcript_text.slice(0, issue.position_start) +
|
|
214
|
+
issue.suggestion +
|
|
215
|
+
transcript.transcript_text.slice(issue.position_end);
|
|
216
|
+
} else {
|
|
217
|
+
// Position mismatch (text may have shifted from prior edits), fall back to first occurrence
|
|
218
|
+
newText = transcript.transcript_text.replace(issue.original_text, issue.suggestion);
|
|
219
|
+
}
|
|
220
|
+
} else {
|
|
221
|
+
// No position info, replace first occurrence
|
|
222
|
+
newText = transcript.transcript_text.replace(issue.original_text, issue.suggestion);
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
// Only update if text actually changed
|
|
226
|
+
if (newText !== transcript.transcript_text) {
|
|
227
|
+
updateTranscript(issue.transcript_id, { transcript_text: newText });
|
|
228
|
+
}
|
|
229
|
+
|
|
230
|
+
return updateIssueStatus(issueId, "applied");
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
/**
|
|
234
|
+
* Dismiss an issue without changing the transcript text.
|
|
235
|
+
*/
|
|
236
|
+
export function dismissIssue(issueId: string): ProofreadIssue | null {
|
|
237
|
+
const issue = getProofreadIssue(issueId);
|
|
238
|
+
if (!issue) return null;
|
|
239
|
+
return updateIssueStatus(issueId, "dismissed");
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
/**
|
|
243
|
+
* Get proofread statistics for a transcript.
|
|
244
|
+
*/
|
|
245
|
+
export { getDbProofreadStats as getProofreadStats };
|
|
246
|
+
|
|
247
|
+
/**
|
|
248
|
+
* Export annotated transcript text with inline markers showing issues.
|
|
249
|
+
* Format: [TYPE: "original" -> "suggestion"]
|
|
250
|
+
*/
|
|
251
|
+
export function exportAnnotated(transcriptId: string): string {
|
|
252
|
+
const transcript = getTranscript(transcriptId);
|
|
253
|
+
if (!transcript || !transcript.transcript_text) {
|
|
254
|
+
throw new Error(`Transcript '${transcriptId}' not found or has no text.`);
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
const issues = listProofreadIssues(transcriptId, { status: "pending" });
|
|
258
|
+
if (issues.length === 0) return transcript.transcript_text;
|
|
259
|
+
|
|
260
|
+
// Sort issues by position_start descending so we can safely replace from end to start
|
|
261
|
+
// without shifting positions. Issues without positions are handled via string replacement.
|
|
262
|
+
const positionalIssues = issues
|
|
263
|
+
.filter((i) => i.position_start !== null && i.position_end !== null)
|
|
264
|
+
.sort((a, b) => (b.position_start ?? 0) - (a.position_start ?? 0));
|
|
265
|
+
|
|
266
|
+
const nonPositionalIssues = issues.filter((i) => i.position_start === null || i.position_end === null);
|
|
267
|
+
|
|
268
|
+
let text = transcript.transcript_text;
|
|
269
|
+
|
|
270
|
+
// Apply positional annotations from end to start
|
|
271
|
+
for (const issue of positionalIssues) {
|
|
272
|
+
const start = issue.position_start!;
|
|
273
|
+
const end = issue.position_end!;
|
|
274
|
+
const marker = formatMarker(issue);
|
|
275
|
+
text = text.slice(0, start) + marker + text.slice(end);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
// Apply non-positional annotations via first occurrence replacement
|
|
279
|
+
for (const issue of nonPositionalIssues) {
|
|
280
|
+
const marker = formatMarker(issue);
|
|
281
|
+
const idx = text.indexOf(issue.original_text);
|
|
282
|
+
if (idx !== -1) {
|
|
283
|
+
text = text.slice(0, idx) + marker + text.slice(idx + issue.original_text.length);
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
return text;
|
|
288
|
+
}
|
|
289
|
+
|
|
290
|
+
function formatMarker(issue: ProofreadIssue): string {
|
|
291
|
+
const type = issue.issue_type.toUpperCase();
|
|
292
|
+
if (issue.suggestion) {
|
|
293
|
+
return `[${type}: "${issue.original_text}" -> "${issue.suggestion}"]`;
|
|
294
|
+
}
|
|
295
|
+
return `[${type}: "${issue.original_text}"]`;
|
|
296
|
+
}
|
|
@@ -23,7 +23,8 @@ import {
|
|
|
23
23
|
type TranscriptStatus,
|
|
24
24
|
type TranscriptSourceType,
|
|
25
25
|
} from "../db/transcripts.js";
|
|
26
|
-
import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, type TrimOptions } from "../lib/downloader.js";
|
|
26
|
+
import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, fetchComments, type TrimOptions } from "../lib/downloader.js";
|
|
27
|
+
import { listComments, getTopComments, searchComments, getCommentStats, importComments } from "../db/comments.js";
|
|
27
28
|
import { getConfig, setConfig, resetConfig } from "../lib/config.js";
|
|
28
29
|
import { summarizeText, extractHighlights, generateMeetingNotes, getDefaultSummaryProvider } from "../lib/summarizer.js";
|
|
29
30
|
import { translateText } from "../lib/translator.js";
|
|
@@ -31,6 +32,7 @@ import { fetchFeedEpisodes } from "../lib/feeds.js";
|
|
|
31
32
|
import { createAnnotation, listAnnotations, deleteAnnotation } from "../db/annotations.js";
|
|
32
33
|
import { wordDiff, diffStats, formatDiff } from "../lib/diff.js";
|
|
33
34
|
import { transcribeFile, checkProviders, toSrt, toVtt, toAss, toMarkdown, segmentByChapters, formatWithConfidence } from "../lib/providers.js";
|
|
35
|
+
import { proofreadTranscript, listIssues, applySuggestion, dismissIssue, getProofreadStats, exportAnnotated, type IssueType } from "../lib/proofread.js";
|
|
34
36
|
|
|
35
37
|
const server = new McpServer({
|
|
36
38
|
name: "microservice-transcriber",
|
|
@@ -63,9 +65,10 @@ server.registerTool(
|
|
|
63
65
|
diarize: z.boolean().optional().describe("Identify different speakers — ElevenLabs only"),
|
|
64
66
|
vocab: z.array(z.string()).optional().describe("Custom vocabulary hints for accuracy (e.g. ['Karpathy', 'MicroGPT'])"),
|
|
65
67
|
force: z.boolean().optional().describe("Re-transcribe even if URL already exists in DB"),
|
|
68
|
+
comments: z.boolean().optional().describe("Also fetch and store YouTube/Vimeo comments"),
|
|
66
69
|
},
|
|
67
70
|
},
|
|
68
|
-
async ({ source, provider = "elevenlabs", language, title, start, end, diarize, vocab, force }) => {
|
|
71
|
+
async ({ source, provider = "elevenlabs", language, title, start, end, diarize, vocab, force, comments: fetchCommentsFlag }) => {
|
|
69
72
|
// Duplicate detection
|
|
70
73
|
if (!force) {
|
|
71
74
|
const existing = findBySourceUrl(source);
|
|
@@ -129,8 +132,33 @@ server.registerTool(
|
|
|
129
132
|
},
|
|
130
133
|
});
|
|
131
134
|
|
|
135
|
+
// Fetch comments if requested
|
|
136
|
+
let commentCount = 0;
|
|
137
|
+
if (fetchCommentsFlag && (sourceType === "youtube" || sourceType === "vimeo")) {
|
|
138
|
+
try {
|
|
139
|
+
const rawComments = await fetchComments(source);
|
|
140
|
+
if (rawComments.length > 0) {
|
|
141
|
+
const mapped = rawComments.map((c) => ({
|
|
142
|
+
platform: sourceType,
|
|
143
|
+
author: c.author,
|
|
144
|
+
author_handle: c.author_id,
|
|
145
|
+
comment_text: c.text,
|
|
146
|
+
likes: c.like_count,
|
|
147
|
+
reply_count: 0,
|
|
148
|
+
is_reply: c.parent !== null,
|
|
149
|
+
parent_comment_id: c.parent,
|
|
150
|
+
published_at: c.timestamp ? new Date(c.timestamp * 1000).toISOString() : null,
|
|
151
|
+
}));
|
|
152
|
+
commentCount = importComments(record.id, mapped);
|
|
153
|
+
}
|
|
154
|
+
} catch {
|
|
155
|
+
// Comment fetch is best-effort — don't fail the transcription
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
const finalResult = { ...getTranscript(record.id), comments_imported: commentCount };
|
|
132
160
|
return {
|
|
133
|
-
content: [{ type: "text", text: JSON.stringify(
|
|
161
|
+
content: [{ type: "text", text: JSON.stringify(finalResult, null, 2) }],
|
|
134
162
|
};
|
|
135
163
|
} catch (error) {
|
|
136
164
|
const msg = error instanceof Error ? error.message : String(error);
|
|
@@ -943,6 +971,86 @@ server.registerTool(
|
|
|
943
971
|
}
|
|
944
972
|
);
|
|
945
973
|
|
|
974
|
+
// ---------------------------------------------------------------------------
|
|
975
|
+
// list_comments
|
|
976
|
+
// ---------------------------------------------------------------------------
|
|
977
|
+
|
|
978
|
+
server.registerTool(
|
|
979
|
+
"list_comments",
|
|
980
|
+
{
|
|
981
|
+
title: "List Comments",
|
|
982
|
+
description: "List comments for a transcript, optionally sorted by likes.",
|
|
983
|
+
inputSchema: {
|
|
984
|
+
transcript_id: z.string().describe("Transcript ID"),
|
|
985
|
+
top: z.boolean().optional().describe("Sort by most liked"),
|
|
986
|
+
limit: z.number().optional().describe("Max results (default 50)"),
|
|
987
|
+
offset: z.number().optional().describe("Offset for pagination"),
|
|
988
|
+
},
|
|
989
|
+
},
|
|
990
|
+
async ({ transcript_id, top, limit, offset }) => {
|
|
991
|
+
const comments = listComments(transcript_id, { top, limit, offset });
|
|
992
|
+
return { content: [{ type: "text", text: JSON.stringify(comments, null, 2) }] };
|
|
993
|
+
}
|
|
994
|
+
);
|
|
995
|
+
|
|
996
|
+
// ---------------------------------------------------------------------------
|
|
997
|
+
// top_comments
|
|
998
|
+
// ---------------------------------------------------------------------------
|
|
999
|
+
|
|
1000
|
+
server.registerTool(
|
|
1001
|
+
"top_comments",
|
|
1002
|
+
{
|
|
1003
|
+
title: "Top Comments",
|
|
1004
|
+
description: "Get the most liked comments for a transcript.",
|
|
1005
|
+
inputSchema: {
|
|
1006
|
+
transcript_id: z.string().describe("Transcript ID"),
|
|
1007
|
+
limit: z.number().optional().describe("Number of top comments (default 10)"),
|
|
1008
|
+
},
|
|
1009
|
+
},
|
|
1010
|
+
async ({ transcript_id, limit }) => {
|
|
1011
|
+
const comments = getTopComments(transcript_id, limit);
|
|
1012
|
+
return { content: [{ type: "text", text: JSON.stringify(comments, null, 2) }] };
|
|
1013
|
+
}
|
|
1014
|
+
);
|
|
1015
|
+
|
|
1016
|
+
// ---------------------------------------------------------------------------
|
|
1017
|
+
// search_comments
|
|
1018
|
+
// ---------------------------------------------------------------------------
|
|
1019
|
+
|
|
1020
|
+
server.registerTool(
|
|
1021
|
+
"search_comments",
|
|
1022
|
+
{
|
|
1023
|
+
title: "Search Comments",
|
|
1024
|
+
description: "Search comment text across all transcripts using LIKE matching.",
|
|
1025
|
+
inputSchema: {
|
|
1026
|
+
query: z.string().describe("Search query"),
|
|
1027
|
+
},
|
|
1028
|
+
},
|
|
1029
|
+
async ({ query }) => {
|
|
1030
|
+
const results = searchComments(query);
|
|
1031
|
+
return { content: [{ type: "text", text: JSON.stringify(results, null, 2) }] };
|
|
1032
|
+
}
|
|
1033
|
+
);
|
|
1034
|
+
|
|
1035
|
+
// ---------------------------------------------------------------------------
|
|
1036
|
+
// comment_stats
|
|
1037
|
+
// ---------------------------------------------------------------------------
|
|
1038
|
+
|
|
1039
|
+
server.registerTool(
|
|
1040
|
+
"comment_stats",
|
|
1041
|
+
{
|
|
1042
|
+
title: "Comment Stats",
|
|
1043
|
+
description: "Get comment statistics for a transcript: total, replies, unique authors, avg likes, top commenter.",
|
|
1044
|
+
inputSchema: {
|
|
1045
|
+
transcript_id: z.string().describe("Transcript ID"),
|
|
1046
|
+
},
|
|
1047
|
+
},
|
|
1048
|
+
async ({ transcript_id }) => {
|
|
1049
|
+
const stats = getCommentStats(transcript_id);
|
|
1050
|
+
return { content: [{ type: "text", text: JSON.stringify(stats, null, 2) }] };
|
|
1051
|
+
}
|
|
1052
|
+
);
|
|
1053
|
+
|
|
946
1054
|
// ---------------------------------------------------------------------------
|
|
947
1055
|
// get_config / set_config
|
|
948
1056
|
// ---------------------------------------------------------------------------
|
|
@@ -991,6 +1099,142 @@ server.registerTool(
|
|
|
991
1099
|
}
|
|
992
1100
|
);
|
|
993
1101
|
|
|
1102
|
+
// ---------------------------------------------------------------------------
|
|
1103
|
+
// proofread_transcript
|
|
1104
|
+
// ---------------------------------------------------------------------------
|
|
1105
|
+
|
|
1106
|
+
server.registerTool(
|
|
1107
|
+
"proofread_transcript",
|
|
1108
|
+
{
|
|
1109
|
+
title: "Proofread Transcript",
|
|
1110
|
+
description: "Run AI-powered spellcheck/proofread on a transcript. Finds spelling, grammar, punctuation, and clarity issues. Non-destructive: stores issues in DB without modifying transcript text.",
|
|
1111
|
+
inputSchema: {
|
|
1112
|
+
id: z.string().describe("Transcript ID"),
|
|
1113
|
+
types: z.array(z.enum(["spelling", "grammar", "punctuation", "clarity"])).optional().describe("Issue types to check (default: all)"),
|
|
1114
|
+
confidence_threshold: z.number().optional().describe("Minimum confidence 0-1 (default: 0.7)"),
|
|
1115
|
+
provider: z.enum(["openai", "anthropic"]).optional().describe("AI provider (auto-detected from env)"),
|
|
1116
|
+
},
|
|
1117
|
+
},
|
|
1118
|
+
async ({ id, types, confidence_threshold, provider }) => {
|
|
1119
|
+
try {
|
|
1120
|
+
const issues = await proofreadTranscript(id, {
|
|
1121
|
+
types: types as IssueType[] | undefined,
|
|
1122
|
+
confidence_threshold,
|
|
1123
|
+
provider: provider as "openai" | "anthropic" | undefined,
|
|
1124
|
+
});
|
|
1125
|
+
return { content: [{ type: "text", text: JSON.stringify(issues, null, 2) }] };
|
|
1126
|
+
} catch (error) {
|
|
1127
|
+
return { content: [{ type: "text", text: `Proofread failed: ${error instanceof Error ? error.message : error}` }], isError: true };
|
|
1128
|
+
}
|
|
1129
|
+
}
|
|
1130
|
+
);
|
|
1131
|
+
|
|
1132
|
+
// ---------------------------------------------------------------------------
|
|
1133
|
+
// list_proofread_issues
|
|
1134
|
+
// ---------------------------------------------------------------------------
|
|
1135
|
+
|
|
1136
|
+
server.registerTool(
|
|
1137
|
+
"list_proofread_issues",
|
|
1138
|
+
{
|
|
1139
|
+
title: "List Proofread Issues",
|
|
1140
|
+
description: "List proofread issues for a transcript with optional filters.",
|
|
1141
|
+
inputSchema: {
|
|
1142
|
+
transcript_id: z.string().describe("Transcript ID"),
|
|
1143
|
+
issue_type: z.enum(["spelling", "grammar", "punctuation", "clarity"]).optional().describe("Filter by issue type"),
|
|
1144
|
+
status: z.enum(["pending", "applied", "dismissed"]).optional().describe("Filter by status"),
|
|
1145
|
+
},
|
|
1146
|
+
},
|
|
1147
|
+
async ({ transcript_id, issue_type, status }) => {
|
|
1148
|
+
const issues = listIssues(transcript_id, {
|
|
1149
|
+
issue_type: issue_type as IssueType | undefined,
|
|
1150
|
+
status: status as "pending" | "applied" | "dismissed" | undefined,
|
|
1151
|
+
});
|
|
1152
|
+
return { content: [{ type: "text", text: JSON.stringify(issues, null, 2) }] };
|
|
1153
|
+
}
|
|
1154
|
+
);
|
|
1155
|
+
|
|
1156
|
+
// ---------------------------------------------------------------------------
|
|
1157
|
+
// apply_suggestion
|
|
1158
|
+
// ---------------------------------------------------------------------------
|
|
1159
|
+
|
|
1160
|
+
server.registerTool(
|
|
1161
|
+
"apply_suggestion",
|
|
1162
|
+
{
|
|
1163
|
+
title: "Apply Proofread Suggestion",
|
|
1164
|
+
description: "Apply a proofread suggestion to the transcript text. Replaces the original text with the suggestion and marks the issue as applied.",
|
|
1165
|
+
inputSchema: {
|
|
1166
|
+
issue_id: z.string().describe("Proofread issue ID"),
|
|
1167
|
+
},
|
|
1168
|
+
},
|
|
1169
|
+
async ({ issue_id }) => {
|
|
1170
|
+
const result = applySuggestion(issue_id);
|
|
1171
|
+
if (!result) return { content: [{ type: "text", text: `Issue '${issue_id}' not found.` }], isError: true };
|
|
1172
|
+
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
1173
|
+
}
|
|
1174
|
+
);
|
|
1175
|
+
|
|
1176
|
+
// ---------------------------------------------------------------------------
|
|
1177
|
+
// dismiss_issue
|
|
1178
|
+
// ---------------------------------------------------------------------------
|
|
1179
|
+
|
|
1180
|
+
server.registerTool(
|
|
1181
|
+
"dismiss_issue",
|
|
1182
|
+
{
|
|
1183
|
+
title: "Dismiss Proofread Issue",
|
|
1184
|
+
description: "Dismiss a proofread issue without modifying the transcript text.",
|
|
1185
|
+
inputSchema: {
|
|
1186
|
+
issue_id: z.string().describe("Proofread issue ID"),
|
|
1187
|
+
},
|
|
1188
|
+
},
|
|
1189
|
+
async ({ issue_id }) => {
|
|
1190
|
+
const result = dismissIssue(issue_id);
|
|
1191
|
+
if (!result) return { content: [{ type: "text", text: `Issue '${issue_id}' not found.` }], isError: true };
|
|
1192
|
+
return { content: [{ type: "text", text: JSON.stringify(result, null, 2) }] };
|
|
1193
|
+
}
|
|
1194
|
+
);
|
|
1195
|
+
|
|
1196
|
+
// ---------------------------------------------------------------------------
|
|
1197
|
+
// proofread_stats
|
|
1198
|
+
// ---------------------------------------------------------------------------
|
|
1199
|
+
|
|
1200
|
+
server.registerTool(
|
|
1201
|
+
"proofread_stats",
|
|
1202
|
+
{
|
|
1203
|
+
title: "Proofread Stats",
|
|
1204
|
+
description: "Get proofread issue statistics for a transcript: total, by type, pending/applied/dismissed counts.",
|
|
1205
|
+
inputSchema: {
|
|
1206
|
+
transcript_id: z.string().describe("Transcript ID"),
|
|
1207
|
+
},
|
|
1208
|
+
},
|
|
1209
|
+
async ({ transcript_id }) => {
|
|
1210
|
+
const stats = getProofreadStats(transcript_id);
|
|
1211
|
+
return { content: [{ type: "text", text: JSON.stringify(stats, null, 2) }] };
|
|
1212
|
+
}
|
|
1213
|
+
);
|
|
1214
|
+
|
|
1215
|
+
// ---------------------------------------------------------------------------
|
|
1216
|
+
// export_annotated
|
|
1217
|
+
// ---------------------------------------------------------------------------
|
|
1218
|
+
|
|
1219
|
+
server.registerTool(
|
|
1220
|
+
"export_annotated",
|
|
1221
|
+
{
|
|
1222
|
+
title: "Export Annotated Transcript",
|
|
1223
|
+
description: "Export transcript text with inline proofread annotations showing pending issues as [TYPE: \"original\" -> \"suggestion\"] markers.",
|
|
1224
|
+
inputSchema: {
|
|
1225
|
+
transcript_id: z.string().describe("Transcript ID"),
|
|
1226
|
+
},
|
|
1227
|
+
},
|
|
1228
|
+
async ({ transcript_id }) => {
|
|
1229
|
+
try {
|
|
1230
|
+
const text = exportAnnotated(transcript_id);
|
|
1231
|
+
return { content: [{ type: "text", text }] };
|
|
1232
|
+
} catch (error) {
|
|
1233
|
+
return { content: [{ type: "text", text: `Export failed: ${error instanceof Error ? error.message : error}` }], isError: true };
|
|
1234
|
+
}
|
|
1235
|
+
}
|
|
1236
|
+
);
|
|
1237
|
+
|
|
994
1238
|
// ---------------------------------------------------------------------------
|
|
995
1239
|
// search_tools / describe_tools
|
|
996
1240
|
// ---------------------------------------------------------------------------
|
|
@@ -1021,6 +1265,16 @@ server.registerTool(
|
|
|
1021
1265
|
"get_config",
|
|
1022
1266
|
"set_config",
|
|
1023
1267
|
"reset_config",
|
|
1268
|
+
"list_comments",
|
|
1269
|
+
"top_comments",
|
|
1270
|
+
"search_comments",
|
|
1271
|
+
"comment_stats",
|
|
1272
|
+
"proofread_transcript",
|
|
1273
|
+
"list_proofread_issues",
|
|
1274
|
+
"apply_suggestion",
|
|
1275
|
+
"dismiss_issue",
|
|
1276
|
+
"proofread_stats",
|
|
1277
|
+
"export_annotated",
|
|
1024
1278
|
"search_tools",
|
|
1025
1279
|
"describe_tools",
|
|
1026
1280
|
];
|
|
@@ -1048,6 +1302,12 @@ server.registerTool(
|
|
|
1048
1302
|
export_transcript: "Export as txt/srt/json. Params: id, format?",
|
|
1049
1303
|
transcript_stats: "Counts by status and provider.",
|
|
1050
1304
|
check_providers: "Check which API keys are configured.",
|
|
1305
|
+
proofread_transcript: "AI spellcheck/proofread. Params: id, types?, confidence_threshold?, provider?",
|
|
1306
|
+
list_proofread_issues: "List proofread issues. Params: transcript_id, issue_type?, status?",
|
|
1307
|
+
apply_suggestion: "Apply a proofread suggestion. Params: issue_id",
|
|
1308
|
+
dismiss_issue: "Dismiss a proofread issue. Params: issue_id",
|
|
1309
|
+
proofread_stats: "Proofread stats. Params: transcript_id",
|
|
1310
|
+
export_annotated: "Export with inline annotations. Params: transcript_id",
|
|
1051
1311
|
};
|
|
1052
1312
|
const result = names.map((n) => `${n}: ${descriptions[n] || "See tool schema"}`).join("\n");
|
|
1053
1313
|
return { content: [{ type: "text" as const, text: result }] };
|
package/package.json
CHANGED