@hasna/microservices 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/microservices/microservice-social/package.json +2 -1
  2. package/microservices/microservice-social/src/cli/index.ts +906 -12
  3. package/microservices/microservice-social/src/db/migrations.ts +72 -0
  4. package/microservices/microservice-social/src/db/social.ts +33 -3
  5. package/microservices/microservice-social/src/lib/audience.ts +353 -0
  6. package/microservices/microservice-social/src/lib/content-ai.ts +278 -0
  7. package/microservices/microservice-social/src/lib/media.ts +311 -0
  8. package/microservices/microservice-social/src/lib/mentions.ts +434 -0
  9. package/microservices/microservice-social/src/lib/metrics-sync.ts +264 -0
  10. package/microservices/microservice-social/src/lib/publisher.ts +377 -0
  11. package/microservices/microservice-social/src/lib/scheduler.ts +229 -0
  12. package/microservices/microservice-social/src/lib/sentiment.ts +256 -0
  13. package/microservices/microservice-social/src/lib/threads.ts +291 -0
  14. package/microservices/microservice-social/src/mcp/index.ts +776 -6
  15. package/microservices/microservice-social/src/server/index.ts +441 -0
  16. package/microservices/microservice-transcriber/src/cli/index.ts +247 -1
  17. package/microservices/microservice-transcriber/src/db/comments.ts +166 -0
  18. package/microservices/microservice-transcriber/src/db/migrations.ts +46 -0
  19. package/microservices/microservice-transcriber/src/db/proofread.ts +119 -0
  20. package/microservices/microservice-transcriber/src/lib/downloader.ts +68 -0
  21. package/microservices/microservice-transcriber/src/lib/proofread.ts +296 -0
  22. package/microservices/microservice-transcriber/src/mcp/index.ts +263 -3
  23. package/package.json +1 -1
@@ -22,7 +22,8 @@ import {
22
22
  type TranscriptStatus,
23
23
  type TranscriptSourceType,
24
24
  } from "../db/transcripts.js";
25
- import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, type TrimOptions } from "../lib/downloader.js";
25
+ import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, fetchComments, type TrimOptions } from "../lib/downloader.js";
26
+ import { createComment, listComments, searchComments, getCommentStats, getTopComments, importComments } from "../db/comments.js";
26
27
  import { transcribeFile, checkProviders, toSrt, toVtt, toAss, toMarkdown, segmentByChapters, formatWithConfidence, estimateCost } from "../lib/providers.js";
27
28
  import { getConfig, setConfig, resetConfig, CONFIG_DEFAULTS, CONFIG_KEYS, type ConfigKey } from "../lib/config.js";
28
29
  import { summarizeText, extractHighlights, generateMeetingNotes, getDefaultSummaryProvider } from "../lib/summarizer.js";
@@ -33,6 +34,7 @@ import { createAnnotation, listAnnotations, deleteAnnotation, formatTimestamp as
33
34
  import { pushToNotion } from "../lib/notion.js";
34
35
  import { startLiveTranscription } from "../lib/live.js";
35
36
  import { wordDiff, formatDiff, diffStats } from "../lib/diff.js";
37
+ import { proofreadTranscript, listIssues, applySuggestion, dismissIssue, getProofreadStats, exportAnnotated, type IssueType } from "../lib/proofread.js";
36
38
 
37
39
  const program = new Command();
38
40
 
@@ -56,6 +58,7 @@ program
56
58
  .option("--diarize", "Identify different speakers (ElevenLabs only)")
57
59
  .option("--vocab <words>", "Custom vocabulary hints (comma-separated, e.g. 'Karpathy,MicroGPT,SABR')")
58
60
  .option("--summarize", "Auto-summarize after transcription using AI")
61
+ .option("--comments", "Also fetch and store YouTube/Vimeo comments")
59
62
  .option("--force", "Re-transcribe even if URL was already transcribed")
60
63
  .option("--json", "Output as JSON")
61
64
  .action(async (rawSources: string[], opts) => {
@@ -202,6 +205,33 @@ program
202
205
  word_count: result.text.split(/\s+/).filter(Boolean).length, timestamp: new Date().toISOString(),
203
206
  });
204
207
 
208
+ // Fetch comments if requested
209
+ if (opts.comments && (sourceType === "youtube" || sourceType === "vimeo")) {
210
+ try {
211
+ if (!opts.json) process.stdout.write(" Fetching comments...");
212
+ const rawComments = await fetchComments(source);
213
+ if (rawComments.length > 0) {
214
+ const mapped = rawComments.map((c) => ({
215
+ platform: sourceType,
216
+ author: c.author,
217
+ author_handle: c.author_id,
218
+ comment_text: c.text,
219
+ likes: c.like_count,
220
+ reply_count: 0,
221
+ is_reply: c.parent !== null,
222
+ parent_comment_id: c.parent,
223
+ published_at: c.timestamp ? new Date(c.timestamp * 1000).toISOString() : null,
224
+ }));
225
+ importComments(record.id, mapped);
226
+ if (!opts.json) console.log(` ${rawComments.length} comment(s) imported.`);
227
+ } else {
228
+ if (!opts.json) console.log(" no comments found.");
229
+ }
230
+ } catch (e) {
231
+ if (!opts.json) console.error(` Warning: comment fetch failed — ${e instanceof Error ? e.message : e}`);
232
+ }
233
+ }
234
+
205
235
  results.push({ source, id: record.id, success: true });
206
236
 
207
237
  if (opts.json && !isBatch) {
@@ -1171,6 +1201,92 @@ annoCmd
1171
1201
  else { console.error("Annotation not found."); process.exit(1); }
1172
1202
  });
1173
1203
 
1204
+ // ---------------------------------------------------------------------------
1205
+ // comments
1206
+ // ---------------------------------------------------------------------------
1207
+
1208
+ const commentsCmd = program
1209
+ .command("comments")
1210
+ .description("Manage video comments extracted from YouTube/Vimeo");
1211
+
1212
+ commentsCmd
1213
+ .command("list <transcript-id>")
1214
+ .description("List comments for a transcript")
1215
+ .option("--top", "Sort by most liked")
1216
+ .option("--limit <n>", "Max results", "20")
1217
+ .option("--json", "Output as JSON")
1218
+ .action((transcriptId: string, opts) => {
1219
+ const comments = listComments(transcriptId, {
1220
+ limit: parseInt(opts.limit),
1221
+ top: opts.top,
1222
+ });
1223
+
1224
+ if (opts.json) {
1225
+ console.log(JSON.stringify(comments, null, 2));
1226
+ return;
1227
+ }
1228
+
1229
+ if (comments.length === 0) {
1230
+ console.log("No comments found.");
1231
+ return;
1232
+ }
1233
+
1234
+ for (const c of comments) {
1235
+ const likesStr = c.likes > 0 ? ` [${c.likes} likes]` : "";
1236
+ const replyStr = c.is_reply ? " (reply)" : "";
1237
+ console.log(`${c.author ?? "Anonymous"}${replyStr}${likesStr}`);
1238
+ console.log(` ${c.comment_text.slice(0, 200)}${c.comment_text.length > 200 ? "..." : ""}`);
1239
+ console.log();
1240
+ }
1241
+ });
1242
+
1243
+ commentsCmd
1244
+ .command("search <query>")
1245
+ .description("Search comment text across all transcripts")
1246
+ .option("--json", "Output as JSON")
1247
+ .action((query: string, opts) => {
1248
+ const results = searchComments(query);
1249
+
1250
+ if (opts.json) {
1251
+ console.log(JSON.stringify(results, null, 2));
1252
+ return;
1253
+ }
1254
+
1255
+ if (results.length === 0) {
1256
+ console.log(`No comments matching '${query}'.`);
1257
+ return;
1258
+ }
1259
+
1260
+ console.log(`Found ${results.length} comment(s):\n`);
1261
+ for (const c of results) {
1262
+ const likesStr = c.likes > 0 ? ` [${c.likes} likes]` : "";
1263
+ console.log(`${c.author ?? "Anonymous"}${likesStr} (transcript: ${c.transcript_id.slice(0, 8)})`);
1264
+ console.log(` ${c.comment_text.slice(0, 200)}${c.comment_text.length > 200 ? "..." : ""}`);
1265
+ console.log();
1266
+ }
1267
+ });
1268
+
1269
+ commentsCmd
1270
+ .command("stats <transcript-id>")
1271
+ .description("Show comment statistics for a transcript")
1272
+ .option("--json", "Output as JSON")
1273
+ .action((transcriptId: string, opts) => {
1274
+ const stats = getCommentStats(transcriptId);
1275
+
1276
+ if (opts.json) {
1277
+ console.log(JSON.stringify(stats, null, 2));
1278
+ return;
1279
+ }
1280
+
1281
+ console.log(`Total comments: ${stats.total}`);
1282
+ console.log(`Replies: ${stats.replies}`);
1283
+ console.log(`Unique authors: ${stats.unique_authors}`);
1284
+ console.log(`Avg likes: ${stats.avg_likes}`);
1285
+ if (stats.top_commenter) {
1286
+ console.log(`Top commenter: ${stats.top_commenter}`);
1287
+ }
1288
+ });
1289
+
1174
1290
  // ---------------------------------------------------------------------------
1175
1291
  // watch-feed
1176
1292
  // ---------------------------------------------------------------------------
@@ -1344,4 +1460,134 @@ configCmd
1344
1460
  console.log("Config reset to defaults.");
1345
1461
  });
1346
1462
 
1463
+ // ---------------------------------------------------------------------------
1464
+ // proofread
1465
+ // ---------------------------------------------------------------------------
1466
+
1467
+ const proofreadCmd = program
1468
+ .command("proofread")
1469
+ .description("AI-powered spellcheck and proofreading for transcripts");
1470
+
1471
+ proofreadCmd
1472
+ .command("run <transcript-id>")
1473
+ .description("Run AI proofreading on a transcript (non-destructive)")
1474
+ .option("--types <types>", "Comma-separated issue types: spelling,grammar,punctuation,clarity")
1475
+ .option("--confidence <n>", "Minimum confidence threshold 0-1 (default 0.7)", parseFloat)
1476
+ .option("--provider <provider>", "AI provider: openai or anthropic")
1477
+ .option("--json", "Output as JSON")
1478
+ .action(async (transcriptId: string, opts) => {
1479
+ const types = opts.types ? opts.types.split(",").map((t: string) => t.trim()) as IssueType[] : undefined;
1480
+ const confidence = opts.confidence ?? 0.7;
1481
+
1482
+ if (!opts.json) console.log(`Proofreading transcript ${transcriptId}...`);
1483
+
1484
+ try {
1485
+ const issues = await proofreadTranscript(transcriptId, { types, confidence_threshold: confidence, provider: opts.provider });
1486
+
1487
+ if (opts.json) {
1488
+ console.log(JSON.stringify(issues, null, 2));
1489
+ } else {
1490
+ console.log(`Found ${issues.length} issue(s):\n`);
1491
+ for (const issue of issues) {
1492
+ console.log(` [${issue.issue_type}] "${issue.original_text}" -> "${issue.suggestion ?? "(no suggestion)"}" (${((issue.confidence ?? 0) * 100).toFixed(0)}%)`);
1493
+ if (issue.explanation) console.log(` ${issue.explanation}`);
1494
+ }
1495
+ }
1496
+ } catch (error) {
1497
+ console.error(`Error: ${error instanceof Error ? error.message : error}`);
1498
+ process.exit(1);
1499
+ }
1500
+ });
1501
+
1502
+ proofreadCmd
1503
+ .command("issues <transcript-id>")
1504
+ .description("List proofread issues for a transcript")
1505
+ .option("--type <type>", "Filter by issue type: spelling, grammar, punctuation, clarity")
1506
+ .option("--pending", "Show only pending issues")
1507
+ .option("--json", "Output as JSON")
1508
+ .action((transcriptId: string, opts) => {
1509
+ const filters: { issue_type?: IssueType; status?: "pending" } = {};
1510
+ if (opts.type) filters.issue_type = opts.type as IssueType;
1511
+ if (opts.pending) filters.status = "pending";
1512
+
1513
+ const issues = listIssues(transcriptId, filters);
1514
+
1515
+ if (opts.json) {
1516
+ console.log(JSON.stringify(issues, null, 2));
1517
+ return;
1518
+ }
1519
+
1520
+ if (issues.length === 0) { console.log("No issues found."); return; }
1521
+
1522
+ for (const issue of issues) {
1523
+ const conf = issue.confidence !== null ? ` ${(issue.confidence * 100).toFixed(0)}%` : "";
1524
+ console.log(`${issue.id.slice(0, 8)} [${issue.status.padEnd(9)}] [${issue.issue_type.padEnd(11)}]${conf} "${issue.original_text}" -> "${issue.suggestion ?? "-"}"`);
1525
+ }
1526
+ });
1527
+
1528
+ proofreadCmd
1529
+ .command("apply <issue-id>")
1530
+ .description("Apply a proofread suggestion (modifies transcript text)")
1531
+ .option("--json", "Output as JSON")
1532
+ .action((issueId: string, opts) => {
1533
+ const updated = applySuggestion(issueId);
1534
+ if (!updated) { console.error(`Issue '${issueId}' not found.`); process.exit(1); }
1535
+
1536
+ if (opts.json) {
1537
+ console.log(JSON.stringify(updated, null, 2));
1538
+ } else {
1539
+ console.log(`Applied: "${updated.original_text}" -> "${updated.suggestion}"`);
1540
+ }
1541
+ });
1542
+
1543
+ proofreadCmd
1544
+ .command("dismiss <issue-id>")
1545
+ .description("Dismiss a proofread issue without changing text")
1546
+ .option("--json", "Output as JSON")
1547
+ .action((issueId: string, opts) => {
1548
+ const updated = dismissIssue(issueId);
1549
+ if (!updated) { console.error(`Issue '${issueId}' not found.`); process.exit(1); }
1550
+
1551
+ if (opts.json) {
1552
+ console.log(JSON.stringify(updated, null, 2));
1553
+ } else {
1554
+ console.log(`Dismissed: "${updated.original_text}"`);
1555
+ }
1556
+ });
1557
+
1558
+ proofreadCmd
1559
+ .command("export <transcript-id>")
1560
+ .description("Export transcript with inline proofread annotations")
1561
+ .action((transcriptId: string) => {
1562
+ try {
1563
+ const annotated = exportAnnotated(transcriptId);
1564
+ console.log(annotated);
1565
+ } catch (error) {
1566
+ console.error(`Error: ${error instanceof Error ? error.message : error}`);
1567
+ process.exit(1);
1568
+ }
1569
+ });
1570
+
1571
+ proofreadCmd
1572
+ .command("stats <transcript-id>")
1573
+ .description("Show proofread issue statistics")
1574
+ .option("--json", "Output as JSON")
1575
+ .action((transcriptId: string, opts) => {
1576
+ const stats = getProofreadStats(transcriptId);
1577
+
1578
+ if (opts.json) {
1579
+ console.log(JSON.stringify(stats, null, 2));
1580
+ return;
1581
+ }
1582
+
1583
+ console.log(`Total issues: ${stats.total}`);
1584
+ console.log(`Pending: ${stats.pending} | Applied: ${stats.applied} | Dismissed: ${stats.dismissed}`);
1585
+ if (Object.keys(stats.by_type).length > 0) {
1586
+ console.log("\nBy type:");
1587
+ for (const [type, count] of Object.entries(stats.by_type)) {
1588
+ console.log(` ${type.padEnd(12)} ${count}`);
1589
+ }
1590
+ }
1591
+ });
1592
+
1347
1593
  program.parse();
@@ -0,0 +1,166 @@
1
+ import { getDatabase } from "./database.js";
2
+
3
/** A single viewer comment attached to a transcript, as stored in SQLite. */
export interface Comment {
  id: string;                        // UUID primary key
  transcript_id: string;             // FK -> transcripts.id
  platform: string;                  // source platform (defaults to "youtube" on insert)
  author: string | null;             // display name, when the platform provides one
  author_handle: string | null;      // platform-specific author id/handle
  comment_text: string;
  likes: number;
  reply_count: number;
  is_reply: number;                  // SQLite boolean: 0 = top-level, 1 = reply
  parent_comment_id: string | null;  // platform comment id of the parent, for replies
  published_at: string | null;       // ISO-8601 timestamp, when known
  created_at: string;                // row insertion time (set by the DB default)
}

/** Input shape for inserting a comment; optional fields fall back to defaults on insert. */
export interface CreateCommentInput {
  transcript_id: string;
  platform?: string;                 // defaults to "youtube"
  author?: string | null;
  author_handle?: string | null;
  comment_text: string;
  likes?: number;                    // defaults to 0
  reply_count?: number;              // defaults to 0
  is_reply?: boolean;                // stored as 0/1
  parent_comment_id?: string | null;
  published_at?: string | null;
}

/** Paging/sorting options for listComments. */
export interface ListCommentsOptions {
  limit?: number;                    // defaults to 50
  offset?: number;                   // defaults to 0
  top?: boolean;                     // true: order by likes DESC; false: created_at ASC
}

/** Aggregate statistics returned by getCommentStats. */
export interface CommentStats {
  total: number;
  replies: number;
  unique_authors: number;            // distinct non-null authors
  avg_likes: number;                 // rounded to 2 decimal places
  top_commenter: string | null;      // most frequent non-null author, or null if none
}
44
+
45
+ export function createComment(data: CreateCommentInput): Comment {
46
+ const db = getDatabase();
47
+ const id = crypto.randomUUID();
48
+
49
+ db.prepare(`
50
+ INSERT INTO transcript_comments (id, transcript_id, platform, author, author_handle, comment_text, likes, reply_count, is_reply, parent_comment_id, published_at)
51
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
52
+ `).run(
53
+ id,
54
+ data.transcript_id,
55
+ data.platform ?? "youtube",
56
+ data.author ?? null,
57
+ data.author_handle ?? null,
58
+ data.comment_text,
59
+ data.likes ?? 0,
60
+ data.reply_count ?? 0,
61
+ data.is_reply ? 1 : 0,
62
+ data.parent_comment_id ?? null,
63
+ data.published_at ?? null,
64
+ );
65
+
66
+ return getComment(id)!;
67
+ }
68
+
69
+ export function getComment(id: string): Comment | null {
70
+ const db = getDatabase();
71
+ return db.prepare("SELECT * FROM transcript_comments WHERE id = ?").get(id) as Comment | null;
72
+ }
73
+
74
+ export function listComments(transcriptId: string, options: ListCommentsOptions = {}): Comment[] {
75
+ const db = getDatabase();
76
+ const limit = options.limit ?? 50;
77
+ const offset = options.offset ?? 0;
78
+ const orderBy = options.top ? "likes DESC" : "created_at ASC";
79
+
80
+ return db
81
+ .prepare(`SELECT * FROM transcript_comments WHERE transcript_id = ? ORDER BY ${orderBy} LIMIT ? OFFSET ?`)
82
+ .all(transcriptId, limit, offset) as Comment[];
83
+ }
84
+
85
+ export function deleteComment(id: string): boolean {
86
+ const db = getDatabase();
87
+ return db.prepare("DELETE FROM transcript_comments WHERE id = ?").run(id).changes > 0;
88
+ }
89
+
90
+ export function getTopComments(transcriptId: string, limit = 10): Comment[] {
91
+ const db = getDatabase();
92
+ return db
93
+ .prepare("SELECT * FROM transcript_comments WHERE transcript_id = ? ORDER BY likes DESC LIMIT ?")
94
+ .all(transcriptId, limit) as Comment[];
95
+ }
96
+
97
+ export function searchComments(query: string): Comment[] {
98
+ const db = getDatabase();
99
+ const q = `%${query}%`;
100
+ return db
101
+ .prepare("SELECT * FROM transcript_comments WHERE comment_text LIKE ? ORDER BY likes DESC LIMIT 50")
102
+ .all(q) as Comment[];
103
+ }
104
+
105
+ export function getCommentStats(transcriptId: string): CommentStats {
106
+ const db = getDatabase();
107
+
108
+ const total = (
109
+ db.prepare("SELECT COUNT(*) as n FROM transcript_comments WHERE transcript_id = ?").get(transcriptId) as { n: number }
110
+ ).n;
111
+
112
+ const replies = (
113
+ db.prepare("SELECT COUNT(*) as n FROM transcript_comments WHERE transcript_id = ? AND is_reply = 1").get(transcriptId) as { n: number }
114
+ ).n;
115
+
116
+ const uniqueAuthors = (
117
+ db.prepare("SELECT COUNT(DISTINCT author) as n FROM transcript_comments WHERE transcript_id = ? AND author IS NOT NULL").get(transcriptId) as { n: number }
118
+ ).n;
119
+
120
+ const avgLikes = (
121
+ db.prepare("SELECT AVG(likes) as avg FROM transcript_comments WHERE transcript_id = ?").get(transcriptId) as { avg: number | null }
122
+ ).avg ?? 0;
123
+
124
+ const topRow = db
125
+ .prepare("SELECT author, COUNT(*) as cnt FROM transcript_comments WHERE transcript_id = ? AND author IS NOT NULL GROUP BY author ORDER BY cnt DESC LIMIT 1")
126
+ .get(transcriptId) as { author: string; cnt: number } | null;
127
+
128
+ return {
129
+ total,
130
+ replies,
131
+ unique_authors: uniqueAuthors,
132
+ avg_likes: Math.round(avgLikes * 100) / 100,
133
+ top_commenter: topRow?.author ?? null,
134
+ };
135
+ }
136
+
137
+ export function importComments(transcriptId: string, comments: Array<Omit<CreateCommentInput, "transcript_id">>): number {
138
+ const db = getDatabase();
139
+ const stmt = db.prepare(`
140
+ INSERT INTO transcript_comments (id, transcript_id, platform, author, author_handle, comment_text, likes, reply_count, is_reply, parent_comment_id, published_at)
141
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
142
+ `);
143
+
144
+ let count = 0;
145
+ const transaction = db.transaction(() => {
146
+ for (const c of comments) {
147
+ stmt.run(
148
+ crypto.randomUUID(),
149
+ transcriptId,
150
+ c.platform ?? "youtube",
151
+ c.author ?? null,
152
+ c.author_handle ?? null,
153
+ c.comment_text,
154
+ c.likes ?? 0,
155
+ c.reply_count ?? 0,
156
+ c.is_reply ? 1 : 0,
157
+ c.parent_comment_id ?? null,
158
+ c.published_at ?? null,
159
+ );
160
+ count++;
161
+ }
162
+ });
163
+ transaction();
164
+
165
+ return count;
166
+ }
@@ -69,4 +69,50 @@ export const MIGRATIONS: MigrationEntry[] = [
69
69
  CREATE INDEX IF NOT EXISTS idx_annotations_transcript ON annotations(transcript_id);
70
70
  `,
71
71
  },
72
+ {
73
+ id: 5,
74
+ name: "add_transcript_comments",
75
+ sql: `
76
+ CREATE TABLE IF NOT EXISTS transcript_comments (
77
+ id TEXT PRIMARY KEY,
78
+ transcript_id TEXT NOT NULL,
79
+ platform TEXT NOT NULL DEFAULT 'youtube',
80
+ author TEXT,
81
+ author_handle TEXT,
82
+ comment_text TEXT NOT NULL,
83
+ likes INTEGER DEFAULT 0,
84
+ reply_count INTEGER DEFAULT 0,
85
+ is_reply INTEGER DEFAULT 0,
86
+ parent_comment_id TEXT,
87
+ published_at TEXT,
88
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
89
+ FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
90
+ );
91
+ CREATE INDEX IF NOT EXISTS idx_comments_transcript ON transcript_comments(transcript_id);
92
+ CREATE INDEX IF NOT EXISTS idx_comments_likes ON transcript_comments(likes DESC);
93
+ `,
94
+ },
95
+ {
96
+ id: 6,
97
+ name: "add_proofread_issues",
98
+ sql: `
99
+ CREATE TABLE proofread_issues (
100
+ id TEXT PRIMARY KEY,
101
+ transcript_id TEXT NOT NULL,
102
+ issue_type TEXT NOT NULL CHECK(issue_type IN ('spelling','grammar','punctuation','clarity')),
103
+ position_start INTEGER,
104
+ position_end INTEGER,
105
+ original_text TEXT NOT NULL,
106
+ suggestion TEXT,
107
+ confidence REAL,
108
+ explanation TEXT,
109
+ status TEXT DEFAULT 'pending' CHECK(status IN ('pending','applied','dismissed')),
110
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
111
+ FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
112
+ );
113
+ CREATE INDEX idx_proofread_transcript ON proofread_issues(transcript_id);
114
+ CREATE INDEX idx_proofread_type ON proofread_issues(issue_type);
115
+ CREATE INDEX idx_proofread_status ON proofread_issues(status);
116
+ `,
117
+ },
72
118
  ];
@@ -0,0 +1,119 @@
1
+ import { getDatabase } from "./database.js";
2
+
3
/** Category of a proofreading finding. */
export type IssueType = "spelling" | "grammar" | "punctuation" | "clarity";
/** Lifecycle state: pending review, applied to the transcript, or dismissed. */
export type IssueStatus = "pending" | "applied" | "dismissed";

/** One proofreading finding for a transcript, as stored in SQLite. */
export interface ProofreadIssue {
  id: string;                     // UUID primary key
  transcript_id: string;          // FK -> transcripts.id
  issue_type: IssueType;
  position_start: number | null;  // character offsets into the transcript text, when known
  position_end: number | null;
  original_text: string;          // the flagged span
  suggestion: string | null;      // proposed replacement, if any
  confidence: number | null;      // model confidence in [0, 1], when reported
  explanation: string | null;     // model's rationale, when reported
  status: IssueStatus;            // defaults to "pending" on insert
  created_at: string;             // set by the DB default
}

/** Input shape for inserting an issue; omitted optionals are stored as NULL. */
export interface CreateProofreadIssueInput {
  transcript_id: string;
  issue_type: IssueType;
  position_start?: number;
  position_end?: number;
  original_text: string;
  suggestion?: string;
  confidence?: number;
  explanation?: string;
}

/** Optional filters for listProofreadIssues; omitted filters match everything. */
export interface ListProofreadIssuesOptions {
  issue_type?: IssueType;
  status?: IssueStatus;
}
35
+
36
+ export function createProofreadIssue(input: CreateProofreadIssueInput): ProofreadIssue {
37
+ const db = getDatabase();
38
+ const id = crypto.randomUUID();
39
+ db.prepare(`
40
+ INSERT INTO proofread_issues (id, transcript_id, issue_type, position_start, position_end, original_text, suggestion, confidence, explanation)
41
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
42
+ `).run(
43
+ id,
44
+ input.transcript_id,
45
+ input.issue_type,
46
+ input.position_start ?? null,
47
+ input.position_end ?? null,
48
+ input.original_text,
49
+ input.suggestion ?? null,
50
+ input.confidence ?? null,
51
+ input.explanation ?? null
52
+ );
53
+ return getProofreadIssue(id)!;
54
+ }
55
+
56
+ export function getProofreadIssue(id: string): ProofreadIssue | null {
57
+ const db = getDatabase();
58
+ const row = db.prepare("SELECT * FROM proofread_issues WHERE id = ?").get(id) as ProofreadIssue | null;
59
+ return row ?? null;
60
+ }
61
+
62
+ export function listProofreadIssues(transcriptId: string, options: ListProofreadIssuesOptions = {}): ProofreadIssue[] {
63
+ const db = getDatabase();
64
+ const conditions: string[] = ["transcript_id = ?"];
65
+ const values: unknown[] = [transcriptId];
66
+
67
+ if (options.issue_type) { conditions.push("issue_type = ?"); values.push(options.issue_type); }
68
+ if (options.status) { conditions.push("status = ?"); values.push(options.status); }
69
+
70
+ const where = conditions.join(" AND ");
71
+ return db
72
+ .prepare(`SELECT * FROM proofread_issues WHERE ${where} ORDER BY position_start ASC, created_at ASC`)
73
+ .all(...values) as ProofreadIssue[];
74
+ }
75
+
76
+ export function updateIssueStatus(id: string, status: IssueStatus): ProofreadIssue | null {
77
+ const db = getDatabase();
78
+ const existing = getProofreadIssue(id);
79
+ if (!existing) return null;
80
+ db.prepare("UPDATE proofread_issues SET status = ? WHERE id = ?").run(status, id);
81
+ return getProofreadIssue(id);
82
+ }
83
+
84
+ export function deleteProofreadIssuesByTranscript(transcriptId: string): number {
85
+ const db = getDatabase();
86
+ return db.prepare("DELETE FROM proofread_issues WHERE transcript_id = ?").run(transcriptId).changes;
87
+ }
88
+
89
+ export interface ProofreadStats {
90
+ total: number;
91
+ by_type: Record<string, number>;
92
+ pending: number;
93
+ applied: number;
94
+ dismissed: number;
95
+ }
96
+
97
+ export function getProofreadStats(transcriptId: string): ProofreadStats {
98
+ const db = getDatabase();
99
+
100
+ const total = (db.prepare("SELECT COUNT(*) as n FROM proofread_issues WHERE transcript_id = ?").get(transcriptId) as { n: number }).n;
101
+
102
+ const byType = db
103
+ .prepare("SELECT issue_type, COUNT(*) as n FROM proofread_issues WHERE transcript_id = ? GROUP BY issue_type")
104
+ .all(transcriptId) as { issue_type: string; n: number }[];
105
+
106
+ const byStatus = db
107
+ .prepare("SELECT status, COUNT(*) as n FROM proofread_issues WHERE transcript_id = ? GROUP BY status")
108
+ .all(transcriptId) as { status: string; n: number }[];
109
+
110
+ const statusMap = Object.fromEntries(byStatus.map((r) => [r.status, r.n]));
111
+
112
+ return {
113
+ total,
114
+ by_type: Object.fromEntries(byType.map((r) => [r.issue_type, r.n])),
115
+ pending: statusMap["pending"] ?? 0,
116
+ applied: statusMap["applied"] ?? 0,
117
+ dismissed: statusMap["dismissed"] ?? 0,
118
+ };
119
+ }
@@ -556,6 +556,74 @@ export async function splitAudioIntoChunks(
556
556
  return chunks;
557
557
  }
558
558
 
559
+ /**
560
+ * Raw comment from yt-dlp .info.json comments array.
561
+ */
562
+ export interface RawComment {
563
+ author: string | null;
564
+ author_id: string | null;
565
+ text: string;
566
+ like_count: number;
567
+ timestamp: number | null;
568
+ parent: string | null; // "root" for top-level, comment id for replies
569
+ id: string;
570
+ }
571
+
572
+ /**
573
+ * Fetch comments for a video URL using yt-dlp --write-comments.
574
+ * Downloads only the .info.json (no media) and parses the comments array.
575
+ */
576
+ export async function fetchComments(url: string): Promise<RawComment[]> {
577
+ const tempId = crypto.randomUUID();
578
+ const outputTemplate = join(tmpdir(), `comments-${tempId}`);
579
+
580
+ const proc = Bun.spawn(
581
+ [ytdlp(), "--write-comments", "--skip-download", "--no-write-thumbnail", "-o", outputTemplate, url],
582
+ { stdout: "pipe", stderr: "pipe" }
583
+ );
584
+
585
+ const [exitCode, , stderr] = await Promise.all([
586
+ proc.exited,
587
+ new Response(proc.stdout).text(),
588
+ new Response(proc.stderr).text(),
589
+ ]);
590
+
591
+ if (exitCode !== 0) {
592
+ throw new Error(`yt-dlp comment fetch failed (exit ${exitCode}): ${stderr.trim()}`);
593
+ }
594
+
595
+ // yt-dlp writes <output>.info.json
596
+ const infoPath = `${outputTemplate}.info.json`;
597
+ const { readFileSync, unlinkSync: unlinkFile, existsSync: fileExists } = await import("node:fs");
598
+
599
+ if (!fileExists(infoPath)) {
600
+ throw new Error("yt-dlp did not produce an info.json file for comments");
601
+ }
602
+
603
+ try {
604
+ const raw = JSON.parse(readFileSync(infoPath, "utf8"));
605
+ const comments: RawComment[] = [];
606
+
607
+ if (Array.isArray(raw.comments)) {
608
+ for (const c of raw.comments) {
609
+ comments.push({
610
+ author: c.author ?? null,
611
+ author_id: c.author_id ?? null,
612
+ text: typeof c.text === "string" ? c.text : String(c.text ?? ""),
613
+ like_count: typeof c.like_count === "number" ? c.like_count : 0,
614
+ timestamp: typeof c.timestamp === "number" ? c.timestamp : null,
615
+ parent: c.parent === "root" ? null : (c.parent ?? null),
616
+ id: c.id ?? crypto.randomUUID(),
617
+ });
618
+ }
619
+ }
620
+
621
+ return comments;
622
+ } finally {
623
+ try { unlinkFile(infoPath); } catch {}
624
+ }
625
+ }
626
+
559
627
  /**
560
628
  * Check whether yt-dlp is available on the system.
561
629
  */