@hasna/microservices 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/microservices/microservice-social/package.json +2 -1
  2. package/microservices/microservice-social/src/cli/index.ts +906 -12
  3. package/microservices/microservice-social/src/db/migrations.ts +72 -0
  4. package/microservices/microservice-social/src/db/social.ts +33 -3
  5. package/microservices/microservice-social/src/lib/audience.ts +353 -0
  6. package/microservices/microservice-social/src/lib/content-ai.ts +278 -0
  7. package/microservices/microservice-social/src/lib/media.ts +311 -0
  8. package/microservices/microservice-social/src/lib/mentions.ts +434 -0
  9. package/microservices/microservice-social/src/lib/metrics-sync.ts +264 -0
  10. package/microservices/microservice-social/src/lib/publisher.ts +377 -0
  11. package/microservices/microservice-social/src/lib/scheduler.ts +229 -0
  12. package/microservices/microservice-social/src/lib/sentiment.ts +256 -0
  13. package/microservices/microservice-social/src/lib/threads.ts +291 -0
  14. package/microservices/microservice-social/src/mcp/index.ts +776 -6
  15. package/microservices/microservice-social/src/server/index.ts +441 -0
  16. package/microservices/microservice-transcriber/src/cli/index.ts +247 -1
  17. package/microservices/microservice-transcriber/src/db/comments.ts +166 -0
  18. package/microservices/microservice-transcriber/src/db/migrations.ts +46 -0
  19. package/microservices/microservice-transcriber/src/db/proofread.ts +119 -0
  20. package/microservices/microservice-transcriber/src/lib/downloader.ts +68 -0
  21. package/microservices/microservice-transcriber/src/lib/proofread.ts +296 -0
  22. package/microservices/microservice-transcriber/src/mcp/index.ts +263 -3
  23. package/package.json +1 -1
@@ -22,7 +22,8 @@ import {
22
22
  type TranscriptStatus,
23
23
  type TranscriptSourceType,
24
24
  } from "../db/transcripts.js";
25
- import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, type TrimOptions } from "../lib/downloader.js";
25
+ import { prepareAudio, detectSourceType, getVideoInfo, downloadAudio, downloadVideo, createClip, isPlaylistUrl, getPlaylistUrls, fetchComments, type TrimOptions } from "../lib/downloader.js";
26
+ import { createComment, listComments, searchComments, getCommentStats, getTopComments, importComments } from "../db/comments.js";
26
27
  import { transcribeFile, checkProviders, toSrt, toVtt, toAss, toMarkdown, segmentByChapters, formatWithConfidence, estimateCost } from "../lib/providers.js";
27
28
  import { getConfig, setConfig, resetConfig, CONFIG_DEFAULTS, CONFIG_KEYS, type ConfigKey } from "../lib/config.js";
28
29
  import { summarizeText, extractHighlights, generateMeetingNotes, getDefaultSummaryProvider } from "../lib/summarizer.js";
@@ -33,6 +34,7 @@ import { createAnnotation, listAnnotations, deleteAnnotation, formatTimestamp as
33
34
  import { pushToNotion } from "../lib/notion.js";
34
35
  import { startLiveTranscription } from "../lib/live.js";
35
36
  import { wordDiff, formatDiff, diffStats } from "../lib/diff.js";
37
+ import { proofreadTranscript, listIssues, applySuggestion, dismissIssue, getProofreadStats, exportAnnotated, type IssueType } from "../lib/proofread.js";
36
38
 
37
39
  const program = new Command();
38
40
 
@@ -56,6 +58,7 @@ program
56
58
  .option("--diarize", "Identify different speakers (ElevenLabs only)")
57
59
  .option("--vocab <words>", "Custom vocabulary hints (comma-separated, e.g. 'Karpathy,MicroGPT,SABR')")
58
60
  .option("--summarize", "Auto-summarize after transcription using AI")
61
+ .option("--comments", "Also fetch and store YouTube/Vimeo comments")
59
62
  .option("--force", "Re-transcribe even if URL was already transcribed")
60
63
  .option("--json", "Output as JSON")
61
64
  .action(async (rawSources: string[], opts) => {
@@ -202,6 +205,33 @@ program
202
205
  word_count: result.text.split(/\s+/).filter(Boolean).length, timestamp: new Date().toISOString(),
203
206
  });
204
207
 
208
+ // Fetch comments if requested
209
+ if (opts.comments && (sourceType === "youtube" || sourceType === "vimeo")) {
210
+ try {
211
+ if (!opts.json) process.stdout.write(" Fetching comments...");
212
+ const rawComments = await fetchComments(source);
213
+ if (rawComments.length > 0) {
214
+ const mapped = rawComments.map((c) => ({
215
+ platform: sourceType,
216
+ author: c.author,
217
+ author_handle: c.author_id,
218
+ comment_text: c.text,
219
+ likes: c.like_count,
220
+ reply_count: 0,
221
+ is_reply: c.parent !== null,
222
+ parent_comment_id: c.parent,
223
+ published_at: c.timestamp ? new Date(c.timestamp * 1000).toISOString() : null,
224
+ }));
225
+ importComments(record.id, mapped);
226
+ if (!opts.json) console.log(` ${rawComments.length} comment(s) imported.`);
227
+ } else {
228
+ if (!opts.json) console.log(" no comments found.");
229
+ }
230
+ } catch (e) {
231
+ if (!opts.json) console.error(` Warning: comment fetch failed — ${e instanceof Error ? e.message : e}`);
232
+ }
233
+ }
234
+
205
235
  results.push({ source, id: record.id, success: true });
206
236
 
207
237
  if (opts.json && !isBatch) {
@@ -1171,6 +1201,92 @@ annoCmd
1171
1201
  else { console.error("Annotation not found."); process.exit(1); }
1172
1202
  });
1173
1203
 
1204
+ // ---------------------------------------------------------------------------
1205
+ // comments
1206
+ // ---------------------------------------------------------------------------
1207
+
1208
+ const commentsCmd = program
1209
+ .command("comments")
1210
+ .description("Manage video comments extracted from YouTube/Vimeo");
1211
+
1212
+ commentsCmd
1213
+ .command("list <transcript-id>")
1214
+ .description("List comments for a transcript")
1215
+ .option("--top", "Sort by most liked")
1216
+ .option("--limit <n>", "Max results", "20")
1217
+ .option("--json", "Output as JSON")
1218
+ .action((transcriptId: string, opts) => {
1219
+ const comments = listComments(transcriptId, {
1220
+ limit: parseInt(opts.limit),
1221
+ top: opts.top,
1222
+ });
1223
+
1224
+ if (opts.json) {
1225
+ console.log(JSON.stringify(comments, null, 2));
1226
+ return;
1227
+ }
1228
+
1229
+ if (comments.length === 0) {
1230
+ console.log("No comments found.");
1231
+ return;
1232
+ }
1233
+
1234
+ for (const c of comments) {
1235
+ const likesStr = c.likes > 0 ? ` [${c.likes} likes]` : "";
1236
+ const replyStr = c.is_reply ? " (reply)" : "";
1237
+ console.log(`${c.author ?? "Anonymous"}${replyStr}${likesStr}`);
1238
+ console.log(` ${c.comment_text.slice(0, 200)}${c.comment_text.length > 200 ? "..." : ""}`);
1239
+ console.log();
1240
+ }
1241
+ });
1242
+
1243
+ commentsCmd
1244
+ .command("search <query>")
1245
+ .description("Search comment text across all transcripts")
1246
+ .option("--json", "Output as JSON")
1247
+ .action((query: string, opts) => {
1248
+ const results = searchComments(query);
1249
+
1250
+ if (opts.json) {
1251
+ console.log(JSON.stringify(results, null, 2));
1252
+ return;
1253
+ }
1254
+
1255
+ if (results.length === 0) {
1256
+ console.log(`No comments matching '${query}'.`);
1257
+ return;
1258
+ }
1259
+
1260
+ console.log(`Found ${results.length} comment(s):\n`);
1261
+ for (const c of results) {
1262
+ const likesStr = c.likes > 0 ? ` [${c.likes} likes]` : "";
1263
+ console.log(`${c.author ?? "Anonymous"}${likesStr} (transcript: ${c.transcript_id.slice(0, 8)})`);
1264
+ console.log(` ${c.comment_text.slice(0, 200)}${c.comment_text.length > 200 ? "..." : ""}`);
1265
+ console.log();
1266
+ }
1267
+ });
1268
+
1269
+ commentsCmd
1270
+ .command("stats <transcript-id>")
1271
+ .description("Show comment statistics for a transcript")
1272
+ .option("--json", "Output as JSON")
1273
+ .action((transcriptId: string, opts) => {
1274
+ const stats = getCommentStats(transcriptId);
1275
+
1276
+ if (opts.json) {
1277
+ console.log(JSON.stringify(stats, null, 2));
1278
+ return;
1279
+ }
1280
+
1281
+ console.log(`Total comments: ${stats.total}`);
1282
+ console.log(`Replies: ${stats.replies}`);
1283
+ console.log(`Unique authors: ${stats.unique_authors}`);
1284
+ console.log(`Avg likes: ${stats.avg_likes}`);
1285
+ if (stats.top_commenter) {
1286
+ console.log(`Top commenter: ${stats.top_commenter}`);
1287
+ }
1288
+ });
1289
+
1174
1290
  // ---------------------------------------------------------------------------
1175
1291
  // watch-feed
1176
1292
  // ---------------------------------------------------------------------------
@@ -1344,4 +1460,134 @@ configCmd
1344
1460
  console.log("Config reset to defaults.");
1345
1461
  });
1346
1462
 
1463
+ // ---------------------------------------------------------------------------
1464
+ // proofread
1465
+ // ---------------------------------------------------------------------------
1466
+
1467
+ const proofreadCmd = program
1468
+ .command("proofread")
1469
+ .description("AI-powered spellcheck and proofreading for transcripts");
1470
+
1471
+ proofreadCmd
1472
+ .command("run <transcript-id>")
1473
+ .description("Run AI proofreading on a transcript (non-destructive)")
1474
+ .option("--types <types>", "Comma-separated issue types: spelling,grammar,punctuation,clarity")
1475
+ .option("--confidence <n>", "Minimum confidence threshold 0-1 (default 0.7)", parseFloat)
1476
+ .option("--provider <provider>", "AI provider: openai or anthropic")
1477
+ .option("--json", "Output as JSON")
1478
+ .action(async (transcriptId: string, opts) => {
1479
+ const types = opts.types ? opts.types.split(",").map((t: string) => t.trim()) as IssueType[] : undefined;
1480
+ const confidence = opts.confidence ?? 0.7;
1481
+
1482
+ if (!opts.json) console.log(`Proofreading transcript ${transcriptId}...`);
1483
+
1484
+ try {
1485
+ const issues = await proofreadTranscript(transcriptId, { types, confidence_threshold: confidence, provider: opts.provider });
1486
+
1487
+ if (opts.json) {
1488
+ console.log(JSON.stringify(issues, null, 2));
1489
+ } else {
1490
+ console.log(`Found ${issues.length} issue(s):\n`);
1491
+ for (const issue of issues) {
1492
+ console.log(` [${issue.issue_type}] "${issue.original_text}" -> "${issue.suggestion ?? "(no suggestion)"}" (${((issue.confidence ?? 0) * 100).toFixed(0)}%)`);
1493
+ if (issue.explanation) console.log(` ${issue.explanation}`);
1494
+ }
1495
+ }
1496
+ } catch (error) {
1497
+ console.error(`Error: ${error instanceof Error ? error.message : error}`);
1498
+ process.exit(1);
1499
+ }
1500
+ });
1501
+
1502
+ proofreadCmd
1503
+ .command("issues <transcript-id>")
1504
+ .description("List proofread issues for a transcript")
1505
+ .option("--type <type>", "Filter by issue type: spelling, grammar, punctuation, clarity")
1506
+ .option("--pending", "Show only pending issues")
1507
+ .option("--json", "Output as JSON")
1508
+ .action((transcriptId: string, opts) => {
1509
+ const filters: { issue_type?: IssueType; status?: "pending" } = {};
1510
+ if (opts.type) filters.issue_type = opts.type as IssueType;
1511
+ if (opts.pending) filters.status = "pending";
1512
+
1513
+ const issues = listIssues(transcriptId, filters);
1514
+
1515
+ if (opts.json) {
1516
+ console.log(JSON.stringify(issues, null, 2));
1517
+ return;
1518
+ }
1519
+
1520
+ if (issues.length === 0) { console.log("No issues found."); return; }
1521
+
1522
+ for (const issue of issues) {
1523
+ const conf = issue.confidence !== null ? ` ${(issue.confidence * 100).toFixed(0)}%` : "";
1524
+ console.log(`${issue.id.slice(0, 8)} [${issue.status.padEnd(9)}] [${issue.issue_type.padEnd(11)}]${conf} "${issue.original_text}" -> "${issue.suggestion ?? "-"}"`);
1525
+ }
1526
+ });
1527
+
1528
+ proofreadCmd
1529
+ .command("apply <issue-id>")
1530
+ .description("Apply a proofread suggestion (modifies transcript text)")
1531
+ .option("--json", "Output as JSON")
1532
+ .action((issueId: string, opts) => {
1533
+ const updated = applySuggestion(issueId);
1534
+ if (!updated) { console.error(`Issue '${issueId}' not found.`); process.exit(1); }
1535
+
1536
+ if (opts.json) {
1537
+ console.log(JSON.stringify(updated, null, 2));
1538
+ } else {
1539
+ console.log(`Applied: "${updated.original_text}" -> "${updated.suggestion}"`);
1540
+ }
1541
+ });
1542
+
1543
+ proofreadCmd
1544
+ .command("dismiss <issue-id>")
1545
+ .description("Dismiss a proofread issue without changing text")
1546
+ .option("--json", "Output as JSON")
1547
+ .action((issueId: string, opts) => {
1548
+ const updated = dismissIssue(issueId);
1549
+ if (!updated) { console.error(`Issue '${issueId}' not found.`); process.exit(1); }
1550
+
1551
+ if (opts.json) {
1552
+ console.log(JSON.stringify(updated, null, 2));
1553
+ } else {
1554
+ console.log(`Dismissed: "${updated.original_text}"`);
1555
+ }
1556
+ });
1557
+
1558
+ proofreadCmd
1559
+ .command("export <transcript-id>")
1560
+ .description("Export transcript with inline proofread annotations")
1561
+ .action((transcriptId: string) => {
1562
+ try {
1563
+ const annotated = exportAnnotated(transcriptId);
1564
+ console.log(annotated);
1565
+ } catch (error) {
1566
+ console.error(`Error: ${error instanceof Error ? error.message : error}`);
1567
+ process.exit(1);
1568
+ }
1569
+ });
1570
+
1571
+ proofreadCmd
1572
+ .command("stats <transcript-id>")
1573
+ .description("Show proofread issue statistics")
1574
+ .option("--json", "Output as JSON")
1575
+ .action((transcriptId: string, opts) => {
1576
+ const stats = getProofreadStats(transcriptId);
1577
+
1578
+ if (opts.json) {
1579
+ console.log(JSON.stringify(stats, null, 2));
1580
+ return;
1581
+ }
1582
+
1583
+ console.log(`Total issues: ${stats.total}`);
1584
+ console.log(`Pending: ${stats.pending} | Applied: ${stats.applied} | Dismissed: ${stats.dismissed}`);
1585
+ if (Object.keys(stats.by_type).length > 0) {
1586
+ console.log("\nBy type:");
1587
+ for (const [type, count] of Object.entries(stats.by_type)) {
1588
+ console.log(` ${type.padEnd(12)} ${count}`);
1589
+ }
1590
+ }
1591
+ });
1592
+
1347
1593
  program.parse();
@@ -0,0 +1,166 @@
1
+ import { getDatabase } from "./database.js";
2
+
3
/** A single viewer comment attached to a transcript, as stored in SQLite. */
export interface Comment {
  id: string;                        // UUID primary key
  transcript_id: string;             // FK -> transcripts.id
  platform: string;                  // source platform (defaults to "youtube" on insert)
  author: string | null;             // display name, when the platform provides one
  author_handle: string | null;      // platform-specific author id/handle
  comment_text: string;
  likes: number;
  reply_count: number;
  is_reply: number;                  // SQLite boolean: 0 = top-level, 1 = reply
  parent_comment_id: string | null;  // platform comment id of the parent, for replies
  published_at: string | null;       // ISO-8601 timestamp, when known
  created_at: string;                // row insertion time (set by the DB default)
}

/** Input shape for inserting a comment; optional fields fall back to defaults on insert. */
export interface CreateCommentInput {
  transcript_id: string;
  platform?: string;                 // defaults to "youtube"
  author?: string | null;
  author_handle?: string | null;
  comment_text: string;
  likes?: number;                    // defaults to 0
  reply_count?: number;              // defaults to 0
  is_reply?: boolean;                // stored as 0/1
  parent_comment_id?: string | null;
  published_at?: string | null;
}

/** Paging/sorting options for listComments. */
export interface ListCommentsOptions {
  limit?: number;                    // defaults to 50
  offset?: number;                   // defaults to 0
  top?: boolean;                     // true: order by likes DESC; false: created_at ASC
}

/** Aggregate statistics returned by getCommentStats. */
export interface CommentStats {
  total: number;
  replies: number;
  unique_authors: number;            // distinct non-null authors
  avg_likes: number;                 // rounded to 2 decimal places
  top_commenter: string | null;      // most frequent non-null author, or null if none
}
44
+
45
+ export function createComment(data: CreateCommentInput): Comment {
46
+ const db = getDatabase();
47
+ const id = crypto.randomUUID();
48
+
49
+ db.prepare(`
50
+ INSERT INTO transcript_comments (id, transcript_id, platform, author, author_handle, comment_text, likes, reply_count, is_reply, parent_comment_id, published_at)
51
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
52
+ `).run(
53
+ id,
54
+ data.transcript_id,
55
+ data.platform ?? "youtube",
56
+ data.author ?? null,
57
+ data.author_handle ?? null,
58
+ data.comment_text,
59
+ data.likes ?? 0,
60
+ data.reply_count ?? 0,
61
+ data.is_reply ? 1 : 0,
62
+ data.parent_comment_id ?? null,
63
+ data.published_at ?? null,
64
+ );
65
+
66
+ return getComment(id)!;
67
+ }
68
+
69
+ export function getComment(id: string): Comment | null {
70
+ const db = getDatabase();
71
+ return db.prepare("SELECT * FROM transcript_comments WHERE id = ?").get(id) as Comment | null;
72
+ }
73
+
74
+ export function listComments(transcriptId: string, options: ListCommentsOptions = {}): Comment[] {
75
+ const db = getDatabase();
76
+ const limit = options.limit ?? 50;
77
+ const offset = options.offset ?? 0;
78
+ const orderBy = options.top ? "likes DESC" : "created_at ASC";
79
+
80
+ return db
81
+ .prepare(`SELECT * FROM transcript_comments WHERE transcript_id = ? ORDER BY ${orderBy} LIMIT ? OFFSET ?`)
82
+ .all(transcriptId, limit, offset) as Comment[];
83
+ }
84
+
85
+ export function deleteComment(id: string): boolean {
86
+ const db = getDatabase();
87
+ return db.prepare("DELETE FROM transcript_comments WHERE id = ?").run(id).changes > 0;
88
+ }
89
+
90
+ export function getTopComments(transcriptId: string, limit = 10): Comment[] {
91
+ const db = getDatabase();
92
+ return db
93
+ .prepare("SELECT * FROM transcript_comments WHERE transcript_id = ? ORDER BY likes DESC LIMIT ?")
94
+ .all(transcriptId, limit) as Comment[];
95
+ }
96
+
97
+ export function searchComments(query: string): Comment[] {
98
+ const db = getDatabase();
99
+ const q = `%${query}%`;
100
+ return db
101
+ .prepare("SELECT * FROM transcript_comments WHERE comment_text LIKE ? ORDER BY likes DESC LIMIT 50")
102
+ .all(q) as Comment[];
103
+ }
104
+
105
+ export function getCommentStats(transcriptId: string): CommentStats {
106
+ const db = getDatabase();
107
+
108
+ const total = (
109
+ db.prepare("SELECT COUNT(*) as n FROM transcript_comments WHERE transcript_id = ?").get(transcriptId) as { n: number }
110
+ ).n;
111
+
112
+ const replies = (
113
+ db.prepare("SELECT COUNT(*) as n FROM transcript_comments WHERE transcript_id = ? AND is_reply = 1").get(transcriptId) as { n: number }
114
+ ).n;
115
+
116
+ const uniqueAuthors = (
117
+ db.prepare("SELECT COUNT(DISTINCT author) as n FROM transcript_comments WHERE transcript_id = ? AND author IS NOT NULL").get(transcriptId) as { n: number }
118
+ ).n;
119
+
120
+ const avgLikes = (
121
+ db.prepare("SELECT AVG(likes) as avg FROM transcript_comments WHERE transcript_id = ?").get(transcriptId) as { avg: number | null }
122
+ ).avg ?? 0;
123
+
124
+ const topRow = db
125
+ .prepare("SELECT author, COUNT(*) as cnt FROM transcript_comments WHERE transcript_id = ? AND author IS NOT NULL GROUP BY author ORDER BY cnt DESC LIMIT 1")
126
+ .get(transcriptId) as { author: string; cnt: number } | null;
127
+
128
+ return {
129
+ total,
130
+ replies,
131
+ unique_authors: uniqueAuthors,
132
+ avg_likes: Math.round(avgLikes * 100) / 100,
133
+ top_commenter: topRow?.author ?? null,
134
+ };
135
+ }
136
+
137
+ export function importComments(transcriptId: string, comments: Array<Omit<CreateCommentInput, "transcript_id">>): number {
138
+ const db = getDatabase();
139
+ const stmt = db.prepare(`
140
+ INSERT INTO transcript_comments (id, transcript_id, platform, author, author_handle, comment_text, likes, reply_count, is_reply, parent_comment_id, published_at)
141
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
142
+ `);
143
+
144
+ let count = 0;
145
+ const transaction = db.transaction(() => {
146
+ for (const c of comments) {
147
+ stmt.run(
148
+ crypto.randomUUID(),
149
+ transcriptId,
150
+ c.platform ?? "youtube",
151
+ c.author ?? null,
152
+ c.author_handle ?? null,
153
+ c.comment_text,
154
+ c.likes ?? 0,
155
+ c.reply_count ?? 0,
156
+ c.is_reply ? 1 : 0,
157
+ c.parent_comment_id ?? null,
158
+ c.published_at ?? null,
159
+ );
160
+ count++;
161
+ }
162
+ });
163
+ transaction();
164
+
165
+ return count;
166
+ }
@@ -69,4 +69,50 @@ export const MIGRATIONS: MigrationEntry[] = [
69
69
  CREATE INDEX IF NOT EXISTS idx_annotations_transcript ON annotations(transcript_id);
70
70
  `,
71
71
  },
72
+ {
73
+ id: 5,
74
+ name: "add_transcript_comments",
75
+ sql: `
76
+ CREATE TABLE IF NOT EXISTS transcript_comments (
77
+ id TEXT PRIMARY KEY,
78
+ transcript_id TEXT NOT NULL,
79
+ platform TEXT NOT NULL DEFAULT 'youtube',
80
+ author TEXT,
81
+ author_handle TEXT,
82
+ comment_text TEXT NOT NULL,
83
+ likes INTEGER DEFAULT 0,
84
+ reply_count INTEGER DEFAULT 0,
85
+ is_reply INTEGER DEFAULT 0,
86
+ parent_comment_id TEXT,
87
+ published_at TEXT,
88
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
89
+ FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
90
+ );
91
+ CREATE INDEX IF NOT EXISTS idx_comments_transcript ON transcript_comments(transcript_id);
92
+ CREATE INDEX IF NOT EXISTS idx_comments_likes ON transcript_comments(likes DESC);
93
+ `,
94
+ },
95
+ {
96
+ id: 6,
97
+ name: "add_proofread_issues",
98
+ sql: `
99
+ CREATE TABLE proofread_issues (
100
+ id TEXT PRIMARY KEY,
101
+ transcript_id TEXT NOT NULL,
102
+ issue_type TEXT NOT NULL CHECK(issue_type IN ('spelling','grammar','punctuation','clarity')),
103
+ position_start INTEGER,
104
+ position_end INTEGER,
105
+ original_text TEXT NOT NULL,
106
+ suggestion TEXT,
107
+ confidence REAL,
108
+ explanation TEXT,
109
+ status TEXT DEFAULT 'pending' CHECK(status IN ('pending','applied','dismissed')),
110
+ created_at TEXT NOT NULL DEFAULT (datetime('now')),
111
+ FOREIGN KEY (transcript_id) REFERENCES transcripts(id) ON DELETE CASCADE
112
+ );
113
+ CREATE INDEX idx_proofread_transcript ON proofread_issues(transcript_id);
114
+ CREATE INDEX idx_proofread_type ON proofread_issues(issue_type);
115
+ CREATE INDEX idx_proofread_status ON proofread_issues(status);
116
+ `,
117
+ },
72
118
  ];
@@ -0,0 +1,119 @@
1
+ import { getDatabase } from "./database.js";
2
+
3
/** Category of a proofreading finding. */
export type IssueType = "spelling" | "grammar" | "punctuation" | "clarity";
/** Lifecycle state: pending review, applied to the transcript, or dismissed. */
export type IssueStatus = "pending" | "applied" | "dismissed";

/** One proofreading finding for a transcript, as stored in SQLite. */
export interface ProofreadIssue {
  id: string;                     // UUID primary key
  transcript_id: string;          // FK -> transcripts.id
  issue_type: IssueType;
  position_start: number | null;  // character offsets into the transcript text, when known
  position_end: number | null;
  original_text: string;          // the flagged span
  suggestion: string | null;      // proposed replacement, if any
  confidence: number | null;      // model confidence in [0, 1], when reported
  explanation: string | null;     // model's rationale, when reported
  status: IssueStatus;            // defaults to "pending" on insert
  created_at: string;             // set by the DB default
}

/** Input shape for inserting an issue; omitted optionals are stored as NULL. */
export interface CreateProofreadIssueInput {
  transcript_id: string;
  issue_type: IssueType;
  position_start?: number;
  position_end?: number;
  original_text: string;
  suggestion?: string;
  confidence?: number;
  explanation?: string;
}

/** Optional filters for listProofreadIssues; omitted filters match everything. */
export interface ListProofreadIssuesOptions {
  issue_type?: IssueType;
  status?: IssueStatus;
}
35
+
36
+ export function createProofreadIssue(input: CreateProofreadIssueInput): ProofreadIssue {
37
+ const db = getDatabase();
38
+ const id = crypto.randomUUID();
39
+ db.prepare(`
40
+ INSERT INTO proofread_issues (id, transcript_id, issue_type, position_start, position_end, original_text, suggestion, confidence, explanation)
41
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
42
+ `).run(
43
+ id,
44
+ input.transcript_id,
45
+ input.issue_type,
46
+ input.position_start ?? null,
47
+ input.position_end ?? null,
48
+ input.original_text,
49
+ input.suggestion ?? null,
50
+ input.confidence ?? null,
51
+ input.explanation ?? null
52
+ );
53
+ return getProofreadIssue(id)!;
54
+ }
55
+
56
+ export function getProofreadIssue(id: string): ProofreadIssue | null {
57
+ const db = getDatabase();
58
+ const row = db.prepare("SELECT * FROM proofread_issues WHERE id = ?").get(id) as ProofreadIssue | null;
59
+ return row ?? null;
60
+ }
61
+
62
+ export function listProofreadIssues(transcriptId: string, options: ListProofreadIssuesOptions = {}): ProofreadIssue[] {
63
+ const db = getDatabase();
64
+ const conditions: string[] = ["transcript_id = ?"];
65
+ const values: unknown[] = [transcriptId];
66
+
67
+ if (options.issue_type) { conditions.push("issue_type = ?"); values.push(options.issue_type); }
68
+ if (options.status) { conditions.push("status = ?"); values.push(options.status); }
69
+
70
+ const where = conditions.join(" AND ");
71
+ return db
72
+ .prepare(`SELECT * FROM proofread_issues WHERE ${where} ORDER BY position_start ASC, created_at ASC`)
73
+ .all(...values) as ProofreadIssue[];
74
+ }
75
+
76
+ export function updateIssueStatus(id: string, status: IssueStatus): ProofreadIssue | null {
77
+ const db = getDatabase();
78
+ const existing = getProofreadIssue(id);
79
+ if (!existing) return null;
80
+ db.prepare("UPDATE proofread_issues SET status = ? WHERE id = ?").run(status, id);
81
+ return getProofreadIssue(id);
82
+ }
83
+
84
+ export function deleteProofreadIssuesByTranscript(transcriptId: string): number {
85
+ const db = getDatabase();
86
+ return db.prepare("DELETE FROM proofread_issues WHERE transcript_id = ?").run(transcriptId).changes;
87
+ }
88
+
89
+ export interface ProofreadStats {
90
+ total: number;
91
+ by_type: Record<string, number>;
92
+ pending: number;
93
+ applied: number;
94
+ dismissed: number;
95
+ }
96
+
97
+ export function getProofreadStats(transcriptId: string): ProofreadStats {
98
+ const db = getDatabase();
99
+
100
+ const total = (db.prepare("SELECT COUNT(*) as n FROM proofread_issues WHERE transcript_id = ?").get(transcriptId) as { n: number }).n;
101
+
102
+ const byType = db
103
+ .prepare("SELECT issue_type, COUNT(*) as n FROM proofread_issues WHERE transcript_id = ? GROUP BY issue_type")
104
+ .all(transcriptId) as { issue_type: string; n: number }[];
105
+
106
+ const byStatus = db
107
+ .prepare("SELECT status, COUNT(*) as n FROM proofread_issues WHERE transcript_id = ? GROUP BY status")
108
+ .all(transcriptId) as { status: string; n: number }[];
109
+
110
+ const statusMap = Object.fromEntries(byStatus.map((r) => [r.status, r.n]));
111
+
112
+ return {
113
+ total,
114
+ by_type: Object.fromEntries(byType.map((r) => [r.issue_type, r.n])),
115
+ pending: statusMap["pending"] ?? 0,
116
+ applied: statusMap["applied"] ?? 0,
117
+ dismissed: statusMap["dismissed"] ?? 0,
118
+ };
119
+ }
@@ -556,6 +556,74 @@ export async function splitAudioIntoChunks(
556
556
  return chunks;
557
557
  }
558
558
 
559
+ /**
560
+ * Raw comment from yt-dlp .info.json comments array.
561
+ */
562
+ export interface RawComment {
563
+ author: string | null;
564
+ author_id: string | null;
565
+ text: string;
566
+ like_count: number;
567
+ timestamp: number | null;
568
+ parent: string | null; // "root" for top-level, comment id for replies
569
+ id: string;
570
+ }
571
+
572
+ /**
573
+ * Fetch comments for a video URL using yt-dlp --write-comments.
574
+ * Downloads only the .info.json (no media) and parses the comments array.
575
+ */
576
+ export async function fetchComments(url: string): Promise<RawComment[]> {
577
+ const tempId = crypto.randomUUID();
578
+ const outputTemplate = join(tmpdir(), `comments-${tempId}`);
579
+
580
+ const proc = Bun.spawn(
581
+ [ytdlp(), "--write-comments", "--skip-download", "--no-write-thumbnail", "-o", outputTemplate, url],
582
+ { stdout: "pipe", stderr: "pipe" }
583
+ );
584
+
585
+ const [exitCode, , stderr] = await Promise.all([
586
+ proc.exited,
587
+ new Response(proc.stdout).text(),
588
+ new Response(proc.stderr).text(),
589
+ ]);
590
+
591
+ if (exitCode !== 0) {
592
+ throw new Error(`yt-dlp comment fetch failed (exit ${exitCode}): ${stderr.trim()}`);
593
+ }
594
+
595
+ // yt-dlp writes <output>.info.json
596
+ const infoPath = `${outputTemplate}.info.json`;
597
+ const { readFileSync, unlinkSync: unlinkFile, existsSync: fileExists } = await import("node:fs");
598
+
599
+ if (!fileExists(infoPath)) {
600
+ throw new Error("yt-dlp did not produce an info.json file for comments");
601
+ }
602
+
603
+ try {
604
+ const raw = JSON.parse(readFileSync(infoPath, "utf8"));
605
+ const comments: RawComment[] = [];
606
+
607
+ if (Array.isArray(raw.comments)) {
608
+ for (const c of raw.comments) {
609
+ comments.push({
610
+ author: c.author ?? null,
611
+ author_id: c.author_id ?? null,
612
+ text: typeof c.text === "string" ? c.text : String(c.text ?? ""),
613
+ like_count: typeof c.like_count === "number" ? c.like_count : 0,
614
+ timestamp: typeof c.timestamp === "number" ? c.timestamp : null,
615
+ parent: c.parent === "root" ? null : (c.parent ?? null),
616
+ id: c.id ?? crypto.randomUUID(),
617
+ });
618
+ }
619
+ }
620
+
621
+ return comments;
622
+ } finally {
623
+ try { unlinkFile(infoPath); } catch {}
624
+ }
625
+ }
626
+
559
627
  /**
560
628
  * Check whether yt-dlp is available on the system.
561
629
  */