vidistill 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/index.js +173 -454
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -76,6 +76,7 @@ CLASSIFICATION RULES:
76
76
  - "lecture": Academic lectures, talks, single-speaker educational content
77
77
  - "presentation": Slide-based presentations, keynotes, demo days
78
78
  - "conversation": Interviews, podcasts, panel discussions without slides
79
+ - "commentary": Single-speaker opinion, analysis, vlog, reaction, or informal educational content without formal slides or academic structure
79
80
  - "mixed": Cannot clearly classify into one category, or multiple types present
80
81
 
81
82
  2. DETECT visual content:
@@ -113,6 +114,7 @@ PASS RECOMMENDATIONS BY TYPE:
113
114
  - lecture: ["transcript", "visual", "implicit", "synthesis"]
114
115
  - presentation: ["transcript", "visual", "implicit", "synthesis"] (add "people" if multiple speakers)
115
116
  - conversation: ["transcript", "visual", "implicit", "synthesis"]
117
+ - commentary: ["transcript", "visual", "implicit", "synthesis"]
116
118
  - mixed: ["transcript", "visual", "code", "people", "chat", "implicit", "synthesis"]
117
119
  `;
118
120
  var SYSTEM_INSTRUCTION_PASS_3A = `
@@ -242,6 +244,23 @@ var LANGUAGE_NAMES = {
242
244
  ar: "Arabic",
243
245
  hi: "Hindi"
244
246
  };
247
+ var SYSTEM_INSTRUCTION_DEDUP = `
248
+ You are a transcript deduplication reviewer. You will receive a list of numbered transcript entries. Your task is to identify entries that are SEMANTIC DUPLICATES of an earlier entry.
249
+
250
+ A duplicate means:
251
+ - The entry says essentially the same thing as an earlier entry (same meaning, possibly different wording)
252
+ - The entry is a subset of an earlier entry (the earlier one already covers this content)
253
+ - The entry repeats the same quote or statement that already appeared earlier
254
+
255
+ NOT a duplicate:
256
+ - Entries where the speaker genuinely repeats themselves for emphasis (common in lectures/presentations)
257
+ - Similar topics discussed at different points with different details
258
+ - Call-and-response patterns (e.g. Q&A)
259
+
260
+ Return ONLY the indices of entries to REMOVE (the later duplicate, not the original).
261
+ If no duplicates are found, return an empty array.
262
+ Be conservative \u2014 only flag clear duplicates. When in doubt, keep the entry.
263
+ `;
245
264
  function withLanguage(prompt, lang) {
246
265
  if (!lang || lang === "en") return prompt;
247
266
  const languageName = LANGUAGE_NAMES[lang] ?? lang;
@@ -647,6 +666,25 @@ function normalizeYouTubeUrl(url) {
647
666
  if (!id) return null;
648
667
  return `https://www.youtube.com/watch?v=${id}`;
649
668
  }
669
+ async function fetchYouTubeMetadata(url) {
670
+ const normalized = normalizeYouTubeUrl(url);
671
+ if (!normalized) throw new Error("Invalid YouTube URL");
672
+ const oembedUrl = `https://www.youtube.com/oembed?url=${encodeURIComponent(normalized)}&format=json`;
673
+ const res = await fetch(oembedUrl);
674
+ if (!res.ok) {
675
+ if (res.status === 401 || res.status === 403) {
676
+ throw new Error("Video is private or unavailable");
677
+ }
678
+ throw new Error(`Failed to fetch video info (${res.status})`);
679
+ }
680
+ const data = await res.json();
681
+ const obj = data;
682
+ return {
683
+ title: typeof obj["title"] === "string" ? obj["title"] : "Untitled",
684
+ author: typeof obj["author_name"] === "string" ? obj["author_name"] : "Unknown",
685
+ thumbnailUrl: typeof obj["thumbnail_url"] === "string" ? obj["thumbnail_url"] : ""
686
+ };
687
+ }
650
688
  function fetchYtDlpDuration(url) {
651
689
  return new Promise((resolve3) => {
652
690
  execFile("yt-dlp", ["--dump-json", "--no-download", url], { timeout: 15e3 }, (err, stdout) => {
@@ -972,7 +1010,7 @@ var SCHEMA_PASS_0 = {
972
1010
  properties: {
973
1011
  type: {
974
1012
  type: Type.STRING,
975
- enum: ["coding", "meeting", "lecture", "presentation", "conversation", "mixed"],
1013
+ enum: ["coding", "meeting", "lecture", "presentation", "conversation", "commentary", "mixed"],
976
1014
  description: "Primary video type classification"
977
1015
  },
978
1016
  speakers: {
@@ -1545,6 +1583,17 @@ var SCHEMA_SYNTHESIS = {
1545
1583
  },
1546
1584
  required: ["overview", "key_decisions", "key_concepts", "action_items", "questions_raised", "suggestions", "topics", "files_to_generate", "prerequisites"]
1547
1585
  };
1586
+ var SCHEMA_DEDUP_REVIEW = {
1587
+ type: Type.OBJECT,
1588
+ properties: {
1589
+ duplicate_indices: {
1590
+ type: Type.ARRAY,
1591
+ items: { type: Type.INTEGER },
1592
+ description: "Zero-based indices of transcript entries that are semantic duplicates of an earlier entry and should be removed"
1593
+ }
1594
+ },
1595
+ required: ["duplicate_indices"]
1596
+ };
1548
1597
 
1549
1598
  // src/lib/utils.ts
1550
1599
  import { readFile } from "fs/promises";
@@ -1735,12 +1784,15 @@ function formatTranscriptForInjection(pass1a) {
1735
1784
  return pass1a.transcript_entries.map((e) => `[${e.timestamp}] ${e.text}`).join("\n");
1736
1785
  }
1737
1786
  async function runDiarization(params) {
1738
- const { client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult } = params;
1787
+ const { client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult, channelAuthor } = params;
1739
1788
  const transcriptText = formatTranscriptForInjection(pass1aResult);
1740
- const systemInstruction = withLanguage(
1741
- SYSTEM_INSTRUCTION_PASS_1B.replace("{INJECT_PASS1A_TRANSCRIPT_HERE}", transcriptText),
1742
- lang
1743
- );
1789
+ let baseInstruction = SYSTEM_INSTRUCTION_PASS_1B.replace("{INJECT_PASS1A_TRANSCRIPT_HERE}", transcriptText);
1790
+ if (channelAuthor) {
1791
+ baseInstruction += `
1792
+
1793
+ HINT: This video is from a YouTube channel by "${channelAuthor}". If only one speaker is present, consider using this as the speaker name.`;
1794
+ }
1795
+ const systemInstruction = withLanguage(baseInstruction, lang);
1744
1796
  const contents = [
1745
1797
  {
1746
1798
  role: "user",
@@ -2040,8 +2092,13 @@ function isNearDuplicate(a, b) {
2040
2092
  if (delta > DEDUP_WINDOW_S) return false;
2041
2093
  if (a.text === b.text) return true;
2042
2094
  const shared = tokenOverlap(a.text, b.text);
2043
- const maxTokens = Math.max(a.text.split(/\s+/).length, b.text.split(/\s+/).length);
2044
- return maxTokens > 0 && shared / maxTokens >= 0.8;
2095
+ const aTokens = a.text.split(/\s+/).length;
2096
+ const bTokens = b.text.split(/\s+/).length;
2097
+ const maxTokens = Math.max(aTokens, bTokens);
2098
+ const minTokens = Math.min(aTokens, bTokens);
2099
+ if (maxTokens > 0 && shared / maxTokens >= 0.8) return true;
2100
+ if (minTokens > 0 && shared / minTokens >= 0.8) return true;
2101
+ return false;
2045
2102
  }
2046
2103
  function deduplicateEntries(entries) {
2047
2104
  if (entries.length <= 1) return entries;
@@ -2708,6 +2765,9 @@ function determineStrategy(profile) {
2708
2765
  case "conversation":
2709
2766
  passes.add("implicit");
2710
2767
  break;
2768
+ case "commentary":
2769
+ passes.add("implicit");
2770
+ break;
2711
2771
  case "mixed":
2712
2772
  passes.add("code");
2713
2773
  passes.add("people");
@@ -3055,7 +3115,8 @@ async function runPipeline(config) {
3055
3115
  onProgress,
3056
3116
  onWait,
3057
3117
  isShuttingDown,
3058
- lang
3118
+ lang,
3119
+ channelAuthor
3059
3120
  } = config;
3060
3121
  const errors = [];
3061
3122
  const passesRun = [];
@@ -3123,7 +3184,7 @@ async function runPipeline(config) {
3123
3184
  const p1a = pass1aResult;
3124
3185
  const pass1bResult = await runDiarizationConsensus({
3125
3186
  config: { runs: transcriptConsensusRuns },
3126
- runFn: () => rateLimiter.execute(() => runDiarization({ client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult: p1a }), { onWait }),
3187
+ runFn: () => rateLimiter.execute(() => runDiarization({ client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult: p1a, channelAuthor }), { onWait }),
3127
3188
  mergedPass1a: p1a,
3128
3189
  onProgress: (_run, _total) => {
3129
3190
  currentStep++;
@@ -3286,6 +3347,55 @@ async function runPipeline(config) {
3286
3347
  const msg = e instanceof Error ? e.message : String(e);
3287
3348
  log5.warn(`speaker reconciliation failed, continuing with original labels: ${msg}`);
3288
3349
  }
3350
+ const allEntries = [];
3351
+ for (let segIdx = 0; segIdx < results.length; segIdx++) {
3352
+ const p1 = results[segIdx].pass1;
3353
+ if (p1 == null) continue;
3354
+ for (let entryIdx = 0; entryIdx < p1.transcript_entries.length; entryIdx++) {
3355
+ allEntries.push({ segIdx, entryIdx, entry: p1.transcript_entries[entryIdx] });
3356
+ }
3357
+ }
3358
+ if (allEntries.length > 20) {
3359
+ try {
3360
+ const numbered = allEntries.map(
3361
+ (e, i) => `[${i}] ${e.entry.timestamp} ${e.entry.speaker}: ${e.entry.text}`
3362
+ ).join("\n");
3363
+ const dedupResult = await rateLimiter.execute(
3364
+ () => client.generate({
3365
+ model: MODELS.flash,
3366
+ contents: [{ role: "user", parts: [{ text: numbered }] }],
3367
+ config: {
3368
+ systemInstruction: SYSTEM_INSTRUCTION_DEDUP,
3369
+ responseMimeType: "application/json",
3370
+ responseSchema: SCHEMA_DEDUP_REVIEW,
3371
+ temperature: 0
3372
+ }
3373
+ }),
3374
+ { onWait }
3375
+ );
3376
+ const parsed = dedupResult;
3377
+ if (parsed != null && Array.isArray(parsed.duplicate_indices)) {
3378
+ const indices = parsed.duplicate_indices;
3379
+ const toRemove = new Set(
3380
+ indices.filter((v) => typeof v === "number" && v >= 0 && v < allEntries.length)
3381
+ );
3382
+ if (toRemove.size > 0) {
3383
+ const segRemovals = /* @__PURE__ */ new Map();
3384
+ for (const globalIdx of toRemove) {
3385
+ const { segIdx, entryIdx } = allEntries[globalIdx];
3386
+ if (!segRemovals.has(segIdx)) segRemovals.set(segIdx, /* @__PURE__ */ new Set());
3387
+ segRemovals.get(segIdx).add(entryIdx);
3388
+ }
3389
+ for (const [segIdx, entryIndices] of segRemovals) {
3390
+ const p1 = results[segIdx].pass1;
3391
+ if (p1 == null) continue;
3392
+ p1.transcript_entries = p1.transcript_entries.filter((_, i) => !entryIndices.has(i));
3393
+ }
3394
+ }
3395
+ }
3396
+ } catch {
3397
+ }
3398
+ }
3289
3399
  let peopleExtraction = null;
3290
3400
  if (strategy.passes.includes("people")) {
3291
3401
  onProgress?.({ phase: "pass3b", segment: 0, totalSegments: 1, status: "running", totalSteps });
@@ -3507,7 +3617,7 @@ function renderIncompletePasses(pipelineResult) {
3507
3617
  return lines.join("\n");
3508
3618
  }
3509
3619
  function writeGuide(params) {
3510
- const { title, source, duration, pipelineResult, filesGenerated, speakerMapping } = params;
3620
+ const { title, source, duration, pipelineResult, filesGenerated, speakerMapping, channelAuthor } = params;
3511
3621
  const { synthesisResult, videoProfile } = pipelineResult;
3512
3622
  const rawOverview = synthesisResult?.overview ?? "_No summary available \u2014 synthesis pass did not run or produced no output._";
3513
3623
  const overview = replaceNamesInText(rawOverview, speakerMapping);
@@ -3518,6 +3628,7 @@ function writeGuide(params) {
3518
3628
  "## Source",
3519
3629
  "",
3520
3630
  `- **File/URL:** ${source}`,
3631
+ ...channelAuthor ? [`- **Author/Channel:** ${channelAuthor}`] : [],
3521
3632
  `- **Duration:** ${formatDuration(duration)}`,
3522
3633
  `- **Type:** ${videoType}`,
3523
3634
  "",
@@ -3580,6 +3691,15 @@ function writeTranscript(params) {
3580
3691
  sections.push("_No transcript data available._");
3581
3692
  return sections.join("\n");
3582
3693
  }
3694
+ for (let i = 1; i < segmentsWithPass1.length; i++) {
3695
+ const prev = segmentsWithPass1[i - 1].pass1;
3696
+ const curr = segmentsWithPass1[i].pass1;
3697
+ if (prev == null || curr == null) continue;
3698
+ const tail = prev.transcript_entries.slice(-5);
3699
+ curr.transcript_entries = curr.transcript_entries.filter(
3700
+ (entry) => !tail.some((prevEntry) => isNearDuplicate(entry, prevEntry))
3701
+ );
3702
+ }
3583
3703
  for (const seg of segmentsWithPass1) {
3584
3704
  if (seg.pass1 != null) {
3585
3705
  sections.push(renderPass1(seg.pass1, speakerMapping));
@@ -3599,6 +3719,7 @@ function renderSpeechEvent(entry, speakerMapping) {
3599
3719
  let text4 = entry.text;
3600
3720
  if (entry.emphasis_words != null && entry.emphasis_words.length > 0) {
3601
3721
  for (const word of entry.emphasis_words) {
3722
+ if (!word.includes(" ") && word.length < 4) continue;
3602
3723
  const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
3603
3724
  const re = new RegExp(`(?<![\\w*])${escaped}(?![\\w*])`, "gi");
3604
3725
  text4 = text4.replace(re, `**$&**`);
@@ -4218,433 +4339,29 @@ function renderAssignedTasks(tasks, speakerMapping) {
4218
4339
  lines.push("");
4219
4340
  return lines;
4220
4341
  }
4342
+ function isDuplicateTask(task, synthesisItems) {
4343
+ for (const item of synthesisItems) {
4344
+ const tsDelta = Math.abs(parseTimestamp(task.timestamp) - parseTimestamp(item.timestamp));
4345
+ if (tsDelta > 120) continue;
4346
+ const shared = tokenOverlap(task.task, item.item);
4347
+ const minLen = Math.min(task.task.split(/\s+/).length, item.item.split(/\s+/).length);
4348
+ if (minLen > 0 && shared / minLen >= 0.6) return true;
4349
+ }
4350
+ return false;
4351
+ }
4221
4352
  function writeActionItems(params) {
4222
4353
  const { segments, synthesisResult, speakerMapping } = params;
4223
4354
  const synthesisItems = synthesisResult?.action_items ?? [];
4224
4355
  const assignedTasks = collectTasksAssigned(segments);
4225
4356
  if (synthesisItems.length === 0 && assignedTasks.length === 0) return null;
4357
+ const dedupedTasks = synthesisItems.length > 0 ? assignedTasks.filter((t) => !isDuplicateTask(t, synthesisItems)) : assignedTasks;
4226
4358
  const sections = ["# Action Items", ""];
4227
4359
  sections.push(...renderSynthesisItems(synthesisItems, speakerMapping));
4228
- sections.push(...renderAssignedTasks(assignedTasks, speakerMapping));
4360
+ sections.push(...renderAssignedTasks(dedupedTasks, speakerMapping));
4229
4361
  while (sections[sections.length - 1] === "") sections.pop();
4230
4362
  return sections.join("\n");
4231
4363
  }
4232
4364
 
4233
- // src/output/timeline.ts
4234
- function toPercent(seconds, duration) {
4235
- if (duration <= 0) return "0";
4236
- const pct = Math.min(100, Math.max(0, seconds / duration * 100));
4237
- return pct.toFixed(3);
4238
- }
4239
- function escapeHtml(str) {
4240
- return str.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
4241
- }
4242
- function collectMarkers(pipelineResult, duration, speakerMapping) {
4243
- const markers = [];
4244
- const SPEECH_WINDOW_SECONDS = 30;
4245
- for (const segment of pipelineResult.segments) {
4246
- if (segment.pass1 != null) {
4247
- let windowStart = -1;
4248
- let windowLabel = "";
4249
- let windowDetail = "";
4250
- for (const entry of segment.pass1.transcript_entries) {
4251
- const seconds = parseTimestamp(entry.timestamp);
4252
- if (seconds > duration && duration > 0) continue;
4253
- if (windowStart < 0 || seconds - windowStart >= SPEECH_WINDOW_SECONDS) {
4254
- if (windowStart >= 0) {
4255
- markers.push({ seconds: windowStart, label: windowLabel, lane: "speech", detail: windowDetail });
4256
- }
4257
- windowStart = seconds;
4258
- windowLabel = applySpeakerMapping(entry.speaker || "Speech", speakerMapping);
4259
- windowDetail = entry.text.slice(0, 80) + (entry.text.length > 80 ? "\u2026" : "");
4260
- }
4261
- }
4262
- if (windowStart >= 0) {
4263
- markers.push({ seconds: windowStart, label: windowLabel, lane: "speech", detail: windowDetail });
4264
- }
4265
- }
4266
- if (segment.pass2 != null) {
4267
- for (const block of segment.pass2.code_blocks) {
4268
- const seconds = parseTimestamp(block.timestamp);
4269
- if (seconds > duration && duration > 0) continue;
4270
- markers.push({
4271
- seconds,
4272
- lane: "code",
4273
- label: block.filename,
4274
- detail: block.language + (block.change_type ? ` \xB7 ${block.change_type}` : "")
4275
- });
4276
- }
4277
- for (const note2 of segment.pass2.visual_notes) {
4278
- const seconds = parseTimestamp(note2.timestamp);
4279
- if (seconds > duration && duration > 0) continue;
4280
- markers.push({
4281
- seconds,
4282
- lane: "visual",
4283
- label: note2.visual_type,
4284
- detail: note2.description.slice(0, 80) + (note2.description.length > 80 ? "\u2026" : "")
4285
- });
4286
- }
4287
- }
4288
- }
4289
- const topics = pipelineResult.synthesisResult?.topics ?? [];
4290
- for (const topic of topics) {
4291
- for (const ts of topic.timestamps) {
4292
- const seconds = parseTimestamp(ts);
4293
- if (seconds > duration && duration > 0) continue;
4294
- markers.push({
4295
- seconds,
4296
- lane: "topic",
4297
- label: topic.title,
4298
- detail: topic.summary?.slice(0, 80) ?? ""
4299
- });
4300
- }
4301
- }
4302
- return markers;
4303
- }
4304
- function renderMarker(m, duration) {
4305
- const left = toPercent(m.seconds, duration);
4306
- const time = formatTime(m.seconds);
4307
- const tooltipRaw = `${time} \u2014 ${m.label}${m.detail ? ": " + m.detail : ""}`;
4308
- const tooltipAttr = escapeHtml(tooltipRaw);
4309
- return `<div class="marker marker-${m.lane}" style="left:${left}%" title="${tooltipAttr}" aria-label="${tooltipAttr}"></div>`;
4310
- }
4311
- function renderLane(laneId, laneLabel, markers, duration) {
4312
- const laneMarkers = markers.filter((m) => m.lane === laneId);
4313
- const renderedMarkers = laneMarkers.map((m) => renderMarker(m, duration)).join("\n ");
4314
- return `
4315
- <div class="lane">
4316
- <div class="lane-label">${laneLabel}</div>
4317
- <div class="lane-track" role="region" aria-label="${laneLabel} lane">
4318
- ${renderedMarkers}
4319
- </div>
4320
- </div>`;
4321
- }
4322
- function buildTimeAxis(duration) {
4323
- if (duration <= 0) return "";
4324
- const intervals = [30, 60, 120, 300, 600, 900, 1800, 3600];
4325
- const targetTicks = 10;
4326
- const ideal = duration / targetTicks;
4327
- const interval = intervals.find((i) => i >= ideal) ?? intervals[intervals.length - 1] ?? 3600;
4328
- const ticks = [];
4329
- for (let t = 0; t <= duration; t += interval) {
4330
- const left = toPercent(t, duration);
4331
- const label = formatTime(t);
4332
- ticks.push(`<div class="tick" style="left:${left}%"><span>${label}</span></div>`);
4333
- }
4334
- return ticks.join("\n ");
4335
- }
4336
- function generateTimeline(params) {
4337
- const { pipelineResult, duration, speakerMapping } = params;
4338
- const markers = collectMarkers(pipelineResult, duration, speakerMapping);
4339
- const effectiveDuration = duration > 0 ? duration : 1;
4340
- const speechLane = renderLane("speech", "Speech", markers, effectiveDuration);
4341
- const codeLane = renderLane("code", "Code", markers, effectiveDuration);
4342
- const visualLane = renderLane("visual", "Slides / Visuals", markers, effectiveDuration);
4343
- const topicLane = renderLane("topic", "Key Moments", markers, effectiveDuration);
4344
- const timeAxis = buildTimeAxis(effectiveDuration);
4345
- return `<!DOCTYPE html>
4346
- <html lang="en">
4347
- <head>
4348
- <meta charset="UTF-8">
4349
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
4350
- <title>Video Timeline</title>
4351
- <style>
4352
- /* ------------------------------------------------------------------ */
4353
- /* Reset + base */
4354
- /* ------------------------------------------------------------------ */
4355
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
4356
-
4357
- :root {
4358
- --bg: #f9fafb;
4359
- --surface: #ffffff;
4360
- --border: #e5e7eb;
4361
- --text: #111827;
4362
- --text-muted: #6b7280;
4363
- --shadow: 0 1px 3px rgba(0,0,0,.1);
4364
-
4365
- --speech-color: #3b82f6;
4366
- --code-color: #22c55e;
4367
- --visual-color: #a855f7;
4368
- --topic-color: #eab308;
4369
-
4370
- --lane-h: 36px;
4371
- --label-w: 120px;
4372
- }
4373
-
4374
- @media (prefers-color-scheme: dark) {
4375
- :root {
4376
- --bg: #0f172a;
4377
- --surface: #1e293b;
4378
- --border: #334155;
4379
- --text: #f1f5f9;
4380
- --text-muted: #94a3b8;
4381
- --shadow: 0 1px 3px rgba(0,0,0,.4);
4382
- }
4383
- }
4384
-
4385
- html, body {
4386
- height: 100%;
4387
- background: var(--bg);
4388
- color: var(--text);
4389
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
4390
- font-size: 14px;
4391
- line-height: 1.5;
4392
- }
4393
-
4394
- /* ------------------------------------------------------------------ */
4395
- /* Layout */
4396
- /* ------------------------------------------------------------------ */
4397
- .page {
4398
- max-width: 100%;
4399
- padding: 1.5rem 1rem;
4400
- }
4401
-
4402
- h1 {
4403
- font-size: 1.25rem;
4404
- font-weight: 700;
4405
- margin-bottom: 1rem;
4406
- }
4407
-
4408
- .legend {
4409
- display: flex;
4410
- flex-wrap: wrap;
4411
- gap: .5rem 1rem;
4412
- margin-bottom: 1.25rem;
4413
- }
4414
-
4415
- .legend-item {
4416
- display: flex;
4417
- align-items: center;
4418
- gap: .4rem;
4419
- font-size: .8125rem;
4420
- color: var(--text-muted);
4421
- }
4422
-
4423
- .legend-dot {
4424
- width: 10px;
4425
- height: 10px;
4426
- border-radius: 50%;
4427
- flex-shrink: 0;
4428
- }
4429
-
4430
- .legend-dot.speech { background: var(--speech-color); }
4431
- .legend-dot.code { background: var(--code-color); }
4432
- .legend-dot.visual { background: var(--visual-color); }
4433
- .legend-dot.topic { background: var(--topic-color); }
4434
-
4435
- /* ------------------------------------------------------------------ */
4436
- /* Scroll container */
4437
- /* ------------------------------------------------------------------ */
4438
- .scroll-wrapper {
4439
- overflow-x: auto;
4440
- -webkit-overflow-scrolling: touch;
4441
- border: 1px solid var(--border);
4442
- border-radius: .5rem;
4443
- background: var(--surface);
4444
- box-shadow: var(--shadow);
4445
- }
4446
-
4447
- .timeline {
4448
- min-width: 640px;
4449
- padding: 1rem;
4450
- }
4451
-
4452
- /* ------------------------------------------------------------------ */
4453
- /* Lanes */
4454
- /* ------------------------------------------------------------------ */
4455
- .lane {
4456
- display: flex;
4457
- align-items: center;
4458
- margin-bottom: .5rem;
4459
- }
4460
-
4461
- .lane-label {
4462
- width: var(--label-w);
4463
- flex-shrink: 0;
4464
- font-size: .75rem;
4465
- font-weight: 600;
4466
- color: var(--text-muted);
4467
- text-transform: uppercase;
4468
- letter-spacing: .04em;
4469
- padding-right: .75rem;
4470
- }
4471
-
4472
- .lane-track {
4473
- flex: 1;
4474
- height: var(--lane-h);
4475
- position: relative;
4476
- background: var(--bg);
4477
- border-radius: .25rem;
4478
- border: 1px solid var(--border);
4479
- }
4480
-
4481
- /* ------------------------------------------------------------------ */
4482
- /* Markers */
4483
- /* ------------------------------------------------------------------ */
4484
- .marker {
4485
- position: absolute;
4486
- top: 50%;
4487
- transform: translate(-50%, -50%);
4488
- width: 10px;
4489
- height: 10px;
4490
- border-radius: 50%;
4491
- cursor: pointer;
4492
- transition: transform .15s ease, box-shadow .15s ease;
4493
- outline: none;
4494
- }
4495
-
4496
- .marker:hover, .marker:focus {
4497
- transform: translate(-50%, -50%) scale(1.6);
4498
- box-shadow: 0 0 0 2px var(--surface), 0 0 0 4px currentColor;
4499
- z-index: 10;
4500
- }
4501
-
4502
- .marker-speech { background: var(--speech-color); color: var(--speech-color); }
4503
- .marker-code { background: var(--code-color); color: var(--code-color); }
4504
- .marker-visual { background: var(--visual-color); color: var(--visual-color); }
4505
- .marker-topic { background: var(--topic-color); color: var(--topic-color); }
4506
-
4507
- /* ------------------------------------------------------------------ */
4508
- /* Time axis */
4509
- /* ------------------------------------------------------------------ */
4510
- .time-axis {
4511
- display: flex;
4512
- align-items: center;
4513
- margin-bottom: .25rem;
4514
- }
4515
-
4516
- .axis-spacer {
4517
- width: var(--label-w);
4518
- flex-shrink: 0;
4519
- }
4520
-
4521
- .axis-track {
4522
- flex: 1;
4523
- height: 20px;
4524
- position: relative;
4525
- }
4526
-
4527
- .tick {
4528
- position: absolute;
4529
- top: 0;
4530
- transform: translateX(-50%);
4531
- }
4532
-
4533
- .tick span {
4534
- font-size: .6875rem;
4535
- color: var(--text-muted);
4536
- white-space: nowrap;
4537
- }
4538
-
4539
- /* ------------------------------------------------------------------ */
4540
- /* Tooltip (vanilla JS) */
4541
- /* ------------------------------------------------------------------ */
4542
- #tooltip {
4543
- position: fixed;
4544
- background: var(--surface);
4545
- border: 1px solid var(--border);
4546
- border-radius: .375rem;
4547
- padding: .375rem .625rem;
4548
- font-size: .8125rem;
4549
- color: var(--text);
4550
- pointer-events: none;
4551
- z-index: 999;
4552
- box-shadow: 0 4px 12px rgba(0,0,0,.15);
4553
- max-width: 280px;
4554
- word-break: break-word;
4555
- display: none;
4556
- }
4557
-
4558
- /* ------------------------------------------------------------------ */
4559
- /* Responsive */
4560
- /* ------------------------------------------------------------------ */
4561
- @media (max-width: 480px) {
4562
- :root { --label-w: 72px; --lane-h: 32px; }
4563
- .legend { gap: .35rem .75rem; }
4564
- h1 { font-size: 1.1rem; }
4565
- }
4566
- </style>
4567
- </head>
4568
- <body>
4569
- <div class="page">
4570
- <h1>Video Timeline</h1>
4571
-
4572
- <div class="legend" aria-label="Lane colour legend">
4573
- <span class="legend-item"><span class="legend-dot speech"></span>Speech</span>
4574
- <span class="legend-item"><span class="legend-dot code"></span>Code</span>
4575
- <span class="legend-item"><span class="legend-dot visual"></span>Slides / Visuals</span>
4576
- <span class="legend-item"><span class="legend-dot topic"></span>Key Moments</span>
4577
- </div>
4578
-
4579
- <div class="scroll-wrapper">
4580
- <div class="timeline">
4581
- <div class="time-axis">
4582
- <div class="axis-spacer"></div>
4583
- <div class="axis-track">
4584
- ${timeAxis}
4585
- </div>
4586
- </div>
4587
- ${speechLane}
4588
- ${codeLane}
4589
- ${visualLane}
4590
- ${topicLane}
4591
- </div>
4592
- </div>
4593
- </div>
4594
-
4595
- <div id="tooltip" role="tooltip"></div>
4596
-
4597
- <script>
4598
- (function () {
4599
- var tooltip = document.getElementById('tooltip');
4600
- var markers = document.querySelectorAll('.marker');
4601
-
4602
- function showTooltip(el, x, y) {
4603
- var text = el.getAttribute('title') || el.getAttribute('aria-label') || '';
4604
- if (!text) return;
4605
- tooltip.textContent = text;
4606
- tooltip.style.display = 'block';
4607
- positionTooltip(x, y);
4608
- }
4609
-
4610
- function positionTooltip(x, y) {
4611
- var tw = tooltip.offsetWidth;
4612
- var th = tooltip.offsetHeight;
4613
- var vw = window.innerWidth;
4614
- var vh = window.innerHeight;
4615
- var left = x + 12;
4616
- var top = y - th / 2;
4617
- if (left + tw > vw - 8) left = x - tw - 12;
4618
- if (top < 8) top = 8;
4619
- if (top + th > vh - 8) top = vh - th - 8;
4620
- tooltip.style.left = left + 'px';
4621
- tooltip.style.top = top + 'px';
4622
- }
4623
-
4624
- function hideTooltip() {
4625
- tooltip.style.display = 'none';
4626
- }
4627
-
4628
- markers.forEach(function (m) {
4629
- m.addEventListener('mouseenter', function (e) {
4630
- showTooltip(m, e.clientX, e.clientY);
4631
- });
4632
- m.addEventListener('mousemove', function (e) {
4633
- positionTooltip(e.clientX, e.clientY);
4634
- });
4635
- m.addEventListener('mouseleave', hideTooltip);
4636
- m.addEventListener('focus', function () {
4637
- var rect = m.getBoundingClientRect();
4638
- showTooltip(m, rect.right, rect.top + rect.height / 2);
4639
- });
4640
- m.addEventListener('blur', hideTooltip);
4641
- });
4642
- })();
4643
- </script>
4644
- </body>
4645
- </html>`;
4646
- }
4647
-
4648
4365
  // src/output/metadata.ts
4649
4366
  function writeMetadata(params) {
4650
4367
  const { title, source, duration, model, processingTimeMs, filesGenerated, pipelineResult, speakerMapping, declinedMerges } = params;
@@ -4722,12 +4439,10 @@ function resolveFilesToGenerate(params) {
4722
4439
  }
4723
4440
  if (synthesisResult != null || hasPass3d) optional.add("notes.md");
4724
4441
  if (peopleExtraction != null) optional.add("people.md");
4725
- const hasPass1 = segments.some((s) => s.pass1 != null);
4726
- if (hasPass1 || hasPass2) optional.add("timeline.html");
4727
4442
  return optional;
4728
4443
  }
4729
4444
  async function generateOutput(params) {
4730
- const { pipelineResult, outputDir, videoTitle, source, duration, model, processingTimeMs, speakerMapping, declinedMerges } = params;
4445
+ const { pipelineResult, outputDir, videoTitle, source, duration, model, processingTimeMs, channelAuthor, speakerMapping, declinedMerges } = params;
4731
4446
  const slug = slugify(videoTitle);
4732
4447
  const finalOutputDir = join3(outputDir, slug);
4733
4448
  await mkdir(finalOutputDir, { recursive: true });
@@ -4832,14 +4547,6 @@ async function generateOutput(params) {
4832
4547
  errors.push(`action-items.md: ${String(err)}`);
4833
4548
  }
4834
4549
  }
4835
- if (filesToGenerate.has("timeline.html")) {
4836
- try {
4837
- const content = generateTimeline({ pipelineResult, duration, speakerMapping: expandedMapping });
4838
- await writeOutputFile("timeline.html", content);
4839
- } catch (err) {
4840
- errors.push(`timeline.html: ${String(err)}`);
4841
- }
4842
- }
4843
4550
  try {
4844
4551
  const rawFiles = writeRawOutput(pipelineResult);
4845
4552
  await mkdir(join3(finalOutputDir, "raw"), { recursive: true });
@@ -4869,7 +4576,7 @@ async function generateOutput(params) {
4869
4576
  errors.push(`metadata.json: ${String(err)}`);
4870
4577
  }
4871
4578
  try {
4872
- const content = writeGuide({ title: videoTitle, source, duration, pipelineResult, filesGenerated, speakerMapping: expandedMapping });
4579
+ const content = writeGuide({ title: videoTitle, source, duration, pipelineResult, filesGenerated, speakerMapping: expandedMapping, channelAuthor });
4873
4580
  await writeOutputFile("guide.md", content);
4874
4581
  } catch (err) {
4875
4582
  errors.push(`guide.md: ${String(err)}`);
@@ -4982,14 +4689,6 @@ async function reRenderWithSpeakerMapping(params) {
4982
4689
  errors.push(`action-items.md: ${String(err)}`);
4983
4690
  }
4984
4691
  }
4985
- if (filesToReRender.has("timeline.html")) {
4986
- try {
4987
- const content = generateTimeline({ pipelineResult, duration, speakerMapping: expandedMapping });
4988
- await writeOutputFile("timeline.html", content);
4989
- } catch (err) {
4990
- errors.push(`timeline.html: ${String(err)}`);
4991
- }
4992
- }
4993
4692
  if (filesToReRender.has("guide.md")) {
4994
4693
  try {
4995
4694
  const content = writeGuide({ title: videoTitle, source, duration, pipelineResult, filesGenerated, speakerMapping: expandedMapping });
@@ -5168,6 +4867,7 @@ async function runDistill(args) {
5168
4867
  let duration;
5169
4868
  let videoTitle;
5170
4869
  let uploadedFileNames = [];
4870
+ let ytAuthor;
5171
4871
  if (resolved.type === "youtube") {
5172
4872
  const result = await handleYouTube(resolved.value, client);
5173
4873
  fileUri = result.fileUri;
@@ -5184,8 +4884,14 @@ async function runDistill(args) {
5184
4884
  if (result.uploadedFileName != null) {
5185
4885
  uploadedFileNames = [result.uploadedFileName];
5186
4886
  }
5187
- const videoId = extractVideoId(resolved.value);
5188
- videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
4887
+ try {
4888
+ const meta = await fetchYouTubeMetadata(resolved.value);
4889
+ videoTitle = meta.title;
4890
+ ytAuthor = meta.author;
4891
+ } catch {
4892
+ const videoId = extractVideoId(resolved.value);
4893
+ videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
4894
+ }
5189
4895
  } else {
5190
4896
  const result = await handleLocalFile(resolved.value, client);
5191
4897
  fileUri = result.fileUri;
@@ -5230,6 +4936,7 @@ async function runDistill(args) {
5230
4936
  model,
5231
4937
  context,
5232
4938
  lang: args.lang,
4939
+ channelAuthor: ytAuthor,
5233
4940
  rateLimiter,
5234
4941
  onProgress: (status) => {
5235
4942
  progress2.update(status);
@@ -5253,7 +4960,8 @@ async function runDistill(args) {
5253
4960
  source: rawInput,
5254
4961
  duration,
5255
4962
  model,
5256
- processingTimeMs: elapsedMs
4963
+ processingTimeMs: elapsedMs,
4964
+ channelAuthor: ytAuthor
5257
4965
  });
5258
4966
  const elapsedSecs = Math.round(elapsedMs / 1e3);
5259
4967
  const elapsedMins = Math.floor(elapsedSecs / 60);
@@ -5293,6 +5001,7 @@ async function analyzeVideo(input, context, lang) {
5293
5001
  let mimeType;
5294
5002
  let duration;
5295
5003
  let videoTitle;
5004
+ let ytAuthor;
5296
5005
  if (resolved.type === "youtube") {
5297
5006
  const result = await handleYouTube(resolved.value, client);
5298
5007
  fileUri = result.fileUri;
@@ -5307,8 +5016,14 @@ async function analyzeVideo(input, context, lang) {
5307
5016
  `);
5308
5017
  duration = 600;
5309
5018
  }
5310
- const videoId = extractVideoId(resolved.value);
5311
- videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
5019
+ try {
5020
+ const meta = await fetchYouTubeMetadata(resolved.value);
5021
+ videoTitle = meta.title;
5022
+ ytAuthor = meta.author;
5023
+ } catch {
5024
+ const videoId = extractVideoId(resolved.value);
5025
+ videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
5026
+ }
5312
5027
  } else {
5313
5028
  const result = await handleLocalFile(resolved.value, client);
5314
5029
  fileUri = result.fileUri;
@@ -5324,6 +5039,7 @@ async function analyzeVideo(input, context, lang) {
5324
5039
  const slug = slugify(videoTitle);
5325
5040
  const finalOutputDir = `${outputDir}/${slug}`;
5326
5041
  const rateLimiter = new RateLimiter();
5042
+ const startTime = Date.now();
5327
5043
  const pipelineResult = await runPipeline({
5328
5044
  client,
5329
5045
  fileUri,
@@ -5332,8 +5048,10 @@ async function analyzeVideo(input, context, lang) {
5332
5048
  model,
5333
5049
  context,
5334
5050
  lang,
5051
+ channelAuthor: ytAuthor,
5335
5052
  rateLimiter
5336
5053
  });
5054
+ const elapsedMs = Date.now() - startTime;
5337
5055
  await generateOutput({
5338
5056
  pipelineResult,
5339
5057
  outputDir,
@@ -5341,7 +5059,8 @@ async function analyzeVideo(input, context, lang) {
5341
5059
  source: input,
5342
5060
  duration,
5343
5061
  model,
5344
- processingTimeMs: 0
5062
+ processingTimeMs: elapsedMs,
5063
+ channelAuthor: ytAuthor
5345
5064
  });
5346
5065
  let summary = "Analysis complete.";
5347
5066
  const synthesisPath = join5(finalOutputDir, "raw", "synthesis.json");
@@ -5902,7 +5621,7 @@ async function run2(args) {
5902
5621
  }
5903
5622
 
5904
5623
  // src/cli/index.ts
5905
- var version = "0.6.2";
5624
+ var version = "0.6.4";
5906
5625
  var DEFAULT_OUTPUT2 = "./vidistill-output/";
5907
5626
  var SUBCOMMANDS = {
5908
5627
  mcp: run,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidistill",
3
- "version": "0.6.2",
3
+ "version": "0.6.4",
4
4
  "description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
5
5
  "type": "module",
6
6
  "license": "MIT",