vidistill 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +87 -454
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -76,6 +76,7 @@ CLASSIFICATION RULES:
76
76
  - "lecture": Academic lectures, talks, single-speaker educational content
77
77
  - "presentation": Slide-based presentations, keynotes, demo days
78
78
  - "conversation": Interviews, podcasts, panel discussions without slides
79
+ - "commentary": Single-speaker opinion, analysis, vlog, reaction, or informal educational content without formal slides or academic structure
79
80
  - "mixed": Cannot clearly classify into one category, or multiple types present
80
81
 
81
82
  2. DETECT visual content:
@@ -113,6 +114,7 @@ PASS RECOMMENDATIONS BY TYPE:
113
114
  - lecture: ["transcript", "visual", "implicit", "synthesis"]
114
115
  - presentation: ["transcript", "visual", "implicit", "synthesis"] (add "people" if multiple speakers)
115
116
  - conversation: ["transcript", "visual", "implicit", "synthesis"]
117
+ - commentary: ["transcript", "visual", "implicit", "synthesis"]
116
118
  - mixed: ["transcript", "visual", "code", "people", "chat", "implicit", "synthesis"]
117
119
  `;
118
120
  var SYSTEM_INSTRUCTION_PASS_3A = `
@@ -664,6 +666,25 @@ function normalizeYouTubeUrl(url) {
664
666
  if (!id) return null;
665
667
  return `https://www.youtube.com/watch?v=${id}`;
666
668
  }
669
/**
 * Look up a YouTube video's public metadata through the oEmbed endpoint.
 *
 * @param {string} url - A YouTube URL in any form normalizeYouTubeUrl accepts.
 * @returns {Promise<{title: string, author: string, thumbnailUrl: string}>}
 *   Fields that are missing, non-string, or blank in the response fall back
 *   to "Untitled", "Unknown" and "" respectively.
 * @throws {Error} "Invalid YouTube URL" when the URL cannot be normalized,
 *   "Video is private or unavailable" on HTTP 401/403, and
 *   "Failed to fetch video info (<status>)" on any other non-OK status.
 *   Network failures, the request timeout, and unparseable JSON reject with
 *   the underlying error from fetch / res.json().
 */
async function fetchYouTubeMetadata(url) {
  const normalized = normalizeYouTubeUrl(url);
  if (!normalized) throw new Error("Invalid YouTube URL");
  const oembedUrl = `https://www.youtube.com/oembed?url=${encodeURIComponent(normalized)}&format=json`;
  // Metadata is optional (callers fall back to a generated title), so a
  // stalled request must not block the run. 15 s matches the timeout that
  // fetchYtDlpDuration gives its yt-dlp probe.
  const res = await fetch(oembedUrl, { signal: AbortSignal.timeout(15e3) });
  if (!res.ok) {
    if (res.status === 401 || res.status === 403) {
      throw new Error("Video is private or unavailable");
    }
    throw new Error(`Failed to fetch video info (${res.status})`);
  }
  const data = await res.json();
  // A body of `null` or a bare primitive is valid JSON; treat it as an empty
  // payload instead of throwing a TypeError on property access.
  const obj = data !== null && typeof data === "object" ? data : {};
  // Blank strings are as unusable as missing ones (an empty title would
  // produce an empty slug downstream), so they take the fallback too.
  const pick = (key, fallback) => {
    const value = obj[key];
    return typeof value === "string" && value.trim() !== "" ? value : fallback;
  };
  return {
    title: pick("title", "Untitled"),
    author: pick("author_name", "Unknown"),
    thumbnailUrl: pick("thumbnail_url", "")
  };
}
667
688
  function fetchYtDlpDuration(url) {
668
689
  return new Promise((resolve3) => {
669
690
  execFile("yt-dlp", ["--dump-json", "--no-download", url], { timeout: 15e3 }, (err, stdout) => {
@@ -989,7 +1010,7 @@ var SCHEMA_PASS_0 = {
989
1010
  properties: {
990
1011
  type: {
991
1012
  type: Type.STRING,
992
- enum: ["coding", "meeting", "lecture", "presentation", "conversation", "mixed"],
1013
+ enum: ["coding", "meeting", "lecture", "presentation", "conversation", "commentary", "mixed"],
993
1014
  description: "Primary video type classification"
994
1015
  },
995
1016
  speakers: {
@@ -1763,12 +1784,15 @@ function formatTranscriptForInjection(pass1a) {
1763
1784
  return pass1a.transcript_entries.map((e) => `[${e.timestamp}] ${e.text}`).join("\n");
1764
1785
  }
1765
1786
  async function runDiarization(params) {
1766
- const { client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult } = params;
1787
+ const { client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult, channelAuthor } = params;
1767
1788
  const transcriptText = formatTranscriptForInjection(pass1aResult);
1768
- const systemInstruction = withLanguage(
1769
- SYSTEM_INSTRUCTION_PASS_1B.replace("{INJECT_PASS1A_TRANSCRIPT_HERE}", transcriptText),
1770
- lang
1771
- );
1789
+ let baseInstruction = SYSTEM_INSTRUCTION_PASS_1B.replace("{INJECT_PASS1A_TRANSCRIPT_HERE}", transcriptText);
1790
+ if (channelAuthor) {
1791
+ baseInstruction += `
1792
+
1793
+ HINT: This video is from a YouTube channel by "${channelAuthor}". If only one speaker is present, consider using this as the speaker name.`;
1794
+ }
1795
+ const systemInstruction = withLanguage(baseInstruction, lang);
1772
1796
  const contents = [
1773
1797
  {
1774
1798
  role: "user",
@@ -2068,8 +2092,13 @@ function isNearDuplicate(a, b) {
2068
2092
  if (delta > DEDUP_WINDOW_S) return false;
2069
2093
  if (a.text === b.text) return true;
2070
2094
  const shared = tokenOverlap(a.text, b.text);
2071
- const maxTokens = Math.max(a.text.split(/\s+/).length, b.text.split(/\s+/).length);
2072
- return maxTokens > 0 && shared / maxTokens >= 0.8;
2095
+ const aTokens = a.text.split(/\s+/).length;
2096
+ const bTokens = b.text.split(/\s+/).length;
2097
+ const maxTokens = Math.max(aTokens, bTokens);
2098
+ const minTokens = Math.min(aTokens, bTokens);
2099
+ if (maxTokens > 0 && shared / maxTokens >= 0.8) return true;
2100
+ if (minTokens > 0 && shared / minTokens >= 0.8) return true;
2101
+ return false;
2073
2102
  }
2074
2103
  function deduplicateEntries(entries) {
2075
2104
  if (entries.length <= 1) return entries;
@@ -2736,6 +2765,9 @@ function determineStrategy(profile) {
2736
2765
  case "conversation":
2737
2766
  passes.add("implicit");
2738
2767
  break;
2768
+ case "commentary":
2769
+ passes.add("implicit");
2770
+ break;
2739
2771
  case "mixed":
2740
2772
  passes.add("code");
2741
2773
  passes.add("people");
@@ -3083,7 +3115,8 @@ async function runPipeline(config) {
3083
3115
  onProgress,
3084
3116
  onWait,
3085
3117
  isShuttingDown,
3086
- lang
3118
+ lang,
3119
+ channelAuthor
3087
3120
  } = config;
3088
3121
  const errors = [];
3089
3122
  const passesRun = [];
@@ -3151,7 +3184,7 @@ async function runPipeline(config) {
3151
3184
  const p1a = pass1aResult;
3152
3185
  const pass1bResult = await runDiarizationConsensus({
3153
3186
  config: { runs: transcriptConsensusRuns },
3154
- runFn: () => rateLimiter.execute(() => runDiarization({ client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult: p1a }), { onWait }),
3187
+ runFn: () => rateLimiter.execute(() => runDiarization({ client, fileUri, mimeType, segment, model, resolution, lang, pass1aResult: p1a, channelAuthor }), { onWait }),
3155
3188
  mergedPass1a: p1a,
3156
3189
  onProgress: (_run, _total) => {
3157
3190
  currentStep++;
@@ -3584,7 +3617,7 @@ function renderIncompletePasses(pipelineResult) {
3584
3617
  return lines.join("\n");
3585
3618
  }
3586
3619
  function writeGuide(params) {
3587
- const { title, source, duration, pipelineResult, filesGenerated, speakerMapping } = params;
3620
+ const { title, source, duration, pipelineResult, filesGenerated, speakerMapping, channelAuthor } = params;
3588
3621
  const { synthesisResult, videoProfile } = pipelineResult;
3589
3622
  const rawOverview = synthesisResult?.overview ?? "_No summary available \u2014 synthesis pass did not run or produced no output._";
3590
3623
  const overview = replaceNamesInText(rawOverview, speakerMapping);
@@ -3595,6 +3628,7 @@ function writeGuide(params) {
3595
3628
  "## Source",
3596
3629
  "",
3597
3630
  `- **File/URL:** ${source}`,
3631
+ ...channelAuthor ? [`- **Author/Channel:** ${channelAuthor}`] : [],
3598
3632
  `- **Duration:** ${formatDuration(duration)}`,
3599
3633
  `- **Type:** ${videoType}`,
3600
3634
  "",
@@ -3685,6 +3719,7 @@ function renderSpeechEvent(entry, speakerMapping) {
3685
3719
  let text4 = entry.text;
3686
3720
  if (entry.emphasis_words != null && entry.emphasis_words.length > 0) {
3687
3721
  for (const word of entry.emphasis_words) {
3722
+ if (!word.includes(" ") && word.length < 4) continue;
3688
3723
  const escaped = word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
3689
3724
  const re = new RegExp(`(?<![\\w*])${escaped}(?![\\w*])`, "gi");
3690
3725
  text4 = text4.replace(re, `**$&**`);
@@ -4304,433 +4339,29 @@ function renderAssignedTasks(tasks, speakerMapping) {
4304
4339
  lines.push("");
4305
4340
  return lines;
4306
4341
  }
4342
/**
 * Decide whether an assigned task repeats one of the synthesis action items.
 *
 * A task is a duplicate when some item lies within 120 units of
 * parseTimestamp output of it (other code in this file treats that value as
 * seconds) and the two texts share at least 60% of the shorter text's tokens,
 * as counted by tokenOverlap.
 *
 * @param {{task: string, timestamp: string}} task - Assigned task to check.
 * @param {Array<{item: string, timestamp: string}>} synthesisItems - Action
 *   items already produced by synthesis.
 * @returns {boolean} true if any synthesis item matches; false otherwise,
 *   including when the task text is blank or not a string.
 */
function isDuplicateTask(task, synthesisItems) {
  // Count only real tokens: a plain split(/\s+/) yields empty strings for
  // leading/trailing whitespace (and [""] for an empty text), which inflates
  // the denominator and hides genuine duplicates.
  const countTokens = (text) => text.split(/\s+/).filter(Boolean).length;
  if (typeof task?.task !== "string") return false;
  const taskTokens = countTokens(task.task);
  if (taskTokens === 0) return false;
  for (const item of synthesisItems) {
    // One malformed entry must not abort rendering of the whole action-item list.
    if (typeof item?.item !== "string") continue;
    const tsDelta = Math.abs(parseTimestamp(task.timestamp) - parseTimestamp(item.timestamp));
    if (tsDelta > 120) continue;
    const minLen = Math.min(taskTokens, countTokens(item.item));
    if (minLen === 0) continue;
    const shared = tokenOverlap(task.task, item.item);
    if (shared / minLen >= 0.6) return true;
  }
  return false;
}
4307
4352
  function writeActionItems(params) {
4308
4353
  const { segments, synthesisResult, speakerMapping } = params;
4309
4354
  const synthesisItems = synthesisResult?.action_items ?? [];
4310
4355
  const assignedTasks = collectTasksAssigned(segments);
4311
4356
  if (synthesisItems.length === 0 && assignedTasks.length === 0) return null;
4357
+ const dedupedTasks = synthesisItems.length > 0 ? assignedTasks.filter((t) => !isDuplicateTask(t, synthesisItems)) : assignedTasks;
4312
4358
  const sections = ["# Action Items", ""];
4313
4359
  sections.push(...renderSynthesisItems(synthesisItems, speakerMapping));
4314
- sections.push(...renderAssignedTasks(assignedTasks, speakerMapping));
4360
+ sections.push(...renderAssignedTasks(dedupedTasks, speakerMapping));
4315
4361
  while (sections[sections.length - 1] === "") sections.pop();
4316
4362
  return sections.join("\n");
4317
4363
  }
4318
4364
 
4319
- // src/output/timeline.ts
4320
- function toPercent(seconds, duration) {
4321
- if (duration <= 0) return "0";
4322
- const pct = Math.min(100, Math.max(0, seconds / duration * 100));
4323
- return pct.toFixed(3);
4324
- }
4325
- function escapeHtml(str) {
4326
- return str.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;").replace(/'/g, "&#39;");
4327
- }
4328
- function collectMarkers(pipelineResult, duration, speakerMapping) {
4329
- const markers = [];
4330
- const SPEECH_WINDOW_SECONDS = 30;
4331
- for (const segment of pipelineResult.segments) {
4332
- if (segment.pass1 != null) {
4333
- let windowStart = -1;
4334
- let windowLabel = "";
4335
- let windowDetail = "";
4336
- for (const entry of segment.pass1.transcript_entries) {
4337
- const seconds = parseTimestamp(entry.timestamp);
4338
- if (seconds > duration && duration > 0) continue;
4339
- if (windowStart < 0 || seconds - windowStart >= SPEECH_WINDOW_SECONDS) {
4340
- if (windowStart >= 0) {
4341
- markers.push({ seconds: windowStart, label: windowLabel, lane: "speech", detail: windowDetail });
4342
- }
4343
- windowStart = seconds;
4344
- windowLabel = applySpeakerMapping(entry.speaker || "Speech", speakerMapping);
4345
- windowDetail = entry.text.slice(0, 80) + (entry.text.length > 80 ? "\u2026" : "");
4346
- }
4347
- }
4348
- if (windowStart >= 0) {
4349
- markers.push({ seconds: windowStart, label: windowLabel, lane: "speech", detail: windowDetail });
4350
- }
4351
- }
4352
- if (segment.pass2 != null) {
4353
- for (const block of segment.pass2.code_blocks) {
4354
- const seconds = parseTimestamp(block.timestamp);
4355
- if (seconds > duration && duration > 0) continue;
4356
- markers.push({
4357
- seconds,
4358
- lane: "code",
4359
- label: block.filename,
4360
- detail: block.language + (block.change_type ? ` \xB7 ${block.change_type}` : "")
4361
- });
4362
- }
4363
- for (const note2 of segment.pass2.visual_notes) {
4364
- const seconds = parseTimestamp(note2.timestamp);
4365
- if (seconds > duration && duration > 0) continue;
4366
- markers.push({
4367
- seconds,
4368
- lane: "visual",
4369
- label: note2.visual_type,
4370
- detail: note2.description.slice(0, 80) + (note2.description.length > 80 ? "\u2026" : "")
4371
- });
4372
- }
4373
- }
4374
- }
4375
- const topics = pipelineResult.synthesisResult?.topics ?? [];
4376
- for (const topic of topics) {
4377
- for (const ts of topic.timestamps) {
4378
- const seconds = parseTimestamp(ts);
4379
- if (seconds > duration && duration > 0) continue;
4380
- markers.push({
4381
- seconds,
4382
- lane: "topic",
4383
- label: topic.title,
4384
- detail: topic.summary?.slice(0, 80) ?? ""
4385
- });
4386
- }
4387
- }
4388
- return markers;
4389
- }
4390
- function renderMarker(m, duration) {
4391
- const left = toPercent(m.seconds, duration);
4392
- const time = formatTime(m.seconds);
4393
- const tooltipRaw = `${time} \u2014 ${m.label}${m.detail ? ": " + m.detail : ""}`;
4394
- const tooltipAttr = escapeHtml(tooltipRaw);
4395
- return `<div class="marker marker-${m.lane}" style="left:${left}%" title="${tooltipAttr}" aria-label="${tooltipAttr}"></div>`;
4396
- }
4397
- function renderLane(laneId, laneLabel, markers, duration) {
4398
- const laneMarkers = markers.filter((m) => m.lane === laneId);
4399
- const renderedMarkers = laneMarkers.map((m) => renderMarker(m, duration)).join("\n ");
4400
- return `
4401
- <div class="lane">
4402
- <div class="lane-label">${laneLabel}</div>
4403
- <div class="lane-track" role="region" aria-label="${laneLabel} lane">
4404
- ${renderedMarkers}
4405
- </div>
4406
- </div>`;
4407
- }
4408
- function buildTimeAxis(duration) {
4409
- if (duration <= 0) return "";
4410
- const intervals = [30, 60, 120, 300, 600, 900, 1800, 3600];
4411
- const targetTicks = 10;
4412
- const ideal = duration / targetTicks;
4413
- const interval = intervals.find((i) => i >= ideal) ?? intervals[intervals.length - 1] ?? 3600;
4414
- const ticks = [];
4415
- for (let t = 0; t <= duration; t += interval) {
4416
- const left = toPercent(t, duration);
4417
- const label = formatTime(t);
4418
- ticks.push(`<div class="tick" style="left:${left}%"><span>${label}</span></div>`);
4419
- }
4420
- return ticks.join("\n ");
4421
- }
4422
- function generateTimeline(params) {
4423
- const { pipelineResult, duration, speakerMapping } = params;
4424
- const markers = collectMarkers(pipelineResult, duration, speakerMapping);
4425
- const effectiveDuration = duration > 0 ? duration : 1;
4426
- const speechLane = renderLane("speech", "Speech", markers, effectiveDuration);
4427
- const codeLane = renderLane("code", "Code", markers, effectiveDuration);
4428
- const visualLane = renderLane("visual", "Slides / Visuals", markers, effectiveDuration);
4429
- const topicLane = renderLane("topic", "Key Moments", markers, effectiveDuration);
4430
- const timeAxis = buildTimeAxis(effectiveDuration);
4431
- return `<!DOCTYPE html>
4432
- <html lang="en">
4433
- <head>
4434
- <meta charset="UTF-8">
4435
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
4436
- <title>Video Timeline</title>
4437
- <style>
4438
- /* ------------------------------------------------------------------ */
4439
- /* Reset + base */
4440
- /* ------------------------------------------------------------------ */
4441
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
4442
-
4443
- :root {
4444
- --bg: #f9fafb;
4445
- --surface: #ffffff;
4446
- --border: #e5e7eb;
4447
- --text: #111827;
4448
- --text-muted: #6b7280;
4449
- --shadow: 0 1px 3px rgba(0,0,0,.1);
4450
-
4451
- --speech-color: #3b82f6;
4452
- --code-color: #22c55e;
4453
- --visual-color: #a855f7;
4454
- --topic-color: #eab308;
4455
-
4456
- --lane-h: 36px;
4457
- --label-w: 120px;
4458
- }
4459
-
4460
- @media (prefers-color-scheme: dark) {
4461
- :root {
4462
- --bg: #0f172a;
4463
- --surface: #1e293b;
4464
- --border: #334155;
4465
- --text: #f1f5f9;
4466
- --text-muted: #94a3b8;
4467
- --shadow: 0 1px 3px rgba(0,0,0,.4);
4468
- }
4469
- }
4470
-
4471
- html, body {
4472
- height: 100%;
4473
- background: var(--bg);
4474
- color: var(--text);
4475
- font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
4476
- font-size: 14px;
4477
- line-height: 1.5;
4478
- }
4479
-
4480
- /* ------------------------------------------------------------------ */
4481
- /* Layout */
4482
- /* ------------------------------------------------------------------ */
4483
- .page {
4484
- max-width: 100%;
4485
- padding: 1.5rem 1rem;
4486
- }
4487
-
4488
- h1 {
4489
- font-size: 1.25rem;
4490
- font-weight: 700;
4491
- margin-bottom: 1rem;
4492
- }
4493
-
4494
- .legend {
4495
- display: flex;
4496
- flex-wrap: wrap;
4497
- gap: .5rem 1rem;
4498
- margin-bottom: 1.25rem;
4499
- }
4500
-
4501
- .legend-item {
4502
- display: flex;
4503
- align-items: center;
4504
- gap: .4rem;
4505
- font-size: .8125rem;
4506
- color: var(--text-muted);
4507
- }
4508
-
4509
- .legend-dot {
4510
- width: 10px;
4511
- height: 10px;
4512
- border-radius: 50%;
4513
- flex-shrink: 0;
4514
- }
4515
-
4516
- .legend-dot.speech { background: var(--speech-color); }
4517
- .legend-dot.code { background: var(--code-color); }
4518
- .legend-dot.visual { background: var(--visual-color); }
4519
- .legend-dot.topic { background: var(--topic-color); }
4520
-
4521
- /* ------------------------------------------------------------------ */
4522
- /* Scroll container */
4523
- /* ------------------------------------------------------------------ */
4524
- .scroll-wrapper {
4525
- overflow-x: auto;
4526
- -webkit-overflow-scrolling: touch;
4527
- border: 1px solid var(--border);
4528
- border-radius: .5rem;
4529
- background: var(--surface);
4530
- box-shadow: var(--shadow);
4531
- }
4532
-
4533
- .timeline {
4534
- min-width: 640px;
4535
- padding: 1rem;
4536
- }
4537
-
4538
- /* ------------------------------------------------------------------ */
4539
- /* Lanes */
4540
- /* ------------------------------------------------------------------ */
4541
- .lane {
4542
- display: flex;
4543
- align-items: center;
4544
- margin-bottom: .5rem;
4545
- }
4546
-
4547
- .lane-label {
4548
- width: var(--label-w);
4549
- flex-shrink: 0;
4550
- font-size: .75rem;
4551
- font-weight: 600;
4552
- color: var(--text-muted);
4553
- text-transform: uppercase;
4554
- letter-spacing: .04em;
4555
- padding-right: .75rem;
4556
- }
4557
-
4558
- .lane-track {
4559
- flex: 1;
4560
- height: var(--lane-h);
4561
- position: relative;
4562
- background: var(--bg);
4563
- border-radius: .25rem;
4564
- border: 1px solid var(--border);
4565
- }
4566
-
4567
- /* ------------------------------------------------------------------ */
4568
- /* Markers */
4569
- /* ------------------------------------------------------------------ */
4570
- .marker {
4571
- position: absolute;
4572
- top: 50%;
4573
- transform: translate(-50%, -50%);
4574
- width: 10px;
4575
- height: 10px;
4576
- border-radius: 50%;
4577
- cursor: pointer;
4578
- transition: transform .15s ease, box-shadow .15s ease;
4579
- outline: none;
4580
- }
4581
-
4582
- .marker:hover, .marker:focus {
4583
- transform: translate(-50%, -50%) scale(1.6);
4584
- box-shadow: 0 0 0 2px var(--surface), 0 0 0 4px currentColor;
4585
- z-index: 10;
4586
- }
4587
-
4588
- .marker-speech { background: var(--speech-color); color: var(--speech-color); }
4589
- .marker-code { background: var(--code-color); color: var(--code-color); }
4590
- .marker-visual { background: var(--visual-color); color: var(--visual-color); }
4591
- .marker-topic { background: var(--topic-color); color: var(--topic-color); }
4592
-
4593
- /* ------------------------------------------------------------------ */
4594
- /* Time axis */
4595
- /* ------------------------------------------------------------------ */
4596
- .time-axis {
4597
- display: flex;
4598
- align-items: center;
4599
- margin-bottom: .25rem;
4600
- }
4601
-
4602
- .axis-spacer {
4603
- width: var(--label-w);
4604
- flex-shrink: 0;
4605
- }
4606
-
4607
- .axis-track {
4608
- flex: 1;
4609
- height: 20px;
4610
- position: relative;
4611
- }
4612
-
4613
- .tick {
4614
- position: absolute;
4615
- top: 0;
4616
- transform: translateX(-50%);
4617
- }
4618
-
4619
- .tick span {
4620
- font-size: .6875rem;
4621
- color: var(--text-muted);
4622
- white-space: nowrap;
4623
- }
4624
-
4625
- /* ------------------------------------------------------------------ */
4626
- /* Tooltip (vanilla JS) */
4627
- /* ------------------------------------------------------------------ */
4628
- #tooltip {
4629
- position: fixed;
4630
- background: var(--surface);
4631
- border: 1px solid var(--border);
4632
- border-radius: .375rem;
4633
- padding: .375rem .625rem;
4634
- font-size: .8125rem;
4635
- color: var(--text);
4636
- pointer-events: none;
4637
- z-index: 999;
4638
- box-shadow: 0 4px 12px rgba(0,0,0,.15);
4639
- max-width: 280px;
4640
- word-break: break-word;
4641
- display: none;
4642
- }
4643
-
4644
- /* ------------------------------------------------------------------ */
4645
- /* Responsive */
4646
- /* ------------------------------------------------------------------ */
4647
- @media (max-width: 480px) {
4648
- :root { --label-w: 72px; --lane-h: 32px; }
4649
- .legend { gap: .35rem .75rem; }
4650
- h1 { font-size: 1.1rem; }
4651
- }
4652
- </style>
4653
- </head>
4654
- <body>
4655
- <div class="page">
4656
- <h1>Video Timeline</h1>
4657
-
4658
- <div class="legend" aria-label="Lane colour legend">
4659
- <span class="legend-item"><span class="legend-dot speech"></span>Speech</span>
4660
- <span class="legend-item"><span class="legend-dot code"></span>Code</span>
4661
- <span class="legend-item"><span class="legend-dot visual"></span>Slides / Visuals</span>
4662
- <span class="legend-item"><span class="legend-dot topic"></span>Key Moments</span>
4663
- </div>
4664
-
4665
- <div class="scroll-wrapper">
4666
- <div class="timeline">
4667
- <div class="time-axis">
4668
- <div class="axis-spacer"></div>
4669
- <div class="axis-track">
4670
- ${timeAxis}
4671
- </div>
4672
- </div>
4673
- ${speechLane}
4674
- ${codeLane}
4675
- ${visualLane}
4676
- ${topicLane}
4677
- </div>
4678
- </div>
4679
- </div>
4680
-
4681
- <div id="tooltip" role="tooltip"></div>
4682
-
4683
- <script>
4684
- (function () {
4685
- var tooltip = document.getElementById('tooltip');
4686
- var markers = document.querySelectorAll('.marker');
4687
-
4688
- function showTooltip(el, x, y) {
4689
- var text = el.getAttribute('title') || el.getAttribute('aria-label') || '';
4690
- if (!text) return;
4691
- tooltip.textContent = text;
4692
- tooltip.style.display = 'block';
4693
- positionTooltip(x, y);
4694
- }
4695
-
4696
- function positionTooltip(x, y) {
4697
- var tw = tooltip.offsetWidth;
4698
- var th = tooltip.offsetHeight;
4699
- var vw = window.innerWidth;
4700
- var vh = window.innerHeight;
4701
- var left = x + 12;
4702
- var top = y - th / 2;
4703
- if (left + tw > vw - 8) left = x - tw - 12;
4704
- if (top < 8) top = 8;
4705
- if (top + th > vh - 8) top = vh - th - 8;
4706
- tooltip.style.left = left + 'px';
4707
- tooltip.style.top = top + 'px';
4708
- }
4709
-
4710
- function hideTooltip() {
4711
- tooltip.style.display = 'none';
4712
- }
4713
-
4714
- markers.forEach(function (m) {
4715
- m.addEventListener('mouseenter', function (e) {
4716
- showTooltip(m, e.clientX, e.clientY);
4717
- });
4718
- m.addEventListener('mousemove', function (e) {
4719
- positionTooltip(e.clientX, e.clientY);
4720
- });
4721
- m.addEventListener('mouseleave', hideTooltip);
4722
- m.addEventListener('focus', function () {
4723
- var rect = m.getBoundingClientRect();
4724
- showTooltip(m, rect.right, rect.top + rect.height / 2);
4725
- });
4726
- m.addEventListener('blur', hideTooltip);
4727
- });
4728
- })();
4729
- </script>
4730
- </body>
4731
- </html>`;
4732
- }
4733
-
4734
4365
  // src/output/metadata.ts
4735
4366
  function writeMetadata(params) {
4736
4367
  const { title, source, duration, model, processingTimeMs, filesGenerated, pipelineResult, speakerMapping, declinedMerges } = params;
@@ -4808,12 +4439,10 @@ function resolveFilesToGenerate(params) {
4808
4439
  }
4809
4440
  if (synthesisResult != null || hasPass3d) optional.add("notes.md");
4810
4441
  if (peopleExtraction != null) optional.add("people.md");
4811
- const hasPass1 = segments.some((s) => s.pass1 != null);
4812
- if (hasPass1 || hasPass2) optional.add("timeline.html");
4813
4442
  return optional;
4814
4443
  }
4815
4444
  async function generateOutput(params) {
4816
- const { pipelineResult, outputDir, videoTitle, source, duration, model, processingTimeMs, speakerMapping, declinedMerges } = params;
4445
+ const { pipelineResult, outputDir, videoTitle, source, duration, model, processingTimeMs, channelAuthor, speakerMapping, declinedMerges } = params;
4817
4446
  const slug = slugify(videoTitle);
4818
4447
  const finalOutputDir = join3(outputDir, slug);
4819
4448
  await mkdir(finalOutputDir, { recursive: true });
@@ -4918,14 +4547,6 @@ async function generateOutput(params) {
4918
4547
  errors.push(`action-items.md: ${String(err)}`);
4919
4548
  }
4920
4549
  }
4921
- if (filesToGenerate.has("timeline.html")) {
4922
- try {
4923
- const content = generateTimeline({ pipelineResult, duration, speakerMapping: expandedMapping });
4924
- await writeOutputFile("timeline.html", content);
4925
- } catch (err) {
4926
- errors.push(`timeline.html: ${String(err)}`);
4927
- }
4928
- }
4929
4550
  try {
4930
4551
  const rawFiles = writeRawOutput(pipelineResult);
4931
4552
  await mkdir(join3(finalOutputDir, "raw"), { recursive: true });
@@ -4955,7 +4576,7 @@ async function generateOutput(params) {
4955
4576
  errors.push(`metadata.json: ${String(err)}`);
4956
4577
  }
4957
4578
  try {
4958
- const content = writeGuide({ title: videoTitle, source, duration, pipelineResult, filesGenerated, speakerMapping: expandedMapping });
4579
+ const content = writeGuide({ title: videoTitle, source, duration, pipelineResult, filesGenerated, speakerMapping: expandedMapping, channelAuthor });
4959
4580
  await writeOutputFile("guide.md", content);
4960
4581
  } catch (err) {
4961
4582
  errors.push(`guide.md: ${String(err)}`);
@@ -5068,14 +4689,6 @@ async function reRenderWithSpeakerMapping(params) {
5068
4689
  errors.push(`action-items.md: ${String(err)}`);
5069
4690
  }
5070
4691
  }
5071
- if (filesToReRender.has("timeline.html")) {
5072
- try {
5073
- const content = generateTimeline({ pipelineResult, duration, speakerMapping: expandedMapping });
5074
- await writeOutputFile("timeline.html", content);
5075
- } catch (err) {
5076
- errors.push(`timeline.html: ${String(err)}`);
5077
- }
5078
- }
5079
4692
  if (filesToReRender.has("guide.md")) {
5080
4693
  try {
5081
4694
  const content = writeGuide({ title: videoTitle, source, duration, pipelineResult, filesGenerated, speakerMapping: expandedMapping });
@@ -5254,6 +4867,7 @@ async function runDistill(args) {
5254
4867
  let duration;
5255
4868
  let videoTitle;
5256
4869
  let uploadedFileNames = [];
4870
+ let ytAuthor;
5257
4871
  if (resolved.type === "youtube") {
5258
4872
  const result = await handleYouTube(resolved.value, client);
5259
4873
  fileUri = result.fileUri;
@@ -5270,8 +4884,14 @@ async function runDistill(args) {
5270
4884
  if (result.uploadedFileName != null) {
5271
4885
  uploadedFileNames = [result.uploadedFileName];
5272
4886
  }
5273
- const videoId = extractVideoId(resolved.value);
5274
- videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
4887
+ try {
4888
+ const meta = await fetchYouTubeMetadata(resolved.value);
4889
+ videoTitle = meta.title;
4890
+ ytAuthor = meta.author;
4891
+ } catch {
4892
+ const videoId = extractVideoId(resolved.value);
4893
+ videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
4894
+ }
5275
4895
  } else {
5276
4896
  const result = await handleLocalFile(resolved.value, client);
5277
4897
  fileUri = result.fileUri;
@@ -5316,6 +4936,7 @@ async function runDistill(args) {
5316
4936
  model,
5317
4937
  context,
5318
4938
  lang: args.lang,
4939
+ channelAuthor: ytAuthor,
5319
4940
  rateLimiter,
5320
4941
  onProgress: (status) => {
5321
4942
  progress2.update(status);
@@ -5339,7 +4960,8 @@ async function runDistill(args) {
5339
4960
  source: rawInput,
5340
4961
  duration,
5341
4962
  model,
5342
- processingTimeMs: elapsedMs
4963
+ processingTimeMs: elapsedMs,
4964
+ channelAuthor: ytAuthor
5343
4965
  });
5344
4966
  const elapsedSecs = Math.round(elapsedMs / 1e3);
5345
4967
  const elapsedMins = Math.floor(elapsedSecs / 60);
@@ -5379,6 +5001,7 @@ async function analyzeVideo(input, context, lang) {
5379
5001
  let mimeType;
5380
5002
  let duration;
5381
5003
  let videoTitle;
5004
+ let ytAuthor;
5382
5005
  if (resolved.type === "youtube") {
5383
5006
  const result = await handleYouTube(resolved.value, client);
5384
5007
  fileUri = result.fileUri;
@@ -5393,8 +5016,14 @@ async function analyzeVideo(input, context, lang) {
5393
5016
  `);
5394
5017
  duration = 600;
5395
5018
  }
5396
- const videoId = extractVideoId(resolved.value);
5397
- videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
5019
+ try {
5020
+ const meta = await fetchYouTubeMetadata(resolved.value);
5021
+ videoTitle = meta.title;
5022
+ ytAuthor = meta.author;
5023
+ } catch {
5024
+ const videoId = extractVideoId(resolved.value);
5025
+ videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
5026
+ }
5398
5027
  } else {
5399
5028
  const result = await handleLocalFile(resolved.value, client);
5400
5029
  fileUri = result.fileUri;
@@ -5410,6 +5039,7 @@ async function analyzeVideo(input, context, lang) {
5410
5039
  const slug = slugify(videoTitle);
5411
5040
  const finalOutputDir = `${outputDir}/${slug}`;
5412
5041
  const rateLimiter = new RateLimiter();
5042
+ const startTime = Date.now();
5413
5043
  const pipelineResult = await runPipeline({
5414
5044
  client,
5415
5045
  fileUri,
@@ -5418,8 +5048,10 @@ async function analyzeVideo(input, context, lang) {
5418
5048
  model,
5419
5049
  context,
5420
5050
  lang,
5051
+ channelAuthor: ytAuthor,
5421
5052
  rateLimiter
5422
5053
  });
5054
+ const elapsedMs = Date.now() - startTime;
5423
5055
  await generateOutput({
5424
5056
  pipelineResult,
5425
5057
  outputDir,
@@ -5427,7 +5059,8 @@ async function analyzeVideo(input, context, lang) {
5427
5059
  source: input,
5428
5060
  duration,
5429
5061
  model,
5430
- processingTimeMs: 0
5062
+ processingTimeMs: elapsedMs,
5063
+ channelAuthor: ytAuthor
5431
5064
  });
5432
5065
  let summary = "Analysis complete.";
5433
5066
  const synthesisPath = join5(finalOutputDir, "raw", "synthesis.json");
@@ -5988,7 +5621,7 @@ async function run2(args) {
5988
5621
  }
5989
5622
 
5990
5623
  // src/cli/index.ts
5991
- var version = "0.6.3";
5624
+ var version = "0.6.4";
5992
5625
  var DEFAULT_OUTPUT2 = "./vidistill-output/";
5993
5626
  var SUBCOMMANDS = {
5994
5627
  mcp: run,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidistill",
3
- "version": "0.6.3",
3
+ "version": "0.6.4",
4
4
  "description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
5
5
  "type": "module",
6
6
  "license": "MIT",