vidistill 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +29 -0
  2. package/dist/index.js +306 -56
  3. package/package.json +4 -2
package/README.md CHANGED
@@ -104,6 +104,35 @@ vidistill rename-speakers ./vidistill-output/my-meeting/ --rename "Steven Kang"
104
104
  vidistill rename-speakers ./vidistill-output/my-meeting/ --merge "K Iphone" "Kristian"
105
105
  ```
106
106
 
107
+ ## MCP Server
108
+
109
+ vidistill can run as an MCP server, letting AI coding tools (Claude Code, Cursor, etc.) analyze videos and read output directly.
110
+
111
+ ```bash
112
+ vidistill mcp
113
+ ```
114
+
115
+ To configure it in Claude Code, add the following to `~/.claude/claude_code_config.json`:
116
+
117
+ ```json
118
+ {
119
+ "mcpServers": {
120
+ "vidistill": {
121
+ "command": "npx",
122
+ "args": ["vidistill", "mcp"]
123
+ }
124
+ }
125
+ }
126
+ ```
127
+
128
+ **Tools exposed:**
129
+
130
+ - `analyze_video` — run the full pipeline on a URL or file, returns output dir + summary
131
+ - `get_transcript` — read transcript from an existing output dir, with optional time range filtering
132
+ - `get_code` — read extracted code files from an existing output dir
133
+
134
+ Requires `GEMINI_API_KEY` to be set as an environment variable or in `~/.vidistill/config.json`.
135
+
107
136
  ## How It Works
108
137
 
109
138
  Supported video formats: MP4, MOV, WebM, MKV, AVI, MPEG, FLV, WMV, 3GPP. Supported audio formats: MP3, AAC, WAV, FLAC, OGG, M4A.
package/dist/index.js CHANGED
@@ -648,22 +648,22 @@ function normalizeYouTubeUrl(url) {
648
648
  return `https://www.youtube.com/watch?v=${id}`;
649
649
  }
650
650
  function fetchYtDlpDuration(url) {
651
- return new Promise((resolve2) => {
651
+ return new Promise((resolve3) => {
652
652
  execFile("yt-dlp", ["--dump-json", "--no-download", url], { timeout: 15e3 }, (err, stdout) => {
653
653
  if (err) {
654
- resolve2(void 0);
654
+ resolve3(void 0);
655
655
  return;
656
656
  }
657
657
  try {
658
658
  const data = JSON.parse(stdout);
659
659
  const dur = data["duration"];
660
660
  if (typeof dur === "number" && dur > 0) {
661
- resolve2(dur);
661
+ resolve3(dur);
662
662
  } else {
663
- resolve2(void 0);
663
+ resolve3(void 0);
664
664
  }
665
665
  } catch {
666
- resolve2(void 0);
666
+ resolve3(void 0);
667
667
  }
668
668
  });
669
669
  });
@@ -906,7 +906,7 @@ var _require = createRequire(import.meta.url);
906
906
  var ffmpeg = _require("fluent-ffmpeg");
907
907
  var BYTES_PER_SECOND = 5e5;
908
908
  function ffprobeAsync(filePath) {
909
- return new Promise((resolve2, reject) => {
909
+ return new Promise((resolve3, reject) => {
910
910
  ffmpeg.ffprobe(filePath, (err, data) => {
911
911
  if (err) {
912
912
  reject(err);
@@ -917,7 +917,7 @@ function ffprobeAsync(filePath) {
917
917
  reject(new Error("ffprobe returned no duration"));
918
918
  return;
919
919
  }
920
- resolve2(duration);
920
+ resolve3(duration);
921
921
  });
922
922
  });
923
923
  }
@@ -1597,6 +1597,10 @@ function applySpeakerMapping(label, mapping) {
1597
1597
  for (const [key, value] of Object.entries(mapping)) {
1598
1598
  if (key.toLowerCase() === lower) return value;
1599
1599
  }
1600
+ for (const [key, value] of Object.entries(mapping)) {
1601
+ const keyParen = key.match(/\(([^)]+)\)/);
1602
+ if (keyParen && keyParen[1].trim().toLowerCase() === lower) return value;
1603
+ }
1600
1604
  return label;
1601
1605
  }
1602
1606
  function replaceNamesInText(text4, mapping) {
@@ -2030,6 +2034,36 @@ async function runLinkConsensus(params) {
2030
2034
 
2031
2035
  // src/core/transcript-consensus.ts
2032
2036
  var ALIGN_WINDOW_S = 3;
2037
+ var DEDUP_WINDOW_S = 10;
2038
/**
 * Decide whether two transcript entries should be treated as the same
 * utterance: they must lie within DEDUP_WINDOW_S seconds of each other and
 * either have identical text or share at least 80% of the longer entry's
 * tokens.
 *
 * @param {{timestamp: string, text: string}} a - First transcript entry.
 * @param {{timestamp: string, text: string}} b - Second transcript entry.
 * @returns {boolean} True when the entries are near-duplicates.
 */
function isNearDuplicate(a, b) {
  const delta = Math.abs(parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp));
  if (delta > DEDUP_WINDOW_S) return false;
  if (a.text === b.text) return true;

  // Count only non-empty tokens: a bare split(/\s+/) yields "" for leading or
  // trailing whitespace (and [""] for an empty string), which inflates the
  // denominator and makes texts differing only by padding look dissimilar.
  const countTokens = (text) => text.split(/\s+/).filter((token) => token.length > 0).length;
  const maxTokens = Math.max(countTokens(a.text), countTokens(b.text));
  if (maxTokens === 0) return false;

  // NOTE(review): tokenOverlap is assumed to return the number of shared
  // tokens (it is used as a numerator here) — confirm against its definition.
  const shared = tokenOverlap(a.text, b.text);
  return shared / maxTokens >= 0.8;
}
2046
/**
 * Collapse near-duplicate transcript entries.
 *
 * Each entry is compared (via isNearDuplicate) against at most the last three
 * entries already kept. When a duplicate is found, the entry with the longer
 * text is kept in that position; otherwise the entry is appended.
 *
 * @param {Array<{timestamp: string, text: string}>|null|undefined} entries
 * @returns {Array|null|undefined} A new array without near-duplicates, or the
 *   input itself when it is not an array or has fewer than two entries.
 */
function deduplicateEntries(entries) {
  // Tolerate a missing transcript_entries field instead of throwing on
  // `.length`; callers receive exactly what they passed in.
  if (!Array.isArray(entries) || entries.length <= 1) return entries;

  const LOOKBACK = 3;
  const kept = [entries[0]];

  for (const candidate of entries.slice(1)) {
    const oldestIndex = Math.max(0, kept.length - LOOKBACK);
    let duplicateIndex = -1;
    // Scan newest-to-oldest so the most recent matching entry wins.
    for (let j = kept.length - 1; j >= oldestIndex; j--) {
      if (isNearDuplicate(candidate, kept[j])) {
        duplicateIndex = j;
        break;
      }
    }

    if (duplicateIndex === -1) {
      kept.push(candidate);
    } else if (candidate.text.length > kept[duplicateIndex].text.length) {
      kept[duplicateIndex] = candidate;
    }
  }
  return kept;
}
2033
2067
  function selectBestText(texts) {
2034
2068
  if (texts.length === 1) return texts[0];
2035
2069
  const referenceText = texts.join(" ");
@@ -2045,7 +2079,12 @@ function selectBestText(texts) {
2045
2079
  return bestText;
2046
2080
  }
2047
2081
  function mergeTranscriptRuns(runs) {
2048
- if (runs.length === 1) return runs[0];
2082
+ if (runs.length === 1) {
2083
+ return {
2084
+ ...runs[0],
2085
+ transcript_entries: deduplicateEntries(runs[0].transcript_entries)
2086
+ };
2087
+ }
2049
2088
  const referenceRun = runs.reduce(
2050
2089
  (best, run3) => run3.transcript_entries.length > best.transcript_entries.length ? run3 : best
2051
2090
  );
@@ -2092,7 +2131,7 @@ function mergeTranscriptRuns(runs) {
2092
2131
  return {
2093
2132
  segment_index: referenceRun.segment_index,
2094
2133
  time_range: referenceRun.time_range,
2095
- transcript_entries: mergedEntries
2134
+ transcript_entries: deduplicateEntries(mergedEntries)
2096
2135
  };
2097
2136
  }
2098
2137
  function mergeSpeakerSummaries(summaries) {
@@ -5235,17 +5274,228 @@ async function runDistill(args) {
5235
5274
  }
5236
5275
 
5237
5276
  // src/commands/mcp.ts
5238
- import { log as log8 } from "@clack/prompts";
5277
+ import { resolve as resolve2, join as join5, basename as basename4, extname as extname3 } from "path";
5278
+ import { existsSync as existsSync4 } from "fs";
5279
+ import { readdir as readdir2, readFile as readFile3 } from "fs/promises";
5280
+ var DEFAULT_OUTPUT = "./vidistill-output/";
5281
/**
 * Resolve the Gemini API key without prompting the user.
 *
 * The GEMINI_API_KEY environment variable is checked first, then the `apiKey`
 * field of the object returned by loadConfig(). Values are trimmed; blank or
 * non-string values are treated as absent.
 *
 * @returns {Promise<string>} The trimmed API key.
 * @throws {Error} When neither source provides a usable key.
 */
async function resolveApiKeyNonInteractive() {
  // A hand-edited config file may hold a non-string apiKey; treating it as
  // absent surfaces the helpful error below rather than a TypeError on .trim().
  const usableKey = (value) => {
    if (typeof value !== "string") return void 0;
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : void 0;
  };

  const envKey = usableKey(process.env["GEMINI_API_KEY"]);
  if (envKey !== void 0) return envKey;

  const config = await loadConfig();
  const configKey = usableKey(config?.apiKey);
  if (configKey !== void 0) return configKey;

  throw new Error("GEMINI_API_KEY not set. Set it as an environment variable or in ~/.vidistill/config.json");
}
5288
/**
 * Run the full analysis pipeline on a YouTube URL or local file and write the
 * generated output under DEFAULT_OUTPUT.
 *
 * @param {string} input - YouTube URL or local file path.
 * @param {string} [context] - Optional context forwarded to the pipeline.
 * @param {string} [lang] - Optional output language forwarded to the pipeline.
 * @returns {Promise<{outputDir: string, summary: string}>} The per-video output
 *   directory and the overview read from raw/synthesis.json, or
 *   "Analysis complete." when no overview is available.
 * @throws {Error} When no API key can be resolved, or when any pipeline step
 *   rejects (only YouTube duration detection has a fallback).
 */
async function analyzeVideo(input, context, lang) {
  // Captured up front so the real elapsed time can be reported to
  // generateOutput instead of a hard-coded 0.
  const startedAt = Date.now();

  const apiKey = await resolveApiKeyNonInteractive();
  const resolved = resolveInput(input);
  const client = new GeminiClient(apiKey);

  let fileUri;
  let mimeType;
  let duration;
  let videoTitle;

  if (resolved.type === "youtube") {
    const result = await handleYouTube(resolved.value, client);
    fileUri = result.fileUri;
    mimeType = result.mimeType;
    try {
      duration = await detectDuration({
        ytDlpDuration: result.duration,
        geminiDuration: result.duration
      });
    } catch (err) {
      // Diagnostics go to stderr, not stdout.
      const reason = err instanceof Error ? err.message : String(err);
      process.stderr.write(`Duration detection failed, using 600s fallback: ${reason}\n`);
      duration = 600;
    }
    const videoId = extractVideoId(resolved.value);
    videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
  } else {
    const result = await handleLocalFile(resolved.value, client);
    fileUri = result.fileUri;
    mimeType = result.mimeType;
    duration = await detectDuration({
      filePath: resolved.value,
      geminiDuration: result.duration
    });
    videoTitle = basename4(resolved.value, extname3(resolved.value));
  }

  const model = MODELS.flash;
  const outputDir = resolve2(DEFAULT_OUTPUT);
  // NOTE(review): presumably mirrors the directory generateOutput writes to
  // (outputDir + slugified title) — confirm against generateOutput.
  // join() keeps separators consistent on every platform.
  const finalOutputDir = join5(outputDir, slugify(videoTitle));
  const rateLimiter = new RateLimiter();

  const pipelineResult = await runPipeline({
    client,
    fileUri,
    mimeType,
    duration,
    model,
    context,
    lang,
    rateLimiter
  });

  await generateOutput({
    pipelineResult,
    outputDir,
    videoTitle,
    source: input,
    duration,
    model,
    processingTimeMs: Date.now() - startedAt
  });

  const synthesis = await readJsonFile(join5(finalOutputDir, "raw", "synthesis.json"));
  const summary = synthesis?.overview ? synthesis.overview : "Analysis complete.";
  return { outputDir: finalOutputDir, summary };
}
5354
/**
 * Read the transcript stored in a vidistill output directory.
 *
 * Entries are collected from every raw/pass1-seg<N>.json file in ascending
 * numeric segment order and formatted as "[timestamp] speaker: text" (the
 * speaker prefix is omitted when an entry has no speaker).
 *
 * @param {string} outputDir - Path to a vidistill output directory.
 * @param {number} [startTime] - Inclusive lower bound in seconds.
 * @param {number} [endTime] - Inclusive upper bound in seconds.
 * @returns {Promise<string>} Transcript lines joined with "\n".
 * @throws {Error} "Not a vidistill output directory" when outputDir does not
 *   exist; "No extracted data found" when raw/ or its pass1 files are missing.
 */
async function getTranscript(outputDir, startTime, endTime) {
  const absDir = resolve2(outputDir);
  if (!existsSync4(absDir)) {
    throw new Error("Not a vidistill output directory");
  }
  const rawDir = join5(absDir, "raw");
  if (!existsSync4(rawDir)) {
    throw new Error("No extracted data found");
  }

  const segmentPattern = /^pass1-seg(\d+)\.json$/;
  const segmentIndex = (name) => Number.parseInt(segmentPattern.exec(name)[1], 10);
  const files = await readdir2(rawDir);
  // Sort by numeric segment index: a plain lexicographic sort would place
  // pass1-seg10.json before pass1-seg2.json and scramble long transcripts.
  const pass1Files = files
    .filter((f) => segmentPattern.test(f))
    .sort((left, right) => segmentIndex(left) - segmentIndex(right));
  if (pass1Files.length === 0) {
    throw new Error("No extracted data found");
  }

  const hasRange = startTime != null || endTime != null;
  const lines = [];
  for (const file of pass1Files) {
    const data = await readJsonFile(join5(rawDir, file));
    if (data?.transcript_entries == null) continue;
    for (const entry of data.transcript_entries) {
      if (hasRange) {
        const ts = parseTimestamp(entry.timestamp);
        if (startTime != null && ts < startTime) continue;
        if (endTime != null && ts > endTime) continue;
      }
      const speaker = entry.speaker ? `${entry.speaker}: ` : "";
      lines.push(`[${entry.timestamp}] ${speaker}${entry.text}`);
    }
  }
  return lines.join("\n");
}
5384
/**
 * Read every file directly inside the `code` folder of a vidistill output
 * directory.
 *
 * @param {string} outputDir - Path to a vidistill output directory.
 * @returns {Promise<Array<{filename: string, content: string}>>} One record
 *   per file (UTF-8 content); an empty array when there is no `code` folder.
 * @throws {Error} "Not a vidistill output directory" when outputDir does not
 *   exist.
 */
async function getCode(outputDir) {
  const absDir = resolve2(outputDir);
  if (!existsSync4(absDir)) {
    throw new Error("Not a vidistill output directory");
  }
  const codeDir = join5(absDir, "code");
  if (!existsSync4(codeDir)) {
    return [];
  }

  // Skip subdirectories: passing one to readFile fails with EISDIR and would
  // abort the whole call instead of returning the readable files.
  const entries = await readdir2(codeDir, { withFileTypes: true });
  const results = [];
  for (const entry of entries) {
    if (entry.isDirectory()) continue;
    const content = await readFile3(join5(codeDir, entry.name), "utf8");
    results.push({ filename: entry.name, content });
  }
  return results;
}
5239
5401
/**
 * Entry point of the `mcp` subcommand: start an MCP server over stdio that
 * exposes the `analyze_video`, `get_transcript` and `get_code` tools.
 *
 * Tool failures are reported to the client as text results flagged with
 * `isError: true` rather than thrown.
 *
 * @param {unknown} _args - Unused; kept for the subcommand signature.
 * @returns {Promise<void>} Resolves once the server is connected and the
 *   shutdown handlers are installed.
 */
async function run(_args) {
  const { McpServer } = await import("@modelcontextprotocol/sdk/server/mcp.js");
  const { StdioServerTransport } = await import("@modelcontextprotocol/sdk/server/stdio.js");
  const z = await import("zod");

  // Report the package version when the bundle-level `version` constant is
  // available; `typeof` keeps this safe if it is not in scope.
  const serverVersion = typeof version === "string" ? version : "1.0.0";
  const server = new McpServer({
    name: "vidistill",
    version: serverVersion
  });

  // Shared wrapper: turn the text produced by a tool (or the error it throws)
  // into the result object returned to the MCP client.
  const asToolResult = async (produceText) => {
    try {
      return {
        content: [{ type: "text", text: await produceText() }]
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: message }],
        isError: true
      };
    }
  };

  server.registerTool(
    "analyze_video",
    {
      title: "Analyze Video",
      description: "Run the full vidistill pipeline on a video URL or local file. Returns the output directory and a summary.",
      inputSchema: z.object({
        input: z.string().describe("YouTube URL or local file path"),
        context: z.string().optional().describe('Optional context about the video (e.g. "CS lecture", "product demo")'),
        lang: z.string().optional().describe("Output language")
      })
    },
    ({ input, context, lang }) => asToolResult(async () => JSON.stringify(await analyzeVideo(input, context, lang)))
  );

  server.registerTool(
    "get_transcript",
    {
      title: "Get Transcript",
      description: "Read transcript from an existing vidistill output directory. Optionally filter by time range.",
      inputSchema: z.object({
        outputDir: z.string().describe("Path to a vidistill output directory"),
        startTime: z.number().optional().describe("Start time in seconds to filter from"),
        endTime: z.number().optional().describe("End time in seconds to filter to")
      })
    },
    ({ outputDir, startTime, endTime }) => asToolResult(() => getTranscript(outputDir, startTime, endTime))
  );

  server.registerTool(
    "get_code",
    {
      title: "Get Code",
      description: "Read code files from an existing vidistill output directory.",
      inputSchema: z.object({
        outputDir: z.string().describe("Path to a vidistill output directory")
      })
    },
    ({ outputDir }) => asToolResult(async () => JSON.stringify(await getCode(outputDir)))
  );

  const transport = new StdioServerTransport();
  await server.connect(transport);

  // Close the server on termination signals. A failing close() must still end
  // the process instead of leaving an unhandled rejection behind.
  const shutdown = async () => {
    let exitCode = 0;
    try {
      await server.close();
    } catch (err) {
      exitCode = 1;
      process.stderr.write(`Failed to close MCP server: ${err instanceof Error ? err.message : String(err)}\n`);
    }
    process.exit(exitCode);
  };
  process.once("SIGINT", shutdown);
  process.once("SIGTERM", shutdown);
}
5242
5492
 
5243
5493
  // src/commands/rename-speakers.ts
5244
- import { join as join5 } from "path";
5245
- import { log as log10, text as text3, isCancel as isCancel3, cancel as cancel4 } from "@clack/prompts";
5494
+ import { join as join6 } from "path";
5495
+ import { log as log9, text as text3, isCancel as isCancel3, cancel as cancel4 } from "@clack/prompts";
5246
5496
 
5247
5497
  // src/cli/speaker-naming.ts
5248
- import { log as log9, text as text2, confirm as confirm2, isCancel as isCancel2, cancel as cancel3 } from "@clack/prompts";
5498
+ import { log as log8, text as text2, confirm as confirm2, isCancel as isCancel2, cancel as cancel3 } from "@clack/prompts";
5249
5499
  async function detectAndPromptMerges(mapping) {
5250
5500
  const byName = /* @__PURE__ */ new Map();
5251
5501
  for (const [label, name] of Object.entries(mapping)) {
@@ -5284,7 +5534,7 @@ async function detectAndPromptMerges(mapping) {
5284
5534
  async function collectSpeakersFromRaw(rawDir) {
5285
5535
  const speakerEntries = /* @__PURE__ */ new Map();
5286
5536
  for (let n = 0; n < 1e3; n++) {
5287
- const pass1 = await readJsonFile(join5(rawDir, `pass1-seg${n}.json`));
5537
+ const pass1 = await readJsonFile(join6(rawDir, `pass1-seg${n}.json`));
5288
5538
  if (pass1 == null) break;
5289
5539
  for (const info of pass1.speaker_summary) {
5290
5540
  if (!info.speaker_id) continue;
@@ -5401,22 +5651,22 @@ function formatNameList(names) {
5401
5651
  return names.map((n) => `"${n.replace(/"/g, '\\"')}"`).join(", ");
5402
5652
  }
5403
5653
  async function runList(outputDir) {
5404
- const metadataPath = join5(outputDir, "metadata.json");
5654
+ const metadataPath = join6(outputDir, "metadata.json");
5405
5655
  const metadata = await readJsonFile(metadataPath);
5406
5656
  if (metadata == null) {
5407
- log10.error("Not a vidistill output directory");
5657
+ log9.error("Not a vidistill output directory");
5408
5658
  return;
5409
5659
  }
5410
- const rawDir = join5(outputDir, "raw");
5660
+ const rawDir = join6(outputDir, "raw");
5411
5661
  const speakers = await collectSpeakersFromRaw(rawDir);
5412
5662
  const speakerMapping = metadata.speakerMapping ?? {};
5413
5663
  if (speakers.length === 0 && Object.keys(speakerMapping).length === 0) {
5414
- log10.info("No speakers found.");
5664
+ log9.info("No speakers found.");
5415
5665
  return;
5416
5666
  }
5417
5667
  const groups = groupSpeakersByExistingMapping(speakers, speakerMapping);
5418
5668
  if (groups.length === 0) {
5419
- log10.info("No speakers found.");
5669
+ log9.info("No speakers found.");
5420
5670
  return;
5421
5671
  }
5422
5672
  const lines = groups.map((group, idx) => {
@@ -5425,21 +5675,21 @@ async function runList(outputDir) {
5425
5675
  const labelsStr = group.labels.join(", ");
5426
5676
  return `${String(num)}. ${displayName} (${labelsStr}, ${String(group.totalEntries)} entries)`;
5427
5677
  });
5428
- log10.info(lines.join("\n"));
5678
+ log9.info(lines.join("\n"));
5429
5679
  }
5430
5680
  async function runRename(outputDir, oldName, newName) {
5431
5681
  if (newName.trim().length === 0) {
5432
- log10.error("New name cannot be empty. Use the interactive prompt to clear a mapping.");
5682
+ log9.error("New name cannot be empty. Use the interactive prompt to clear a mapping.");
5433
5683
  return;
5434
5684
  }
5435
- const metadataPath = join5(outputDir, "metadata.json");
5685
+ const metadataPath = join6(outputDir, "metadata.json");
5436
5686
  const metadata = await readJsonFile(metadataPath);
5437
5687
  if (metadata == null) {
5438
- log10.error("Not a vidistill output directory");
5688
+ log9.error("Not a vidistill output directory");
5439
5689
  return;
5440
5690
  }
5441
5691
  const speakerMapping = { ...metadata.speakerMapping ?? {} };
5442
- const rawDir = join5(outputDir, "raw");
5692
+ const rawDir = join6(outputDir, "raw");
5443
5693
  const speakers = await collectSpeakersFromRaw(rawDir);
5444
5694
  const matchingKeys = [];
5445
5695
  const directKey = speakers.find((s) => s.label === oldName);
@@ -5456,18 +5706,18 @@ async function runRename(outputDir, oldName, newName) {
5456
5706
  const currentNames = Object.values(speakerMapping);
5457
5707
  const unmappedLabels = speakers.filter((s) => speakerMapping[s.label] == null).map((s) => s.label);
5458
5708
  const allNames = [.../* @__PURE__ */ new Set([...currentNames, ...unmappedLabels])];
5459
- log10.error(`No speaker named "${oldName}" found. Current speakers: ${formatNameList(allNames)}`);
5709
+ log9.error(`No speaker named "${oldName}" found. Current speakers: ${formatNameList(allNames)}`);
5460
5710
  return;
5461
5711
  }
5462
5712
  if (matchingKeys.length > 1) {
5463
- log10.error(
5713
+ log9.error(
5464
5714
  `Multiple speakers named "${oldName}" (${matchingKeys.join(", ")}). Use SPEAKER_XX label to specify which one.`
5465
5715
  );
5466
5716
  return;
5467
5717
  }
5468
5718
  const key = matchingKeys[0];
5469
5719
  speakerMapping[key] = newName;
5470
- log10.info("Re-rendering output files with updated speaker names...");
5720
+ log9.info("Re-rendering output files with updated speaker names...");
5471
5721
  const result = await reRenderWithSpeakerMapping({
5472
5722
  outputDir,
5473
5723
  speakerMapping,
@@ -5475,20 +5725,20 @@ async function runRename(outputDir, oldName, newName) {
5475
5725
  });
5476
5726
  if (result.errors.length > 0) {
5477
5727
  for (const err of result.errors) {
5478
- log10.error(err);
5728
+ log9.error(err);
5479
5729
  }
5480
5730
  }
5481
- log10.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
5731
+ log9.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
5482
5732
  }
5483
5733
  async function runMerge(outputDir, sourceName, targetName) {
5484
- const metadataPath = join5(outputDir, "metadata.json");
5734
+ const metadataPath = join6(outputDir, "metadata.json");
5485
5735
  const metadata = await readJsonFile(metadataPath);
5486
5736
  if (metadata == null) {
5487
- log10.error("Not a vidistill output directory");
5737
+ log9.error("Not a vidistill output directory");
5488
5738
  return;
5489
5739
  }
5490
5740
  const speakerMapping = { ...metadata.speakerMapping ?? {} };
5491
- const rawDir = join5(outputDir, "raw");
5741
+ const rawDir = join6(outputDir, "raw");
5492
5742
  const speakers = await collectSpeakersFromRaw(rawDir);
5493
5743
  function findKeys(name) {
5494
5744
  const directKey = speakers.find((s) => s.label === name);
@@ -5513,19 +5763,19 @@ async function runMerge(outputDir, sourceName, targetName) {
5513
5763
  const targetKeys = findKeys(targetName);
5514
5764
  if (sourceKeys.length === 0) {
5515
5765
  const currentNames = buildCurrentNames(speakers, speakerMapping);
5516
- log10.error(`No speaker named "${sourceName}" found. Current speakers: ${formatNameList(currentNames)}`);
5766
+ log9.error(`No speaker named "${sourceName}" found. Current speakers: ${formatNameList(currentNames)}`);
5517
5767
  return;
5518
5768
  }
5519
5769
  if (targetKeys.length === 0) {
5520
5770
  const currentNames = buildCurrentNames(speakers, speakerMapping);
5521
- log10.error(`No speaker named "${targetName}" found. Current speakers: ${formatNameList(currentNames)}`);
5771
+ log9.error(`No speaker named "${targetName}" found. Current speakers: ${formatNameList(currentNames)}`);
5522
5772
  return;
5523
5773
  }
5524
5774
  const resolvedTargetName = speakerMapping[targetKeys[0]] ?? targetName;
5525
5775
  for (const key of sourceKeys) {
5526
5776
  speakerMapping[key] = resolvedTargetName;
5527
5777
  }
5528
- log10.info("Re-rendering output files with updated speaker names...");
5778
+ log9.info("Re-rendering output files with updated speaker names...");
5529
5779
  const result = await reRenderWithSpeakerMapping({
5530
5780
  outputDir,
5531
5781
  speakerMapping,
@@ -5533,10 +5783,10 @@ async function runMerge(outputDir, sourceName, targetName) {
5533
5783
  });
5534
5784
  if (result.errors.length > 0) {
5535
5785
  for (const err of result.errors) {
5536
- log10.error(err);
5786
+ log9.error(err);
5537
5787
  }
5538
5788
  }
5539
- log10.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
5789
+ log9.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
5540
5790
  }
5541
5791
  function buildCurrentNames(speakers, speakerMapping) {
5542
5792
  const names = /* @__PURE__ */ new Set();
@@ -5553,11 +5803,11 @@ function buildCurrentNames(speakers, speakerMapping) {
5553
5803
  async function run2(args) {
5554
5804
  const { outputDir, list, rename, merge, error } = parseArgs(args);
5555
5805
  if (error != null) {
5556
- log10.error(error);
5806
+ log9.error(error);
5557
5807
  return;
5558
5808
  }
5559
5809
  if (outputDir == null || outputDir.trim() === "") {
5560
- log10.error('Usage: vidistill rename-speakers <output-dir> [--list] [--rename "old" "new"] [--merge "source" "target"]');
5810
+ log9.error('Usage: vidistill rename-speakers <output-dir> [--list] [--rename "old" "new"] [--merge "source" "target"]');
5561
5811
  return;
5562
5812
  }
5563
5813
  if (list) {
@@ -5572,25 +5822,25 @@ async function run2(args) {
5572
5822
  await runMerge(outputDir, merge[0], merge[1]);
5573
5823
  return;
5574
5824
  }
5575
- const metadataPath = join5(outputDir, "metadata.json");
5825
+ const metadataPath = join6(outputDir, "metadata.json");
5576
5826
  const metadata = await readJsonFile(metadataPath);
5577
5827
  if (metadata == null) {
5578
- log10.error("Not a vidistill output directory");
5828
+ log9.error("Not a vidistill output directory");
5579
5829
  return;
5580
5830
  }
5581
- const rawDir = join5(outputDir, "raw");
5582
- const peopleExtraction = await readJsonFile(join5(rawDir, "pass3b-people.json"));
5831
+ const rawDir = join6(outputDir, "raw");
5832
+ const peopleExtraction = await readJsonFile(join6(rawDir, "pass3b-people.json"));
5583
5833
  if (peopleExtraction == null) {
5584
- log10.info("No speakers detected in this video");
5834
+ log9.info("No speakers detected in this video");
5585
5835
  return;
5586
5836
  }
5587
5837
  const speakers = await collectSpeakersFromRaw(rawDir);
5588
5838
  if (speakers.length === 0) {
5589
- log10.info("No speakers detected in this video");
5839
+ log9.info("No speakers detected in this video");
5590
5840
  return;
5591
5841
  }
5592
5842
  const existingMapping = metadata.speakerMapping ?? {};
5593
- log10.info(
5843
+ log9.info(
5594
5844
  `${String(speakers.length)} speaker${speakers.length === 1 ? "" : "s"} found. Enter names (or press Enter to keep current).`
5595
5845
  );
5596
5846
  const groups = groupSpeakersByExistingMapping(speakers, existingMapping);
@@ -5637,7 +5887,7 @@ async function run2(args) {
5637
5887
  return;
5638
5888
  }
5639
5889
  const { mapping: finalMapping, declinedMerges } = mergeResult;
5640
- log10.info("Re-rendering output files with updated speaker names...");
5890
+ log9.info("Re-rendering output files with updated speaker names...");
5641
5891
  const result = await reRenderWithSpeakerMapping({
5642
5892
  outputDir,
5643
5893
  speakerMapping: finalMapping,
@@ -5645,15 +5895,15 @@ async function run2(args) {
5645
5895
  });
5646
5896
  if (result.errors.length > 0) {
5647
5897
  for (const err of result.errors) {
5648
- log10.error(err);
5898
+ log9.error(err);
5649
5899
  }
5650
5900
  }
5651
- log10.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
5901
+ log9.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
5652
5902
  }
5653
5903
 
5654
5904
  // src/cli/index.ts
5655
- var version = "0.5.3";
5656
- var DEFAULT_OUTPUT = "./vidistill-output/";
5905
+ var version = "0.6.0";
5906
+ var DEFAULT_OUTPUT2 = "./vidistill-output/";
5657
5907
  var SUBCOMMANDS = {
5658
5908
  mcp: run,
5659
5909
  "rename-speakers": run2
@@ -5679,9 +5929,9 @@ Commands: ${Object.keys(SUBCOMMANDS).join(", ")}`
5679
5929
  },
5680
5930
  output: {
5681
5931
  type: "string",
5682
- description: `Output directory for generated notes (default: ${DEFAULT_OUTPUT})`,
5932
+ description: `Output directory for generated notes (default: ${DEFAULT_OUTPUT2})`,
5683
5933
  alias: "o",
5684
- default: DEFAULT_OUTPUT
5934
+ default: DEFAULT_OUTPUT2
5685
5935
  },
5686
5936
  lang: {
5687
5937
  type: "string",
@@ -5705,11 +5955,11 @@ Commands: ${Object.keys(SUBCOMMANDS).join(", ")}`
5705
5955
  lang: args.lang
5706
5956
  });
5707
5957
  } catch (err) {
5708
- const { log: log11 } = await import("@clack/prompts");
5958
+ const { log: log10 } = await import("@clack/prompts");
5709
5959
  const { default: pc4 } = await import("picocolors");
5710
5960
  const raw = err instanceof Error ? err.message : String(err);
5711
5961
  const message = raw.split("\n")[0].slice(0, 200);
5712
- log11.error(pc4.red(message));
5962
+ log10.error(pc4.red(message));
5713
5963
  process.exit(1);
5714
5964
  }
5715
5965
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "vidistill",
3
- "version": "0.5.3",
3
+ "version": "0.6.0",
4
4
  "description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -31,11 +31,13 @@
31
31
  "dependencies": {
32
32
  "@clack/prompts": "1.0.1",
33
33
  "@google/genai": "^1.40.0",
34
+ "@modelcontextprotocol/sdk": "^1.27.1",
34
35
  "citty": "^0.1.6",
35
36
  "figlet": "^1.8.0",
36
37
  "fluent-ffmpeg": "^2.1.3",
37
38
  "picocolors": "^1.1.1",
38
- "ytdlp-nodejs": "^2.2.0"
39
+ "ytdlp-nodejs": "^2.2.0",
40
+ "zod": "^4.3.6"
39
41
  },
40
42
  "devDependencies": {
41
43
  "@types/figlet": "^1.7.0",