vidistill 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -0
- package/dist/index.js +306 -56
- package/package.json +4 -2
package/README.md
CHANGED
|
@@ -104,6 +104,35 @@ vidistill rename-speakers ./vidistill-output/my-meeting/ --rename "Steven Kang"
|
|
|
104
104
|
vidistill rename-speakers ./vidistill-output/my-meeting/ --merge "K Iphone" "Kristian"
|
|
105
105
|
```
|
|
106
106
|
|
|
107
|
+
## MCP Server
|
|
108
|
+
|
|
109
|
+
vidistill can run as an MCP server, letting AI coding tools (Claude Code, Cursor, etc.) analyze videos and read output directly.
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
vidistill mcp
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
To configure in Claude Code, add to `~/.claude/claude_code_config.json`:
|
|
116
|
+
|
|
117
|
+
```json
|
|
118
|
+
{
|
|
119
|
+
"mcpServers": {
|
|
120
|
+
"vidistill": {
|
|
121
|
+
"command": "npx",
|
|
122
|
+
"args": ["vidistill", "mcp"]
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
**Tools exposed:**
|
|
129
|
+
|
|
130
|
+
- `analyze_video` — run the full pipeline on a URL or file, returns output dir + summary
|
|
131
|
+
- `get_transcript` — read transcript from an existing output dir, with optional time range filtering
|
|
132
|
+
- `get_code` — read extracted code files from an existing output dir
|
|
133
|
+
|
|
134
|
+
Requires a Gemini API key, set either as the `GEMINI_API_KEY` environment variable or as `apiKey` in `~/.vidistill/config.json`.
|
|
135
|
+
|
|
107
136
|
## How It Works
|
|
108
137
|
|
|
109
138
|
Supported video formats: MP4, MOV, WebM, MKV, AVI, MPEG, FLV, WMV, 3GPP. Supported audio formats: MP3, AAC, WAV, FLAC, OGG, M4A.
|
package/dist/index.js
CHANGED
|
@@ -648,22 +648,22 @@ function normalizeYouTubeUrl(url) {
|
|
|
648
648
|
return `https://www.youtube.com/watch?v=${id}`;
|
|
649
649
|
}
|
|
650
650
|
function fetchYtDlpDuration(url) {
|
|
651
|
-
return new Promise((
|
|
651
|
+
return new Promise((resolve3) => {
|
|
652
652
|
execFile("yt-dlp", ["--dump-json", "--no-download", url], { timeout: 15e3 }, (err, stdout) => {
|
|
653
653
|
if (err) {
|
|
654
|
-
|
|
654
|
+
resolve3(void 0);
|
|
655
655
|
return;
|
|
656
656
|
}
|
|
657
657
|
try {
|
|
658
658
|
const data = JSON.parse(stdout);
|
|
659
659
|
const dur = data["duration"];
|
|
660
660
|
if (typeof dur === "number" && dur > 0) {
|
|
661
|
-
|
|
661
|
+
resolve3(dur);
|
|
662
662
|
} else {
|
|
663
|
-
|
|
663
|
+
resolve3(void 0);
|
|
664
664
|
}
|
|
665
665
|
} catch {
|
|
666
|
-
|
|
666
|
+
resolve3(void 0);
|
|
667
667
|
}
|
|
668
668
|
});
|
|
669
669
|
});
|
|
@@ -906,7 +906,7 @@ var _require = createRequire(import.meta.url);
|
|
|
906
906
|
var ffmpeg = _require("fluent-ffmpeg");
|
|
907
907
|
var BYTES_PER_SECOND = 5e5;
|
|
908
908
|
function ffprobeAsync(filePath) {
|
|
909
|
-
return new Promise((
|
|
909
|
+
return new Promise((resolve3, reject) => {
|
|
910
910
|
ffmpeg.ffprobe(filePath, (err, data) => {
|
|
911
911
|
if (err) {
|
|
912
912
|
reject(err);
|
|
@@ -917,7 +917,7 @@ function ffprobeAsync(filePath) {
|
|
|
917
917
|
reject(new Error("ffprobe returned no duration"));
|
|
918
918
|
return;
|
|
919
919
|
}
|
|
920
|
-
|
|
920
|
+
resolve3(duration);
|
|
921
921
|
});
|
|
922
922
|
});
|
|
923
923
|
}
|
|
@@ -1597,6 +1597,10 @@ function applySpeakerMapping(label, mapping) {
|
|
|
1597
1597
|
for (const [key, value] of Object.entries(mapping)) {
|
|
1598
1598
|
if (key.toLowerCase() === lower) return value;
|
|
1599
1599
|
}
|
|
1600
|
+
for (const [key, value] of Object.entries(mapping)) {
|
|
1601
|
+
const keyParen = key.match(/\(([^)]+)\)/);
|
|
1602
|
+
if (keyParen && keyParen[1].trim().toLowerCase() === lower) return value;
|
|
1603
|
+
}
|
|
1600
1604
|
return label;
|
|
1601
1605
|
}
|
|
1602
1606
|
function replaceNamesInText(text4, mapping) {
|
|
@@ -2030,6 +2034,36 @@ async function runLinkConsensus(params) {
|
|
|
2030
2034
|
|
|
2031
2035
|
// src/core/transcript-consensus.ts
|
|
2032
2036
|
var ALIGN_WINDOW_S = 3;
|
|
2037
|
+
var DEDUP_WINDOW_S = 10;
|
|
2038
|
+
/**
 * Decides whether two transcript entries should be treated as the same utterance.
 *
 * Entries whose timestamps are more than DEDUP_WINDOW_S seconds apart are never
 * duplicates. Inside that window, identical text always counts as a duplicate;
 * otherwise the value returned by tokenOverlap, divided by the larger of the two
 * whitespace-split token counts, must reach 0.8.
 *
 * @param {{ timestamp: string, text: string }} a - First transcript entry.
 * @param {{ timestamp: string, text: string }} b - Second transcript entry.
 * @returns {boolean} True when the entries are considered near-duplicates.
 */
function isNearDuplicate(a, b) {
  const secondsApart = Math.abs(parseTimestamp(a.timestamp) - parseTimestamp(b.timestamp));
  if (secondsApart > DEDUP_WINDOW_S) {
    return false;
  }
  if (a.text === b.text) {
    return true;
  }
  const overlap = tokenOverlap(a.text, b.text);
  const countTokens = (text) => text.split(/\s+/).length;
  const largerCount = Math.max(countTokens(a.text), countTokens(b.text));
  return largerCount > 0 && overlap / largerCount >= 0.8;
}
|
|
2046
|
+
/**
 * Collapses near-duplicate transcript entries while preserving order.
 *
 * Each entry is compared (newest first) against at most the three most recently
 * kept entries. On the first match the entry is dropped, unless its text is
 * longer than the kept one, in which case it takes the kept entry's slot.
 * Inputs with zero or one entry are returned as-is (same array instance).
 *
 * @param {Array<{ timestamp: string, text: string }>} entries - Transcript entries in order.
 * @returns {Array<{ timestamp: string, text: string }>} Entries with near-duplicates removed.
 */
function deduplicateEntries(entries) {
  if (entries.length <= 1) return entries;
  const LOOKBACK = 3;
  const kept = [entries[0]];
  for (const candidate of entries.slice(1)) {
    const oldestIndex = Math.max(0, kept.length - LOOKBACK);
    let matchIndex = -1;
    // Walk backwards so the most recent kept entry wins when several match.
    for (let k = kept.length - 1; k >= oldestIndex && matchIndex === -1; k--) {
      if (isNearDuplicate(candidate, kept[k])) {
        matchIndex = k;
      }
    }
    if (matchIndex === -1) {
      kept.push(candidate);
    } else if (candidate.text.length > kept[matchIndex].text.length) {
      // Prefer the longer wording of the same utterance.
      kept[matchIndex] = candidate;
    }
  }
  return kept;
}
|
|
2033
2067
|
function selectBestText(texts) {
|
|
2034
2068
|
if (texts.length === 1) return texts[0];
|
|
2035
2069
|
const referenceText = texts.join(" ");
|
|
@@ -2045,7 +2079,12 @@ function selectBestText(texts) {
|
|
|
2045
2079
|
return bestText;
|
|
2046
2080
|
}
|
|
2047
2081
|
function mergeTranscriptRuns(runs) {
|
|
2048
|
-
if (runs.length === 1)
|
|
2082
|
+
if (runs.length === 1) {
|
|
2083
|
+
return {
|
|
2084
|
+
...runs[0],
|
|
2085
|
+
transcript_entries: deduplicateEntries(runs[0].transcript_entries)
|
|
2086
|
+
};
|
|
2087
|
+
}
|
|
2049
2088
|
const referenceRun = runs.reduce(
|
|
2050
2089
|
(best, run3) => run3.transcript_entries.length > best.transcript_entries.length ? run3 : best
|
|
2051
2090
|
);
|
|
@@ -2092,7 +2131,7 @@ function mergeTranscriptRuns(runs) {
|
|
|
2092
2131
|
return {
|
|
2093
2132
|
segment_index: referenceRun.segment_index,
|
|
2094
2133
|
time_range: referenceRun.time_range,
|
|
2095
|
-
transcript_entries: mergedEntries
|
|
2134
|
+
transcript_entries: deduplicateEntries(mergedEntries)
|
|
2096
2135
|
};
|
|
2097
2136
|
}
|
|
2098
2137
|
function mergeSpeakerSummaries(summaries) {
|
|
@@ -5235,17 +5274,228 @@ async function runDistill(args) {
|
|
|
5235
5274
|
}
|
|
5236
5275
|
|
|
5237
5276
|
// src/commands/mcp.ts
|
|
5238
|
-
import {
|
|
5277
|
+
import { resolve as resolve2, join as join5, basename as basename4, extname as extname3 } from "path";
|
|
5278
|
+
import { existsSync as existsSync4 } from "fs";
|
|
5279
|
+
import { readdir as readdir2, readFile as readFile3 } from "fs/promises";
|
|
5280
|
+
var DEFAULT_OUTPUT = "./vidistill-output/";
|
|
5281
|
+
/**
 * Resolves the Gemini API key without prompting the user.
 *
 * Lookup order: the GEMINI_API_KEY environment variable, then the `apiKey`
 * field of the object returned by loadConfig(). Values are trimmed, and blank
 * values are treated as missing.
 *
 * @returns {Promise<string>} The trimmed API key.
 * @throws {Error} When neither source provides a non-blank string key.
 */
async function resolveApiKeyNonInteractive() {
  const envKey = process.env["GEMINI_API_KEY"]?.trim();
  if (envKey) return envKey;
  const config = await loadConfig();
  // The config file is user-edited; a non-string apiKey must fall through to the
  // descriptive error below rather than crash on `.trim()`.
  const configKey = typeof config?.apiKey === "string" ? config.apiKey.trim() : "";
  if (configKey.length > 0) return configKey;
  throw new Error("GEMINI_API_KEY not set. Set it as an environment variable or in ~/.vidistill/config.json");
}
|
|
5288
|
+
/**
 * Runs the full analysis pipeline for one video and writes the output files.
 *
 * YouTube inputs go through handleYouTube; anything else is handled as a local
 * file. Results are written below DEFAULT_OUTPUT (resolved against the current
 * working directory).
 *
 * @param {string} input - YouTube URL or local file path.
 * @param {string} [context] - Optional free-text context passed to the pipeline.
 * @param {string} [lang] - Optional output language passed to the pipeline.
 * @returns {Promise<{ outputDir: string, summary: string }>} The per-video output
 *   directory and a summary: the `overview` from raw/synthesis.json when present,
 *   otherwise "Analysis complete.".
 * @throws {Error} When no API key can be resolved or a pipeline step fails.
 */
async function analyzeVideo(input, context, lang) {
  // Start timing up front so the recorded processing time covers upload,
  // duration detection and the pipeline itself.
  const startedAt = Date.now();
  const apiKey = await resolveApiKeyNonInteractive();
  const resolved = resolveInput(input);
  const client = new GeminiClient(apiKey);
  let fileUri;
  let mimeType;
  let duration;
  let videoTitle;
  if (resolved.type === "youtube") {
    const result = await handleYouTube(resolved.value, client);
    fileUri = result.fileUri;
    mimeType = result.mimeType;
    try {
      duration = await detectDuration({
        ytDlpDuration: result.duration,
        geminiDuration: result.duration
      });
    } catch (err) {
      // Best effort for YouTube: fall back to a fixed duration instead of failing.
      process.stderr.write(`Duration detection failed, using 600s fallback: ${err instanceof Error ? err.message : String(err)}\n`);
      duration = 600;
    }
    const videoId = extractVideoId(resolved.value);
    videoTitle = videoId != null ? `youtube-${videoId}` : resolved.value;
  } else {
    const result = await handleLocalFile(resolved.value, client);
    fileUri = result.fileUri;
    mimeType = result.mimeType;
    duration = await detectDuration({
      filePath: resolved.value,
      geminiDuration: result.duration
    });
    videoTitle = basename4(resolved.value, extname3(resolved.value));
  }
  const model = MODELS.flash;
  const outputDir = resolve2(DEFAULT_OUTPUT);
  // NOTE(review): assumes generateOutput writes into <outputDir>/<slugify(videoTitle)> — confirm.
  const finalOutputDir = join5(outputDir, slugify(videoTitle));
  const rateLimiter = new RateLimiter();
  const pipelineResult = await runPipeline({
    client,
    fileUri,
    mimeType,
    duration,
    model,
    context,
    lang,
    rateLimiter
  });
  await generateOutput({
    pipelineResult,
    outputDir,
    videoTitle,
    source: input,
    duration,
    model,
    processingTimeMs: Date.now() - startedAt
  });
  const synthesis = await readJsonFile(join5(finalOutputDir, "raw", "synthesis.json"));
  const summary = synthesis?.overview ? synthesis.overview : "Analysis complete.";
  return { outputDir: finalOutputDir, summary };
}
|
|
5354
|
+
/**
 * Reads the transcript stored in a vidistill output directory.
 *
 * Transcript entries come from the raw/pass1-seg<N>.json files, visited in
 * ascending numeric segment order, and are rendered one per line as
 * "[timestamp] speaker: text" (the speaker prefix is omitted when empty).
 *
 * @param {string} outputDir - Path to a vidistill output directory.
 * @param {number} [startTime] - Keep only entries at or after this many seconds.
 * @param {number} [endTime] - Keep only entries at or before this many seconds.
 * @returns {Promise<string>} The matching transcript lines joined with "\n".
 * @throws {Error} When the directory, its raw/ folder, or any segment file is missing.
 */
async function getTranscript(outputDir, startTime, endTime) {
  const absDir = resolve2(outputDir);
  if (!existsSync4(absDir)) {
    throw new Error("Not a vidistill output directory");
  }
  const rawDir = join5(absDir, "raw");
  if (!existsSync4(rawDir)) {
    throw new Error("No extracted data found");
  }
  const segmentFilePattern = /^pass1-seg(\d+)\.json$/;
  // Order by the numeric segment index: a plain string sort would place
  // "pass1-seg10.json" before "pass1-seg2.json" and scramble long transcripts.
  const pass1Files = (await readdir2(rawDir))
    .map((name) => ({ name, match: segmentFilePattern.exec(name) }))
    .filter(({ match }) => match !== null)
    .map(({ name, match }) => ({ name, index: Number.parseInt(match[1], 10) }))
    .sort((left, right) => left.index - right.index)
    .map(({ name }) => name);
  if (pass1Files.length === 0) {
    throw new Error("No extracted data found");
  }
  const hasTimeFilter = startTime != null || endTime != null;
  const lines = [];
  for (const file of pass1Files) {
    const data = await readJsonFile(join5(rawDir, file));
    if (data?.transcript_entries == null) continue;
    for (const entry of data.transcript_entries) {
      if (hasTimeFilter) {
        const ts = parseTimestamp(entry.timestamp);
        if (startTime != null && ts < startTime) continue;
        if (endTime != null && ts > endTime) continue;
      }
      const speaker = entry.speaker ? `${entry.speaker}: ` : "";
      lines.push(`[${entry.timestamp}] ${speaker}${entry.text}`);
    }
  }
  return lines.join("\n");
}
|
|
5384
|
+
/**
 * Reads every code file stored in a vidistill output directory.
 *
 * Files are taken from the top level of <outputDir>/code and returned sorted by
 * filename. Sub-directories are skipped rather than read (reading one would
 * fail the whole call with EISDIR).
 *
 * @param {string} outputDir - Path to a vidistill output directory.
 * @returns {Promise<Array<{ filename: string, content: string }>>} File names and
 *   UTF-8 contents; an empty array when there is no code/ folder.
 * @throws {Error} When outputDir does not exist or a file cannot be read.
 */
async function getCode(outputDir) {
  const absDir = resolve2(outputDir);
  if (!existsSync4(absDir)) {
    throw new Error("Not a vidistill output directory");
  }
  const codeDir = join5(absDir, "code");
  if (!existsSync4(codeDir)) {
    return [];
  }
  const dirents = await readdir2(codeDir, { withFileTypes: true });
  // readdir order is filesystem-dependent; sort so callers get a stable listing.
  const filenames = dirents
    .filter((dirent) => !dirent.isDirectory())
    .map((dirent) => dirent.name)
    .sort();
  return Promise.all(
    filenames.map(async (filename) => ({
      filename,
      content: await readFile3(join5(codeDir, filename), "utf8")
    }))
  );
}
|
|
5239
5401
|
/**
 * Starts vidistill as an MCP server over stdio.
 *
 * Registers three tools — analyze_video, get_transcript and get_code — and
 * connects the server to a StdioServerTransport. A tool failure is reported to
 * the client as a text result with `isError: true` instead of being thrown.
 *
 * @param {unknown} _args - CLI arguments; unused.
 * @returns {Promise<void>} Resolves once the server is connected.
 */
async function run(_args) {
  const { McpServer } = await import("@modelcontextprotocol/sdk/server/mcp.js");
  const { StdioServerTransport } = await import("@modelcontextprotocol/sdk/server/stdio.js");
  const z = await import("zod");
  const server = new McpServer({
    name: "vidistill",
    // NOTE(review): hard-coded and different from the package version ("0.6.0") —
    // confirm whether the server should report the package version instead.
    version: "1.0.0"
  });

  // Wraps a function producing the tool's text output so every tool shares the
  // same success/error result shape.
  const asToolHandler = (produceText) => async (toolArgs) => {
    try {
      return {
        content: [{ type: "text", text: await produceText(toolArgs) }]
      };
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      return {
        content: [{ type: "text", text: message }],
        isError: true
      };
    }
  };

  server.registerTool(
    "analyze_video",
    {
      title: "Analyze Video",
      description: "Run the full vidistill pipeline on a video URL or local file. Returns the output directory and a summary.",
      inputSchema: z.object({
        input: z.string().describe("YouTube URL or local file path"),
        context: z.string().optional().describe('Optional context about the video (e.g. "CS lecture", "product demo")'),
        lang: z.string().optional().describe("Output language")
      })
    },
    asToolHandler(async ({ input, context, lang }) => JSON.stringify(await analyzeVideo(input, context, lang)))
  );
  server.registerTool(
    "get_transcript",
    {
      title: "Get Transcript",
      description: "Read transcript from an existing vidistill output directory. Optionally filter by time range.",
      inputSchema: z.object({
        outputDir: z.string().describe("Path to a vidistill output directory"),
        startTime: z.number().optional().describe("Start time in seconds to filter from"),
        endTime: z.number().optional().describe("End time in seconds to filter to")
      })
    },
    asToolHandler(({ outputDir, startTime, endTime }) => getTranscript(outputDir, startTime, endTime))
  );
  server.registerTool(
    "get_code",
    {
      title: "Get Code",
      description: "Read code files from an existing vidistill output directory.",
      inputSchema: z.object({
        outputDir: z.string().describe("Path to a vidistill output directory")
      })
    },
    asToolHandler(async ({ outputDir }) => JSON.stringify(await getCode(outputDir)))
  );

  const transport = new StdioServerTransport();
  await server.connect(transport);
  process.on("SIGINT", async () => {
    try {
      await server.close();
    } finally {
      // Exit even when close() rejects, so Ctrl-C never ends in an unhandled rejection.
      process.exit(0);
    }
  });
}
|
|
5242
5492
|
|
|
5243
5493
|
// src/commands/rename-speakers.ts
|
|
5244
|
-
import { join as
|
|
5245
|
-
import { log as
|
|
5494
|
+
import { join as join6 } from "path";
|
|
5495
|
+
import { log as log9, text as text3, isCancel as isCancel3, cancel as cancel4 } from "@clack/prompts";
|
|
5246
5496
|
|
|
5247
5497
|
// src/cli/speaker-naming.ts
|
|
5248
|
-
import { log as
|
|
5498
|
+
import { log as log8, text as text2, confirm as confirm2, isCancel as isCancel2, cancel as cancel3 } from "@clack/prompts";
|
|
5249
5499
|
async function detectAndPromptMerges(mapping) {
|
|
5250
5500
|
const byName = /* @__PURE__ */ new Map();
|
|
5251
5501
|
for (const [label, name] of Object.entries(mapping)) {
|
|
@@ -5284,7 +5534,7 @@ async function detectAndPromptMerges(mapping) {
|
|
|
5284
5534
|
async function collectSpeakersFromRaw(rawDir) {
|
|
5285
5535
|
const speakerEntries = /* @__PURE__ */ new Map();
|
|
5286
5536
|
for (let n = 0; n < 1e3; n++) {
|
|
5287
|
-
const pass1 = await readJsonFile(
|
|
5537
|
+
const pass1 = await readJsonFile(join6(rawDir, `pass1-seg${n}.json`));
|
|
5288
5538
|
if (pass1 == null) break;
|
|
5289
5539
|
for (const info of pass1.speaker_summary) {
|
|
5290
5540
|
if (!info.speaker_id) continue;
|
|
@@ -5401,22 +5651,22 @@ function formatNameList(names) {
|
|
|
5401
5651
|
return names.map((n) => `"${n.replace(/"/g, '\\"')}"`).join(", ");
|
|
5402
5652
|
}
|
|
5403
5653
|
async function runList(outputDir) {
|
|
5404
|
-
const metadataPath =
|
|
5654
|
+
const metadataPath = join6(outputDir, "metadata.json");
|
|
5405
5655
|
const metadata = await readJsonFile(metadataPath);
|
|
5406
5656
|
if (metadata == null) {
|
|
5407
|
-
|
|
5657
|
+
log9.error("Not a vidistill output directory");
|
|
5408
5658
|
return;
|
|
5409
5659
|
}
|
|
5410
|
-
const rawDir =
|
|
5660
|
+
const rawDir = join6(outputDir, "raw");
|
|
5411
5661
|
const speakers = await collectSpeakersFromRaw(rawDir);
|
|
5412
5662
|
const speakerMapping = metadata.speakerMapping ?? {};
|
|
5413
5663
|
if (speakers.length === 0 && Object.keys(speakerMapping).length === 0) {
|
|
5414
|
-
|
|
5664
|
+
log9.info("No speakers found.");
|
|
5415
5665
|
return;
|
|
5416
5666
|
}
|
|
5417
5667
|
const groups = groupSpeakersByExistingMapping(speakers, speakerMapping);
|
|
5418
5668
|
if (groups.length === 0) {
|
|
5419
|
-
|
|
5669
|
+
log9.info("No speakers found.");
|
|
5420
5670
|
return;
|
|
5421
5671
|
}
|
|
5422
5672
|
const lines = groups.map((group, idx) => {
|
|
@@ -5425,21 +5675,21 @@ async function runList(outputDir) {
|
|
|
5425
5675
|
const labelsStr = group.labels.join(", ");
|
|
5426
5676
|
return `${String(num)}. ${displayName} (${labelsStr}, ${String(group.totalEntries)} entries)`;
|
|
5427
5677
|
});
|
|
5428
|
-
|
|
5678
|
+
log9.info(lines.join("\n"));
|
|
5429
5679
|
}
|
|
5430
5680
|
async function runRename(outputDir, oldName, newName) {
|
|
5431
5681
|
if (newName.trim().length === 0) {
|
|
5432
|
-
|
|
5682
|
+
log9.error("New name cannot be empty. Use the interactive prompt to clear a mapping.");
|
|
5433
5683
|
return;
|
|
5434
5684
|
}
|
|
5435
|
-
const metadataPath =
|
|
5685
|
+
const metadataPath = join6(outputDir, "metadata.json");
|
|
5436
5686
|
const metadata = await readJsonFile(metadataPath);
|
|
5437
5687
|
if (metadata == null) {
|
|
5438
|
-
|
|
5688
|
+
log9.error("Not a vidistill output directory");
|
|
5439
5689
|
return;
|
|
5440
5690
|
}
|
|
5441
5691
|
const speakerMapping = { ...metadata.speakerMapping ?? {} };
|
|
5442
|
-
const rawDir =
|
|
5692
|
+
const rawDir = join6(outputDir, "raw");
|
|
5443
5693
|
const speakers = await collectSpeakersFromRaw(rawDir);
|
|
5444
5694
|
const matchingKeys = [];
|
|
5445
5695
|
const directKey = speakers.find((s) => s.label === oldName);
|
|
@@ -5456,18 +5706,18 @@ async function runRename(outputDir, oldName, newName) {
|
|
|
5456
5706
|
const currentNames = Object.values(speakerMapping);
|
|
5457
5707
|
const unmappedLabels = speakers.filter((s) => speakerMapping[s.label] == null).map((s) => s.label);
|
|
5458
5708
|
const allNames = [.../* @__PURE__ */ new Set([...currentNames, ...unmappedLabels])];
|
|
5459
|
-
|
|
5709
|
+
log9.error(`No speaker named "${oldName}" found. Current speakers: ${formatNameList(allNames)}`);
|
|
5460
5710
|
return;
|
|
5461
5711
|
}
|
|
5462
5712
|
if (matchingKeys.length > 1) {
|
|
5463
|
-
|
|
5713
|
+
log9.error(
|
|
5464
5714
|
`Multiple speakers named "${oldName}" (${matchingKeys.join(", ")}). Use SPEAKER_XX label to specify which one.`
|
|
5465
5715
|
);
|
|
5466
5716
|
return;
|
|
5467
5717
|
}
|
|
5468
5718
|
const key = matchingKeys[0];
|
|
5469
5719
|
speakerMapping[key] = newName;
|
|
5470
|
-
|
|
5720
|
+
log9.info("Re-rendering output files with updated speaker names...");
|
|
5471
5721
|
const result = await reRenderWithSpeakerMapping({
|
|
5472
5722
|
outputDir,
|
|
5473
5723
|
speakerMapping,
|
|
@@ -5475,20 +5725,20 @@ async function runRename(outputDir, oldName, newName) {
|
|
|
5475
5725
|
});
|
|
5476
5726
|
if (result.errors.length > 0) {
|
|
5477
5727
|
for (const err of result.errors) {
|
|
5478
|
-
|
|
5728
|
+
log9.error(err);
|
|
5479
5729
|
}
|
|
5480
5730
|
}
|
|
5481
|
-
|
|
5731
|
+
log9.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
|
|
5482
5732
|
}
|
|
5483
5733
|
async function runMerge(outputDir, sourceName, targetName) {
|
|
5484
|
-
const metadataPath =
|
|
5734
|
+
const metadataPath = join6(outputDir, "metadata.json");
|
|
5485
5735
|
const metadata = await readJsonFile(metadataPath);
|
|
5486
5736
|
if (metadata == null) {
|
|
5487
|
-
|
|
5737
|
+
log9.error("Not a vidistill output directory");
|
|
5488
5738
|
return;
|
|
5489
5739
|
}
|
|
5490
5740
|
const speakerMapping = { ...metadata.speakerMapping ?? {} };
|
|
5491
|
-
const rawDir =
|
|
5741
|
+
const rawDir = join6(outputDir, "raw");
|
|
5492
5742
|
const speakers = await collectSpeakersFromRaw(rawDir);
|
|
5493
5743
|
function findKeys(name) {
|
|
5494
5744
|
const directKey = speakers.find((s) => s.label === name);
|
|
@@ -5513,19 +5763,19 @@ async function runMerge(outputDir, sourceName, targetName) {
|
|
|
5513
5763
|
const targetKeys = findKeys(targetName);
|
|
5514
5764
|
if (sourceKeys.length === 0) {
|
|
5515
5765
|
const currentNames = buildCurrentNames(speakers, speakerMapping);
|
|
5516
|
-
|
|
5766
|
+
log9.error(`No speaker named "${sourceName}" found. Current speakers: ${formatNameList(currentNames)}`);
|
|
5517
5767
|
return;
|
|
5518
5768
|
}
|
|
5519
5769
|
if (targetKeys.length === 0) {
|
|
5520
5770
|
const currentNames = buildCurrentNames(speakers, speakerMapping);
|
|
5521
|
-
|
|
5771
|
+
log9.error(`No speaker named "${targetName}" found. Current speakers: ${formatNameList(currentNames)}`);
|
|
5522
5772
|
return;
|
|
5523
5773
|
}
|
|
5524
5774
|
const resolvedTargetName = speakerMapping[targetKeys[0]] ?? targetName;
|
|
5525
5775
|
for (const key of sourceKeys) {
|
|
5526
5776
|
speakerMapping[key] = resolvedTargetName;
|
|
5527
5777
|
}
|
|
5528
|
-
|
|
5778
|
+
log9.info("Re-rendering output files with updated speaker names...");
|
|
5529
5779
|
const result = await reRenderWithSpeakerMapping({
|
|
5530
5780
|
outputDir,
|
|
5531
5781
|
speakerMapping,
|
|
@@ -5533,10 +5783,10 @@ async function runMerge(outputDir, sourceName, targetName) {
|
|
|
5533
5783
|
});
|
|
5534
5784
|
if (result.errors.length > 0) {
|
|
5535
5785
|
for (const err of result.errors) {
|
|
5536
|
-
|
|
5786
|
+
log9.error(err);
|
|
5537
5787
|
}
|
|
5538
5788
|
}
|
|
5539
|
-
|
|
5789
|
+
log9.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
|
|
5540
5790
|
}
|
|
5541
5791
|
function buildCurrentNames(speakers, speakerMapping) {
|
|
5542
5792
|
const names = /* @__PURE__ */ new Set();
|
|
@@ -5553,11 +5803,11 @@ function buildCurrentNames(speakers, speakerMapping) {
|
|
|
5553
5803
|
async function run2(args) {
|
|
5554
5804
|
const { outputDir, list, rename, merge, error } = parseArgs(args);
|
|
5555
5805
|
if (error != null) {
|
|
5556
|
-
|
|
5806
|
+
log9.error(error);
|
|
5557
5807
|
return;
|
|
5558
5808
|
}
|
|
5559
5809
|
if (outputDir == null || outputDir.trim() === "") {
|
|
5560
|
-
|
|
5810
|
+
log9.error('Usage: vidistill rename-speakers <output-dir> [--list] [--rename "old" "new"] [--merge "source" "target"]');
|
|
5561
5811
|
return;
|
|
5562
5812
|
}
|
|
5563
5813
|
if (list) {
|
|
@@ -5572,25 +5822,25 @@ async function run2(args) {
|
|
|
5572
5822
|
await runMerge(outputDir, merge[0], merge[1]);
|
|
5573
5823
|
return;
|
|
5574
5824
|
}
|
|
5575
|
-
const metadataPath =
|
|
5825
|
+
const metadataPath = join6(outputDir, "metadata.json");
|
|
5576
5826
|
const metadata = await readJsonFile(metadataPath);
|
|
5577
5827
|
if (metadata == null) {
|
|
5578
|
-
|
|
5828
|
+
log9.error("Not a vidistill output directory");
|
|
5579
5829
|
return;
|
|
5580
5830
|
}
|
|
5581
|
-
const rawDir =
|
|
5582
|
-
const peopleExtraction = await readJsonFile(
|
|
5831
|
+
const rawDir = join6(outputDir, "raw");
|
|
5832
|
+
const peopleExtraction = await readJsonFile(join6(rawDir, "pass3b-people.json"));
|
|
5583
5833
|
if (peopleExtraction == null) {
|
|
5584
|
-
|
|
5834
|
+
log9.info("No speakers detected in this video");
|
|
5585
5835
|
return;
|
|
5586
5836
|
}
|
|
5587
5837
|
const speakers = await collectSpeakersFromRaw(rawDir);
|
|
5588
5838
|
if (speakers.length === 0) {
|
|
5589
|
-
|
|
5839
|
+
log9.info("No speakers detected in this video");
|
|
5590
5840
|
return;
|
|
5591
5841
|
}
|
|
5592
5842
|
const existingMapping = metadata.speakerMapping ?? {};
|
|
5593
|
-
|
|
5843
|
+
log9.info(
|
|
5594
5844
|
`${String(speakers.length)} speaker${speakers.length === 1 ? "" : "s"} found. Enter names (or press Enter to keep current).`
|
|
5595
5845
|
);
|
|
5596
5846
|
const groups = groupSpeakersByExistingMapping(speakers, existingMapping);
|
|
@@ -5637,7 +5887,7 @@ async function run2(args) {
|
|
|
5637
5887
|
return;
|
|
5638
5888
|
}
|
|
5639
5889
|
const { mapping: finalMapping, declinedMerges } = mergeResult;
|
|
5640
|
-
|
|
5890
|
+
log9.info("Re-rendering output files with updated speaker names...");
|
|
5641
5891
|
const result = await reRenderWithSpeakerMapping({
|
|
5642
5892
|
outputDir,
|
|
5643
5893
|
speakerMapping: finalMapping,
|
|
@@ -5645,15 +5895,15 @@ async function run2(args) {
|
|
|
5645
5895
|
});
|
|
5646
5896
|
if (result.errors.length > 0) {
|
|
5647
5897
|
for (const err of result.errors) {
|
|
5648
|
-
|
|
5898
|
+
log9.error(err);
|
|
5649
5899
|
}
|
|
5650
5900
|
}
|
|
5651
|
-
|
|
5901
|
+
log9.info(`Done. ${String(result.filesGenerated.length)} file${result.filesGenerated.length === 1 ? "" : "s"} updated.`);
|
|
5652
5902
|
}
|
|
5653
5903
|
|
|
5654
5904
|
// src/cli/index.ts
|
|
5655
|
-
var version = "0.
|
|
5656
|
-
var
|
|
5905
|
+
var version = "0.6.0";
|
|
5906
|
+
var DEFAULT_OUTPUT2 = "./vidistill-output/";
|
|
5657
5907
|
var SUBCOMMANDS = {
|
|
5658
5908
|
mcp: run,
|
|
5659
5909
|
"rename-speakers": run2
|
|
@@ -5679,9 +5929,9 @@ Commands: ${Object.keys(SUBCOMMANDS).join(", ")}`
|
|
|
5679
5929
|
},
|
|
5680
5930
|
output: {
|
|
5681
5931
|
type: "string",
|
|
5682
|
-
description: `Output directory for generated notes (default: ${
|
|
5932
|
+
description: `Output directory for generated notes (default: ${DEFAULT_OUTPUT2})`,
|
|
5683
5933
|
alias: "o",
|
|
5684
|
-
default:
|
|
5934
|
+
default: DEFAULT_OUTPUT2
|
|
5685
5935
|
},
|
|
5686
5936
|
lang: {
|
|
5687
5937
|
type: "string",
|
|
@@ -5705,11 +5955,11 @@ Commands: ${Object.keys(SUBCOMMANDS).join(", ")}`
|
|
|
5705
5955
|
lang: args.lang
|
|
5706
5956
|
});
|
|
5707
5957
|
} catch (err) {
|
|
5708
|
-
const { log:
|
|
5958
|
+
const { log: log10 } = await import("@clack/prompts");
|
|
5709
5959
|
const { default: pc4 } = await import("picocolors");
|
|
5710
5960
|
const raw = err instanceof Error ? err.message : String(err);
|
|
5711
5961
|
const message = raw.split("\n")[0].slice(0, 200);
|
|
5712
|
-
|
|
5962
|
+
log10.error(pc4.red(message));
|
|
5713
5963
|
process.exit(1);
|
|
5714
5964
|
}
|
|
5715
5965
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "vidistill",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Video intelligence distiller — extract structured notes, transcripts, and insights from any video using Gemini",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -31,11 +31,13 @@
|
|
|
31
31
|
"dependencies": {
|
|
32
32
|
"@clack/prompts": "1.0.1",
|
|
33
33
|
"@google/genai": "^1.40.0",
|
|
34
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
34
35
|
"citty": "^0.1.6",
|
|
35
36
|
"figlet": "^1.8.0",
|
|
36
37
|
"fluent-ffmpeg": "^2.1.3",
|
|
37
38
|
"picocolors": "^1.1.1",
|
|
38
|
-
"ytdlp-nodejs": "^2.2.0"
|
|
39
|
+
"ytdlp-nodejs": "^2.2.0",
|
|
40
|
+
"zod": "^4.3.6"
|
|
39
41
|
},
|
|
40
42
|
"devDependencies": {
|
|
41
43
|
"@types/figlet": "^1.7.0",
|