@vibeframe/mcp-server 0.53.0 → 0.54.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +324 -42
- package/package.json +3 -3
package/dist/index.js
CHANGED
|
@@ -428532,7 +428532,7 @@ var init_WhisperProvider = __esm({
|
|
|
428532
428532
|
isConfigured() {
|
|
428533
428533
|
return !!this.apiKey;
|
|
428534
428534
|
}
|
|
428535
|
-
async transcribe(audio, language) {
|
|
428535
|
+
async transcribe(audio, language, options) {
|
|
428536
428536
|
if (!this.apiKey) {
|
|
428537
428537
|
return {
|
|
428538
428538
|
id: "",
|
|
@@ -428540,14 +428540,21 @@ var init_WhisperProvider = __esm({
|
|
|
428540
428540
|
error: "Whisper API key not configured"
|
|
428541
428541
|
};
|
|
428542
428542
|
}
|
|
428543
|
+
const granularity = options?.granularity ?? "segment";
|
|
428543
428544
|
try {
|
|
428544
428545
|
const formData = new FormData();
|
|
428545
428546
|
formData.append("file", audio, "audio.webm");
|
|
428546
428547
|
formData.append("model", "whisper-1");
|
|
428547
428548
|
formData.append("response_format", "verbose_json");
|
|
428548
|
-
|
|
428549
|
-
|
|
428550
|
-
|
|
428549
|
+
if (granularity === "segment" || granularity === "both") {
|
|
428550
|
+
formData.append("timestamp_granularities[]", "segment");
|
|
428551
|
+
}
|
|
428552
|
+
if (granularity === "word" || granularity === "both") {
|
|
428553
|
+
formData.append("timestamp_granularities[]", "word");
|
|
428554
|
+
}
|
|
428555
|
+
const lang = language ?? options?.language;
|
|
428556
|
+
if (lang) {
|
|
428557
|
+
formData.append("language", lang);
|
|
428551
428558
|
}
|
|
428552
428559
|
const response = await fetch(`${this.baseUrl}/audio/transcriptions`, {
|
|
428553
428560
|
method: "POST",
|
|
@@ -428565,20 +428572,30 @@ var init_WhisperProvider = __esm({
|
|
|
428565
428572
|
};
|
|
428566
428573
|
}
|
|
428567
428574
|
const data = await response.json();
|
|
428568
|
-
|
|
428575
|
+
const result = {
|
|
428569
428576
|
id: crypto.randomUUID(),
|
|
428570
428577
|
status: "completed",
|
|
428571
428578
|
fullText: data.text,
|
|
428572
|
-
detectedLanguage: data.language
|
|
428573
|
-
|
|
428579
|
+
detectedLanguage: data.language
|
|
428580
|
+
};
|
|
428581
|
+
if (granularity === "segment" || granularity === "both") {
|
|
428582
|
+
result.segments = data.segments?.map((seg, index) => ({
|
|
428574
428583
|
id: `segment-${index}`,
|
|
428575
428584
|
startTime: seg.start,
|
|
428576
428585
|
endTime: seg.end,
|
|
428577
428586
|
text: seg.text.trim(),
|
|
428578
428587
|
confidence: 1
|
|
428579
|
-
// Whisper doesn't provide
|
|
428580
|
-
}))
|
|
428581
|
-
}
|
|
428588
|
+
// Whisper doesn't provide per-segment confidence
|
|
428589
|
+
}));
|
|
428590
|
+
}
|
|
428591
|
+
if (granularity === "word" || granularity === "both") {
|
|
428592
|
+
result.words = data.words?.map((w) => ({
|
|
428593
|
+
text: w.word,
|
|
428594
|
+
start: w.start,
|
|
428595
|
+
end: w.end
|
|
428596
|
+
}));
|
|
428597
|
+
}
|
|
428598
|
+
return result;
|
|
428582
428599
|
} catch (error) {
|
|
428583
428600
|
return {
|
|
428584
428601
|
id: "",
|
|
@@ -432768,6 +432785,101 @@ var init_elevenlabs = __esm({
|
|
|
432768
432785
|
}
|
|
432769
432786
|
});
|
|
432770
432787
|
|
|
432788
|
+
// ../ai-providers/dist/kokoro/KokoroProvider.js
|
|
432789
|
+
/**
 * Resolve the factory object used to construct the Kokoro TTS model.
 *
 * When the module-level `factoryOverride` is set it is returned as-is;
 * otherwise the `kokoro-js` package is imported on demand and its
 * `KokoroTTS` export is returned.
 *
 * @returns {Promise<*>} `factoryOverride`, or `KokoroTTS` from `kokoro-js`.
 */
async function loadKokoroFactory() {
  if (!factoryOverride) {
    // Dynamic import: `kokoro-js` is only loaded when no override is present.
    const kokoroModule = await import("kokoro-js");
    return kokoroModule.KokoroTTS;
  }
  return factoryOverride;
}
|
|
432795
|
+
/**
 * Return the shared promise for the Kokoro model, starting the load on first use.
 *
 * The in-flight (or settled) promise is cached in the module-level
 * `modelPromise`, so later calls reuse it and their `progress` argument is
 * not attached. If loading rejects, the cache is cleared so that the next
 * call starts a fresh attempt, and the error is re-thrown to the caller.
 *
 * @param {Function} [progress] Optional listener; receives each raw loader
 *   event after it has been passed through `normaliseEvent`.
 * @returns {Promise<*>} Whatever `factory.from_pretrained` resolves to.
 */
function loadModel(progress) {
  if (!modelPromise) {
    const startLoad = async () => {
      const factory = await loadKokoroFactory();
      // Only install a callback when the caller actually asked for progress.
      const forwardProgress = progress
        ? (rawEvent) => progress(normaliseEvent(rawEvent))
        : void 0;
      return factory.from_pretrained(KOKORO_MODEL_ID, {
        dtype: "q8",
        device: "cpu",
        progress_callback: forwardProgress
      });
    };
    modelPromise = startLoad().catch((err) => {
      // Forget the failed attempt so a retry is possible.
      modelPromise = null;
      throw err;
    });
  }
  return modelPromise;
}
|
|
432811
|
+
/**
 * Convert a raw loader progress event into a predictable shape.
 *
 * Every field is validated individually: strings must be strings and
 * numeric fields must be finite numbers. Anything else is dropped
 * (`undefined`), except `status`, which falls back to `"unknown"`.
 * Rejecting `NaN`/`Infinity` keeps consumers that only check
 * `typeof progress === "number"` from rendering values such as "NaN%".
 *
 * @param {*} raw2 Raw event; `null`/`undefined` is treated as an empty event.
 * @returns {{status: string, file: (string|undefined), progress: (number|undefined), loaded: (number|undefined), total: (number|undefined)}}
 */
function normaliseEvent(raw2) {
  const r = raw2 ?? {};
  // Number.isFinite is false for non-numbers, NaN and +/-Infinity.
  const finiteOrUndefined = (value) => (Number.isFinite(value) ? value : void 0);
  const stringOrUndefined = (value) => (typeof value === "string" ? value : void 0);
  return {
    status: stringOrUndefined(r.status) ?? "unknown",
    file: stringOrUndefined(r.file),
    progress: finiteOrUndefined(r.progress),
    loaded: finiteOrUndefined(r.loaded),
    total: finiteOrUndefined(r.total)
  };
}
|
|
432821
|
+
// Module-scope bindings for the Kokoro provider; they are assigned when
// `init_KokoroProvider()` runs.
var KOKORO_DEFAULT_VOICE, KOKORO_MODEL_ID, modelPromise, factoryOverride, KokoroProvider, kokoroProvider;
var init_KokoroProvider = __esm({
  "../ai-providers/dist/kokoro/KokoroProvider.js"() {
    "use strict";
    KOKORO_DEFAULT_VOICE = "af_heart";
    KOKORO_MODEL_ID = "onnx-community/Kokoro-82M-v1.0-ONNX";
    // No model load in flight and no injected factory at start-up.
    modelPromise = null;
    factoryOverride = null;
    KokoroProvider = class {
      constructor() {
        // Static provider metadata.
        Object.assign(this, {
          id: "kokoro",
          name: "Kokoro (local)",
          description: "Local text-to-speech via Kokoro-82M (Apache 2.0)",
          capabilities: ["text-to-speech"],
          iconUrl: "/icons/kokoro.svg",
          isAvailable: true
        });
      }
      /** No-op: this provider takes no configuration. */
      async initialize(_config) {
      }
      /** Always `true`; no API key is checked. */
      isConfigured() {
        return true;
      }
      /**
       * Synthesise speech from text. Returns a WAV buffer matching
       * `ElevenLabsProvider.textToSpeech`'s `TTSResult` shape.
       *
       * @param {string} text Text to speak; empty or whitespace-only input is rejected.
       * @param {{voice?: string, speed?: number, onProgress?: Function}} [options]
       *   `voice` defaults to `KOKORO_DEFAULT_VOICE`, `speed` to 1;
       *   `onProgress` is handed to `loadModel`.
       * @returns {Promise<{success: boolean, audioBuffer?: Buffer, characterCount?: number, error?: string}>}
       *   Failures during loading or generation are reported in `error`
       *   rather than thrown.
       */
      async textToSpeech(text, options = {}) {
        const hasContent = text && text.trim();
        if (!hasContent) {
          return { success: false, error: "Empty text" };
        }
        try {
          const model = await loadModel(options.onProgress);
          const generateOptions = {
            voice: options.voice ?? KOKORO_DEFAULT_VOICE,
            speed: options.speed ?? 1
          };
          const audio = await model.generate(text, generateOptions);
          const wavBytes = audio.toWav();
          return {
            success: true,
            audioBuffer: Buffer.from(wavBytes),
            characterCount: text.length
          };
        } catch (error) {
          const message = error instanceof Error ? error.message : "Unknown error";
          return { success: false, error: message };
        }
      }
    };
    // Shared default instance.
    kokoroProvider = new KokoroProvider();
  }
});
|
|
432874
|
+
|
|
432875
|
+
// ../ai-providers/dist/kokoro/index.js
|
|
432876
|
+
// Initialiser for the kokoro index module: its body only initialises the
// KokoroProvider module.
var init_kokoro = __esm({
  "../ai-providers/dist/kokoro/index.js": function () {
    "use strict";
    init_KokoroProvider();
  }
});
|
|
432882
|
+
|
|
432771
432883
|
// ../ai-providers/dist/openai-image/OpenAIImageProvider.js
|
|
432772
432884
|
var DEFAULT_MODEL, OpenAIImageProvider, openaiImageProvider;
|
|
432773
432885
|
var init_OpenAIImageProvider = __esm({
|
|
@@ -434730,7 +434842,10 @@ __export(dist_exports2, {
|
|
|
434730
434842
|
GeminiProvider: () => GeminiProvider,
|
|
434731
434843
|
GrokProvider: () => GrokProvider,
|
|
434732
434844
|
KNOWN_VOICES: () => KNOWN_VOICES,
|
|
434845
|
+
KOKORO_DEFAULT_VOICE: () => KOKORO_DEFAULT_VOICE,
|
|
434846
|
+
KOKORO_MODEL_ID: () => KOKORO_MODEL_ID,
|
|
434733
434847
|
KlingProvider: () => KlingProvider,
|
|
434848
|
+
KokoroProvider: () => KokoroProvider,
|
|
434734
434849
|
OllamaProvider: () => OllamaProvider,
|
|
434735
434850
|
OpenAIImageProvider: () => OpenAIImageProvider,
|
|
434736
434851
|
OpenAIProvider: () => OpenAIProvider,
|
|
@@ -434743,6 +434858,7 @@ __export(dist_exports2, {
|
|
|
434743
434858
|
getBestProviderForCapability: () => getBestProviderForCapability,
|
|
434744
434859
|
grokProvider: () => grokProvider,
|
|
434745
434860
|
klingProvider: () => klingProvider,
|
|
434861
|
+
kokoroProvider: () => kokoroProvider,
|
|
434746
434862
|
ollamaProvider: () => ollamaProvider,
|
|
434747
434863
|
openaiImageProvider: () => openaiImageProvider,
|
|
434748
434864
|
openaiProvider: () => openaiProvider,
|
|
@@ -434763,6 +434879,7 @@ var init_dist2 = __esm({
|
|
|
434763
434879
|
init_claude();
|
|
434764
434880
|
init_ollama();
|
|
434765
434881
|
init_elevenlabs();
|
|
434882
|
+
init_kokoro();
|
|
434766
434883
|
init_openai_image();
|
|
434767
434884
|
init_runway();
|
|
434768
434885
|
init_kling();
|
|
@@ -442748,8 +442865,8 @@ async function extendVideoNaturally(videoPath, targetDuration, outputPath) {
|
|
|
442748
442865
|
const videoDuration = await getVideoDuration(videoPath);
|
|
442749
442866
|
const ratio = targetDuration / videoDuration;
|
|
442750
442867
|
if (ratio <= 1) {
|
|
442751
|
-
const { copyFile:
|
|
442752
|
-
await
|
|
442868
|
+
const { copyFile: copyFile5 } = await import("node:fs/promises");
|
|
442869
|
+
await copyFile5(videoPath, outputPath);
|
|
442753
442870
|
return;
|
|
442754
442871
|
}
|
|
442755
442872
|
if (ratio <= 1.15) {
|
|
@@ -446295,6 +446412,16 @@ function slugifySceneName(name) {
|
|
|
446295
446412
|
const slug = normalised.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "");
|
|
446296
446413
|
return slug || "scene";
|
|
446297
446414
|
}
|
|
446415
|
+
/**
 * Render transcript words as HTML: one `<span class="word">` per word,
 * tagged with its index in `data-i`, joined by single spaces.
 *
 * @param {Array<{text: string}>} transcript Words in display order.
 * @returns {string} Markup with each word's text passed through `esc`.
 */
function renderTranscriptSpans(transcript) {
  const toSpan = (word, index) => {
    const safeText = esc(word.text);
    return `<span class="word" data-i="${index}">${safeText}</span>`;
  };
  return transcript.map(toSpan).join(" ");
}
|
|
446418
|
+
/**
 * Build one `tl.fromTo(...)` statement per transcript word, positioned at the
 * word's start time so that each word fades/slides in as it is spoken.
 *
 * The start time is rounded to millisecond precision and clamped to be
 * non-negative. A missing or non-numeric `start` falls back to 0 instead of
 * throwing (or emitting `NaN` into the generated script), so one malformed
 * word cannot break the whole scene.
 *
 * @param {Array<{start: number}>} transcript Words in display order.
 * @param {string} targetSelector CSS selector prefix; `[data-i="<index>"]`
 *   is appended to address each word's span.
 * @returns {string} Statements joined by a newline plus one space.
 */
function buildTranscriptTweens(transcript, targetSelector) {
  return transcript.map((w, i) => {
    const seconds = Number(w.start);
    const start = Number.isFinite(seconds) ? Math.max(0, Number(seconds.toFixed(3))) : 0;
    const sel = `${targetSelector}[data-i="${i}"]`;
    return `tl.fromTo('${sel}', { opacity: 0, y: 10 }, { opacity: 1, y: 0, duration: 0.18, ease: 'power2.out' }, ${start});`;
  }).join("\n ");
}
|
|
446298
446425
|
function buildPreset(input3) {
|
|
446299
446426
|
const id = input3.id;
|
|
446300
446427
|
const scope = `[data-composition-id="${id}"]`;
|
|
@@ -446325,7 +446452,15 @@ function buildPreset(input3) {
|
|
|
446325
446452
|
const backdropMarkup = `<div class="backdrop"></div>`;
|
|
446326
446453
|
switch (input3.preset) {
|
|
446327
446454
|
case "simple": {
|
|
446328
|
-
const
|
|
446455
|
+
const transcript = input3.transcript;
|
|
446456
|
+
const useWordSync = !!(transcript && transcript.length > 0);
|
|
446457
|
+
const captionText = subhead || headline;
|
|
446458
|
+
const captionInner = useWordSync ? renderTranscriptSpans(transcript) : esc(captionText);
|
|
446459
|
+
const wordCss = useWordSync ? `
|
|
446460
|
+
${scope} .caption .word { display: inline-block; opacity: 0; }` : "";
|
|
446461
|
+
const timeline = useWordSync ? `${buildTranscriptTweens(transcript, `${scope} .caption .word`)}
|
|
446462
|
+
tl.to('${scope} .caption', { opacity: 0, duration: 0.4, ease: 'power2.in' }, ${(dur - 0.4).toFixed(2)});` : `tl.from('${scope} .caption', { opacity: 0, y: 28, duration: 0.6, ease: 'power2.out' }, 0.1);
|
|
446463
|
+
tl.to('${scope} .caption', { opacity: 0, duration: 0.4, ease: 'power2.in' }, ${(dur - 0.4).toFixed(2)});`;
|
|
446329
446464
|
return {
|
|
446330
446465
|
css: `${scope} {
|
|
446331
446466
|
position: absolute; inset: 0; width: 100%; height: 100%;
|
|
@@ -446341,11 +446476,10 @@ function buildPreset(input3) {
|
|
|
446341
446476
|
font-weight: 700;
|
|
446342
446477
|
line-height: 1.2;
|
|
446343
446478
|
text-shadow: 0 4px 20px rgba(0,0,0,0.65);
|
|
446344
|
-
}`,
|
|
446479
|
+
}${wordCss}`,
|
|
446345
446480
|
body: `${backdropMarkup}
|
|
446346
|
-
<div class="caption" id="caption">${
|
|
446347
|
-
timeline
|
|
446348
|
-
tl.to('${scope} .caption', { opacity: 0, duration: 0.4, ease: 'power2.in' }, ${(dur - 0.4).toFixed(2)});`
|
|
446481
|
+
<div class="caption" id="caption">${captionInner}</div>`,
|
|
446482
|
+
timeline
|
|
446349
446483
|
};
|
|
446350
446484
|
}
|
|
446351
446485
|
case "announcement": {
|
|
@@ -446381,6 +446515,12 @@ function buildPreset(input3) {
|
|
|
446381
446515
|
case "explainer": {
|
|
446382
446516
|
const k = kicker || humanise(id).toUpperCase();
|
|
446383
446517
|
const sub = subhead || "";
|
|
446518
|
+
const transcript = input3.transcript;
|
|
446519
|
+
const useWordSync = !!(transcript && transcript.length > 0 && sub);
|
|
446520
|
+
const subtitleInner = useWordSync ? renderTranscriptSpans(transcript) : esc(sub);
|
|
446521
|
+
const wordCss = useWordSync ? `
|
|
446522
|
+
${scope} #subtitle .word { display: inline-block; opacity: 0; }` : "";
|
|
446523
|
+
const subtitleTween = useWordSync ? buildTranscriptTweens(transcript, `${scope} #subtitle .word`) : sub ? `tl.from('${scope} #subtitle', { opacity: 0, y: 30, duration: 0.55, ease: 'power3.out' }, 0.55);` : "";
|
|
446384
446524
|
return {
|
|
446385
446525
|
css: `${scope} {
|
|
446386
446526
|
position: absolute; inset: 0; width: 100%; height: 100%;
|
|
@@ -446403,23 +446543,28 @@ function buildPreset(input3) {
|
|
|
446403
446543
|
}
|
|
446404
446544
|
${scope} .subtitle {
|
|
446405
446545
|
font-size: 38px; font-weight: 300; color: #c0c0d0; max-width: 80%;
|
|
446406
|
-
}`,
|
|
446546
|
+
}${wordCss}`,
|
|
446407
446547
|
body: `${backdropMarkup}
|
|
446408
446548
|
<div class="stage">
|
|
446409
446549
|
<div class="kicker" id="kicker">${esc(k)}</div>
|
|
446410
446550
|
<h1 class="title" id="title">${esc(headline)}</h1>${sub ? `
|
|
446411
|
-
<div class="subtitle" id="subtitle">${
|
|
446551
|
+
<div class="subtitle" id="subtitle">${subtitleInner}</div>` : ""}
|
|
446412
446552
|
</div>`,
|
|
446413
446553
|
timeline: `tl.from('${scope} #kicker', { opacity: 0, y: 16, duration: 0.4, ease: 'power2.out' }, 0.1);
|
|
446414
446554
|
tl.from('${scope} #title', { opacity: 0, y: 60, duration: 0.7, ease: 'power3.out' }, 0.25);
|
|
446415
|
-
${
|
|
446555
|
+
${subtitleTween}`
|
|
446416
446556
|
};
|
|
446417
446557
|
}
|
|
446418
446558
|
case "kinetic-type": {
|
|
446419
|
-
const
|
|
446420
|
-
const
|
|
446559
|
+
const transcript = input3.transcript;
|
|
446560
|
+
const useWordSync = !!(transcript && transcript.length > 0);
|
|
446561
|
+
const words = useWordSync ? transcript.map((w) => w.text) : headline.split(/\s+/).filter(Boolean);
|
|
446562
|
+
const wordSpans = words.map((w, i) => `<span class="word" data-i="${i}" id="w-${i}">${esc(w)}</span>`).join(" ");
|
|
446421
446563
|
const stagger = Math.max(0.08, Math.min(0.3, (dur - 0.6) / Math.max(words.length, 1)));
|
|
446422
|
-
const tweens =
|
|
446564
|
+
const tweens = useWordSync ? transcript.map((w, i) => {
|
|
446565
|
+
const start = Math.max(0, Number(w.start.toFixed(3)));
|
|
446566
|
+
return `tl.from('${scope} #w-${i}', { opacity: 0, y: 80, scale: 0.8, duration: 0.35, ease: 'back.out(1.8)' }, ${start});`;
|
|
446567
|
+
}).join("\n ") : words.map((_, i) => {
|
|
446423
446568
|
const start = (0.05 + i * stagger).toFixed(2);
|
|
446424
446569
|
return `tl.from('${scope} #w-${i}', { opacity: 0, y: 80, scale: 0.8, duration: 0.45, ease: 'back.out(1.8)' }, ${start});`;
|
|
446425
446570
|
}).join("\n ");
|
|
@@ -450197,12 +450342,12 @@ function resolveProvider(category) {
|
|
|
450197
450342
|
if (!candidates) return null;
|
|
450198
450343
|
if (configDefaults?.[category]) {
|
|
450199
450344
|
const preferred = candidates.find((c) => c.name === configDefaults[category]);
|
|
450200
|
-
if (preferred && hasApiKey(preferred.envVar)) {
|
|
450345
|
+
if (preferred && (preferred.envVar === null || hasApiKey(preferred.envVar))) {
|
|
450201
450346
|
return { name: preferred.name, label: preferred.label };
|
|
450202
450347
|
}
|
|
450203
450348
|
}
|
|
450204
450349
|
for (const candidate of candidates) {
|
|
450205
|
-
if (hasApiKey(candidate.envVar)) {
|
|
450350
|
+
if (candidate.envVar === null || hasApiKey(candidate.envVar)) {
|
|
450206
450351
|
return { name: candidate.name, label: candidate.label };
|
|
450207
450352
|
}
|
|
450208
450353
|
}
|
|
@@ -450225,7 +450370,8 @@ var init_provider_resolver = __esm({
|
|
|
450225
450370
|
{ name: "runway", envVar: "RUNWAY_API_SECRET", label: "Runway" }
|
|
450226
450371
|
];
|
|
450227
450372
|
SPEECH_PROVIDERS = [
|
|
450228
|
-
{ name: "elevenlabs", envVar: "ELEVENLABS_API_KEY", label: "ElevenLabs" }
|
|
450373
|
+
{ name: "elevenlabs", envVar: "ELEVENLABS_API_KEY", label: "ElevenLabs" },
|
|
450374
|
+
{ name: "kokoro", envVar: null, label: "Kokoro (local)" }
|
|
450229
450375
|
];
|
|
450230
450376
|
PROVIDER_MAP = {
|
|
450231
450377
|
image: IMAGE_PROVIDERS,
|
|
@@ -459473,12 +459619,67 @@ init_source();
|
|
|
459473
459619
|
init_ora();
|
|
459474
459620
|
var import_yaml6 = __toESM(require_dist14(), 1);
|
|
459475
459621
|
init_dist2();
|
|
459622
|
+
import { basename as basename17, resolve as resolve38, relative as relative7, dirname as dirname24 } from "node:path";
|
|
459623
|
+
import { mkdir as mkdir19, readFile as readFile22, writeFile as writeFile25, access as access5, copyFile as copyFile4 } from "node:fs/promises";
|
|
459624
|
+
import { existsSync as existsSync37 } from "node:fs";
|
|
459625
|
+
|
|
459626
|
+
// ../cli/src/commands/_shared/tts-resolve.ts
|
|
459627
|
+
init_dist2();
|
|
459628
|
+
init_api_key();
|
|
459629
|
+
init_api_key();
|
|
459630
|
+
/**
 * Pick and construct the TTS backend.
 *
 * `"auto"` selects ElevenLabs when `hasApiKey("ELEVENLABS_API_KEY")` is true
 * and Kokoro otherwise. An explicit `"elevenlabs"` builds ElevenLabs; any
 * other value builds Kokoro.
 *
 * @param {string} [preferred="auto"] Requested provider name.
 * @returns {Promise<*>} The result of `buildElevenLabs()` or `buildKokoro()`.
 */
async function resolveTtsProvider(preferred = "auto") {
  let choice = preferred;
  if (preferred === "auto") {
    choice = hasApiKey("ELEVENLABS_API_KEY") ? "elevenlabs" : "kokoro";
  }
  return choice === "elevenlabs" ? buildElevenLabs() : buildKokoro();
}
|
|
459637
|
+
/**
 * Construct an ElevenLabs-backed TTS resolution.
 *
 * @returns {Promise<{provider: string, audioExtension: string, call: Function}>}
 *   `call(text, opts)` forwards to `ElevenLabsProvider.textToSpeech`,
 *   passing `opts.voice` as `voiceId` and `opts.speed` as `speed`.
 * @throws {TtsKeyMissingError} When `getApiKey` yields no key.
 */
async function buildElevenLabs() {
  const apiKey = await getApiKey("ELEVENLABS_API_KEY", "ElevenLabs");
  if (!apiKey) {
    throw new TtsKeyMissingError("elevenlabs");
  }

  const provider = new ElevenLabsProvider();
  await provider.initialize({ apiKey });

  // Translate the provider-neutral option names into ElevenLabs' own.
  const call = async (text, opts) => {
    const request = { voiceId: opts?.voice, speed: opts?.speed };
    return provider.textToSpeech(text, request);
  };

  return { provider: "elevenlabs", audioExtension: "mp3", call };
}
|
|
459650
|
+
/**
 * Construct a Kokoro-backed TTS resolution. No API key is looked up.
 *
 * @returns {Promise<{provider: string, audioExtension: string, call: Function}>}
 *   `call(text, opts)` forwards `voice`, `speed` and `onProgress` from
 *   `opts` to `KokoroProvider.textToSpeech`.
 */
async function buildKokoro() {
  const provider = new KokoroProvider();
  await provider.initialize({});

  const call = async (text, opts) => {
    const request = {
      voice: opts?.voice,
      speed: opts?.speed,
      onProgress: opts?.onProgress
    };
    return provider.textToSpeech(text, request);
  };

  return { provider: "kokoro", audioExtension: "wav", call };
}
|
|
459660
|
+
/**
 * Thrown when a TTS provider cannot be used. For `"elevenlabs"` the message
 * explains how to supply the API key or switch to Kokoro; any other provider
 * gets a generic "unavailable" message. The provider name is kept on
 * `error.provider`.
 */
var TtsKeyMissingError = class extends Error {
  constructor(provider) {
    let message;
    if (provider === "elevenlabs") {
      message = "ElevenLabs API key required (ELEVENLABS_API_KEY). Run 'vibe setup', set ELEVENLABS_API_KEY in .env, or pass --tts kokoro for local synthesis.";
    } else {
      message = `Provider ${provider} is unavailable.`;
    }
    super(message);
    this.provider = provider;
    this.name = "TtsKeyMissingError";
  }
};
|
|
459669
|
+
/**
 * Validate the value of the `--tts` option.
 *
 * @param {string} [value] Raw option value; any falsy value means "auto".
 * @returns {string} One of `"auto"`, `"elevenlabs"`, `"kokoro"`.
 * @throws {Error} When a non-empty value is not one of the accepted names.
 */
function parseTtsProviderName(value) {
  if (!value) return "auto";
  const accepted = ["auto", "elevenlabs", "kokoro"];
  if (!accepted.includes(value)) {
    throw new Error(
      `Invalid --tts: ${value}. Valid: auto, elevenlabs, kokoro.`
    );
  }
  return value;
}
|
|
459678
|
+
|
|
459679
|
+
// ../cli/src/commands/scene.ts
|
|
459476
459680
|
init_scene_project();
|
|
459477
459681
|
init_scene_html_emit();
|
|
459478
459682
|
init_scene_lint();
|
|
459479
|
-
import { basename as basename17, resolve as resolve38, relative as relative7, dirname as dirname24 } from "node:path";
|
|
459480
|
-
import { mkdir as mkdir19, readFile as readFile22, writeFile as writeFile25, access as access5 } from "node:fs/promises";
|
|
459481
|
-
import { existsSync as existsSync37 } from "node:fs";
|
|
459482
459683
|
init_output();
|
|
459483
459684
|
init_api_key();
|
|
459484
459685
|
init_audio();
|
|
@@ -459567,9 +459768,15 @@ sceneCommand.command("init").description("Scaffold a new scene project (or safel
|
|
|
459567
459768
|
exitWithError(generalError(`Failed to scaffold: ${msg}`));
|
|
459568
459769
|
}
|
|
459569
459770
|
});
|
|
459570
|
-
sceneCommand.command("add").description("Add a new scene to a project: AI narration + image + per-scene HTML").argument("<name>", "Scene name (slugified into the composition id)").option("--style <preset>", `Style preset: ${SCENE_PRESETS.join(", ")}`, "simple").option("--narration <text>", "Narration text (or path to a .txt file). Drives TTS + scene duration.").option("-d, --duration <sec>", "Explicit scene duration in seconds (overrides narration audio)").option("--visuals <prompt>", "Image prompt \u2014 generates assets/scene-<id>.png via the configured image provider").option("--headline <text>", "Visible headline (defaults to the humanised scene name)").option("--kicker <text>", "Small label above the headline (explainer / product-shot)").option("--insert-into <path>", "Root composition file to update", "index.html").option("--project <dir>", "Project directory", ".").option("--image-provider <name>", "Image provider: gemini, openai", "gemini").option("--
|
|
459771
|
+
sceneCommand.command("add").description("Add a new scene to a project: AI narration + image + per-scene HTML").argument("<name>", "Scene name (slugified into the composition id)").option("--style <preset>", `Style preset: ${SCENE_PRESETS.join(", ")}`, "simple").option("--narration <text>", "Narration text (or path to a .txt file). Drives TTS + scene duration.").option("--narration-file <path>", "Existing narration audio file (.wav/.mp3). Skips TTS \u2014 useful with hyperframes tts, Mac say, or other external tools.").option("-d, --duration <sec>", "Explicit scene duration in seconds (overrides narration audio)").option("--visuals <prompt>", "Image prompt \u2014 generates assets/scene-<id>.png via the configured image provider").option("--headline <text>", "Visible headline (defaults to the humanised scene name)").option("--kicker <text>", "Small label above the headline (explainer / product-shot)").option("--insert-into <path>", "Root composition file to update", "index.html").option("--project <dir>", "Project directory", ".").option("--image-provider <name>", "Image provider: gemini, openai", "gemini").option("--tts <provider>", "TTS provider: auto, elevenlabs, kokoro (default auto \u2014 picks ElevenLabs when key set, else Kokoro local)", "auto").option("--voice <id>", "Voice id (ElevenLabs name/id, or Kokoro id like af_heart, am_michael)").option("--no-audio", "Skip TTS even when --narration is provided (useful for tests/agent dry runs)").option("--no-image", "Skip image generation even when --visuals is provided").option("--no-transcribe", "Skip Whisper word-level transcribe step (no transcript-<id>.json emitted)").option("--transcribe-language <code>", "BCP-47 language code passed to Whisper (e.g. en, ko)").option("--force", "Overwrite an existing compositions/scene-<id>.html").option("--dry-run", "Preview parameters without writing files or calling APIs").action(async (name, options) => {
|
|
459571
459772
|
if (options.style) options.style = validatePreset(options.style);
|
|
459572
459773
|
if (options.duration !== void 0) options.duration = validateDuration(options.duration);
|
|
459774
|
+
let tts;
|
|
459775
|
+
try {
|
|
459776
|
+
tts = parseTtsProviderName(options.tts);
|
|
459777
|
+
} catch (error) {
|
|
459778
|
+
exitWithError(usageError(error instanceof Error ? error.message : String(error)));
|
|
459779
|
+
}
|
|
459573
459780
|
if (options.dryRun) {
|
|
459574
459781
|
const id = slugifySceneName(name);
|
|
459575
459782
|
outputResult({
|
|
@@ -459587,6 +459794,7 @@ sceneCommand.command("add").description("Add a new scene to a project: AI narrat
|
|
|
459587
459794
|
project: options.project,
|
|
459588
459795
|
insertInto: options.insertInto,
|
|
459589
459796
|
imageProvider: options.imageProvider,
|
|
459797
|
+
tts,
|
|
459590
459798
|
audio: options.audio,
|
|
459591
459799
|
// commander sets `audio: false` when --no-audio is passed
|
|
459592
459800
|
image: options.image
|
|
@@ -459600,6 +459808,7 @@ sceneCommand.command("add").description("Add a new scene to a project: AI narrat
|
|
|
459600
459808
|
name,
|
|
459601
459809
|
preset: options.style,
|
|
459602
459810
|
narration: options.narration,
|
|
459811
|
+
narrationFile: options.narrationFile,
|
|
459603
459812
|
duration: options.duration,
|
|
459604
459813
|
visuals: options.visuals,
|
|
459605
459814
|
headline: options.headline,
|
|
@@ -459607,9 +459816,12 @@ sceneCommand.command("add").description("Add a new scene to a project: AI narrat
|
|
|
459607
459816
|
projectDir: options.project,
|
|
459608
459817
|
insertInto: options.insertInto,
|
|
459609
459818
|
imageProvider: options.imageProvider,
|
|
459819
|
+
tts,
|
|
459610
459820
|
voice: options.voice,
|
|
459611
459821
|
skipAudio: options.audio === false,
|
|
459612
459822
|
skipImage: options.image === false,
|
|
459823
|
+
skipTranscribe: options.transcribe === false,
|
|
459824
|
+
transcribeLanguage: options.transcribeLanguage,
|
|
459613
459825
|
force: !!options.force,
|
|
459614
459826
|
onProgress: (msg) => {
|
|
459615
459827
|
if (spinner2) spinner2.text = msg;
|
|
@@ -459633,6 +459845,7 @@ sceneCommand.command("add").description("Add a new scene to a project: AI narrat
|
|
|
459633
459845
|
console.log(source_default.green(" +"), result.scenePath);
|
|
459634
459846
|
if (result.audioPath) console.log(source_default.green(" +"), result.audioPath);
|
|
459635
459847
|
if (result.imagePath) console.log(source_default.green(" +"), result.imagePath);
|
|
459848
|
+
if (result.transcriptPath) console.log(source_default.green(" +"), result.transcriptPath);
|
|
459636
459849
|
console.log(source_default.yellow(" ~"), result.rootPath, source_default.dim("(updated)"));
|
|
459637
459850
|
console.log();
|
|
459638
459851
|
console.log(source_default.bold.cyan("Composition"));
|
|
@@ -459721,19 +459934,49 @@ async function executeSceneAdd(opts) {
|
|
|
459721
459934
|
let audioRelPath;
|
|
459722
459935
|
let audioAbsPath;
|
|
459723
459936
|
let narrationDuration;
|
|
459724
|
-
if (
|
|
459725
|
-
const
|
|
459726
|
-
if (!
|
|
459727
|
-
return errResult(
|
|
459937
|
+
if (opts.narrationFile && !opts.skipAudio) {
|
|
459938
|
+
const sourceAbs = resolve38(opts.narrationFile);
|
|
459939
|
+
if (!await pathExists2(sourceAbs)) {
|
|
459940
|
+
return errResult(`Narration file not found: ${sourceAbs}`);
|
|
459728
459941
|
}
|
|
459729
|
-
|
|
459730
|
-
|
|
459731
|
-
|
|
459732
|
-
|
|
459942
|
+
const ext = (sourceAbs.match(/\.([a-z0-9]+)$/i)?.[1] ?? "wav").toLowerCase();
|
|
459943
|
+
if (ext !== "wav" && ext !== "mp3") {
|
|
459944
|
+
return errResult(`Unsupported narration file extension: .${ext}. Use .wav or .mp3.`);
|
|
459945
|
+
}
|
|
459946
|
+
audioRelPath = `assets/narration-${id}.${ext}`;
|
|
459947
|
+
audioAbsPath = resolve38(projectDir, audioRelPath);
|
|
459948
|
+
await mkdir19(dirname24(audioAbsPath), { recursive: true });
|
|
459949
|
+
await copyFile4(sourceAbs, audioAbsPath);
|
|
459950
|
+
try {
|
|
459951
|
+
narrationDuration = await getAudioDuration(audioAbsPath);
|
|
459952
|
+
} catch {
|
|
459953
|
+
narrationDuration = void 0;
|
|
459954
|
+
}
|
|
459955
|
+
} else if (narrationText && !opts.skipAudio) {
|
|
459956
|
+
let resolution;
|
|
459957
|
+
try {
|
|
459958
|
+
resolution = await resolveTtsProvider(opts.tts ?? "auto");
|
|
459959
|
+
} catch (error) {
|
|
459960
|
+
if (error instanceof TtsKeyMissingError) {
|
|
459961
|
+
return errResult(error.message);
|
|
459962
|
+
}
|
|
459963
|
+
throw error;
|
|
459964
|
+
}
|
|
459965
|
+
opts.onProgress?.(
|
|
459966
|
+
resolution.provider === "kokoro" ? "Generating narration with Kokoro (local \u2014 first run downloads ~330MB)..." : "Generating narration with ElevenLabs..."
|
|
459967
|
+
);
|
|
459968
|
+
const tts = await resolution.call(narrationText, {
|
|
459969
|
+
voice: opts.voice,
|
|
459970
|
+
onProgress: (event) => {
|
|
459971
|
+
if (event.status === "progress" && typeof event.progress === "number") {
|
|
459972
|
+
opts.onProgress?.(`Kokoro model: ${event.file ?? ""} ${Math.round(event.progress)}%`);
|
|
459973
|
+
}
|
|
459974
|
+
}
|
|
459975
|
+
});
|
|
459733
459976
|
if (!tts.success || !tts.audioBuffer) {
|
|
459734
|
-
return errResult(
|
|
459977
|
+
return errResult(`${resolution.provider} TTS failed: ${tts.error ?? "unknown error"}`);
|
|
459735
459978
|
}
|
|
459736
|
-
audioRelPath = `assets/narration-${id}.
|
|
459979
|
+
audioRelPath = `assets/narration-${id}.${resolution.audioExtension}`;
|
|
459737
459980
|
audioAbsPath = resolve38(projectDir, audioRelPath);
|
|
459738
459981
|
await mkdir19(dirname24(audioAbsPath), { recursive: true });
|
|
459739
459982
|
await writeFile25(audioAbsPath, tts.audioBuffer);
|
|
@@ -459743,6 +459986,41 @@ async function executeSceneAdd(opts) {
|
|
|
459743
459986
|
narrationDuration = void 0;
|
|
459744
459987
|
}
|
|
459745
459988
|
}
|
|
459989
|
+
let transcriptRelPath;
|
|
459990
|
+
let transcriptWordCount;
|
|
459991
|
+
let transcriptWords;
|
|
459992
|
+
if (audioAbsPath && !opts.skipTranscribe) {
|
|
459993
|
+
const whisperKey = await getApiKey("OPENAI_API_KEY", "OpenAI");
|
|
459994
|
+
if (!whisperKey) {
|
|
459995
|
+
opts.onProgress?.(
|
|
459996
|
+
"Skipping transcribe (OPENAI_API_KEY not set \u2014 narration plays but word-sync unavailable)"
|
|
459997
|
+
);
|
|
459998
|
+
} else {
|
|
459999
|
+
opts.onProgress?.("Transcribing narration (Whisper word-level)...");
|
|
460000
|
+
try {
|
|
460001
|
+
const whisper = new WhisperProvider();
|
|
460002
|
+
await whisper.initialize({ apiKey: whisperKey });
|
|
460003
|
+
const audioBytes = await readFile22(audioAbsPath);
|
|
460004
|
+
const audioBlob = new Blob([new Uint8Array(audioBytes)]);
|
|
460005
|
+
const transcript = await whisper.transcribe(audioBlob, void 0, {
|
|
460006
|
+
granularity: "word",
|
|
460007
|
+
language: opts.transcribeLanguage
|
|
460008
|
+
});
|
|
460009
|
+
if (transcript.status === "completed" && transcript.words?.length) {
|
|
460010
|
+
transcriptRelPath = `assets/transcript-${id}.json`;
|
|
460011
|
+
const transcriptAbs = resolve38(projectDir, transcriptRelPath);
|
|
460012
|
+
await writeFile25(transcriptAbs, JSON.stringify(transcript.words, null, 2), "utf-8");
|
|
460013
|
+
transcriptWordCount = transcript.words.length;
|
|
460014
|
+
transcriptWords = transcript.words.map((w) => ({ text: w.text, start: w.start, end: w.end }));
|
|
460015
|
+
} else if (transcript.status === "failed") {
|
|
460016
|
+
opts.onProgress?.(`Transcribe failed: ${transcript.error ?? "unknown error"}`);
|
|
460017
|
+
}
|
|
460018
|
+
} catch (error) {
|
|
460019
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
460020
|
+
opts.onProgress?.(`Transcribe failed: ${msg}`);
|
|
460021
|
+
}
|
|
460022
|
+
}
|
|
460023
|
+
}
|
|
459746
460024
|
let imageRelPath;
|
|
459747
460025
|
let imageAbsPath;
|
|
459748
460026
|
if (opts.visuals && !opts.skipImage) {
|
|
@@ -459812,7 +460090,8 @@ async function executeSceneAdd(opts) {
|
|
|
459812
460090
|
subhead: narrationText,
|
|
459813
460091
|
kicker: opts.kicker,
|
|
459814
460092
|
imagePath: imageRelPath,
|
|
459815
|
-
audioPath: audioRelPath
|
|
460093
|
+
audioPath: audioRelPath,
|
|
460094
|
+
transcript: transcriptWords
|
|
459816
460095
|
});
|
|
459817
460096
|
await mkdir19(dirname24(scenePath), { recursive: true });
|
|
459818
460097
|
await writeFile25(scenePath, sceneHtml, "utf-8");
|
|
@@ -459820,6 +460099,7 @@ async function executeSceneAdd(opts) {
|
|
|
459820
460099
|
const start = nextSceneStart(rootHtmlBefore);
|
|
459821
460100
|
const updated = insertClipIntoRoot(rootHtmlBefore, { id, start, duration });
|
|
459822
460101
|
await writeFile25(rootPath, updated, "utf-8");
|
|
460102
|
+
const transcriptAbsPath = transcriptRelPath ? resolve38(projectDir, transcriptRelPath) : void 0;
|
|
459823
460103
|
return {
|
|
459824
460104
|
success: true,
|
|
459825
460105
|
id,
|
|
@@ -459829,7 +460109,9 @@ async function executeSceneAdd(opts) {
|
|
|
459829
460109
|
scenePath: relative7(process.cwd(), scenePath) || scenePath,
|
|
459830
460110
|
rootPath: relative7(process.cwd(), rootPath) || rootPath,
|
|
459831
460111
|
audioPath: audioAbsPath ? relative7(process.cwd(), audioAbsPath) || audioAbsPath : void 0,
|
|
459832
|
-
imagePath: imageAbsPath ? relative7(process.cwd(), imageAbsPath) || imageAbsPath : void 0
|
|
460112
|
+
imagePath: imageAbsPath ? relative7(process.cwd(), imageAbsPath) || imageAbsPath : void 0,
|
|
460113
|
+
transcriptPath: transcriptAbsPath ? relative7(process.cwd(), transcriptAbsPath) || transcriptAbsPath : void 0,
|
|
460114
|
+
transcriptWordCount
|
|
459833
460115
|
};
|
|
459834
460116
|
}
|
|
459835
460117
|
sceneCommand.command("lint").description("Validate scene HTML against Hyperframes rules (in-process, no Chrome required)").argument("[root]", "Root composition file relative to --project", "index.html").option("--project <dir>", "Project directory", ".").option("--fix", 'Apply mechanical auto-fixes (currently: missing class="clip")').action(async (root2, options) => {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vibeframe/mcp-server",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.54.0",
|
|
4
4
|
"description": "VibeFrame MCP Server - AI-native video editing via Model Context Protocol",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -57,8 +57,8 @@
|
|
|
57
57
|
"tsx": "^4.21.0",
|
|
58
58
|
"typescript": "^5.3.3",
|
|
59
59
|
"vitest": "^1.2.2",
|
|
60
|
-
"@vibeframe/cli": "0.
|
|
61
|
-
"@vibeframe/core": "0.
|
|
60
|
+
"@vibeframe/cli": "0.54.0",
|
|
61
|
+
"@vibeframe/core": "0.54.0"
|
|
62
62
|
},
|
|
63
63
|
"engines": {
|
|
64
64
|
"node": ">=20"
|