vargai 0.4.0-alpha77 → 0.4.0-alpha79
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/ai-sdk/providers/editly/index.ts +22 -3
- package/src/ai-sdk/providers/fal.ts +31 -0
- package/src/react/renderers/utils.ts +23 -5
- package/src/react/resolve.ts +6 -0
- package/bun.lock +0 -1458
package/package.json
CHANGED
(+1 -1; hunk not shown)
package/src/ai-sdk/providers/editly/index.ts
CHANGED
|
@@ -419,6 +419,8 @@ interface TimedTextLayer {
|
|
|
419
419
|
layer: TextLayer;
|
|
420
420
|
startTime: number;
|
|
421
421
|
duration: number;
|
|
422
|
+
transitionInDuration: number;
|
|
423
|
+
transitionOutDuration: number;
|
|
422
424
|
}
|
|
423
425
|
|
|
424
426
|
function collectTextLayers(clips: ProcessedClip[]): TimedTextLayer[] {
|
|
@@ -429,12 +431,19 @@ function collectTextLayers(clips: ProcessedClip[]): TimedTextLayer[] {
|
|
|
429
431
|
const clip = clips[i];
|
|
430
432
|
if (!clip) continue;
|
|
431
433
|
|
|
434
|
+
const transitionInDuration =
|
|
435
|
+
i > 0 ? (clips[i - 1]?.transition.duration ?? 0) : 0;
|
|
436
|
+
const transitionOutDuration =
|
|
437
|
+
i < clips.length - 1 ? clip.transition.duration : 0;
|
|
438
|
+
|
|
432
439
|
for (const layer of clip.layers) {
|
|
433
440
|
if (layer && isTextOverlayLayer(layer)) {
|
|
434
441
|
textLayers.push({
|
|
435
442
|
layer: layer as TextLayer,
|
|
436
443
|
startTime: currentTime,
|
|
437
444
|
duration: clip.duration,
|
|
445
|
+
transitionInDuration,
|
|
446
|
+
transitionOutDuration,
|
|
438
447
|
});
|
|
439
448
|
}
|
|
440
449
|
}
|
|
@@ -845,13 +854,23 @@ export async function editly(config: EditlyConfig): Promise<EditlyResult> {
|
|
|
845
854
|
const timedLayer = textLayers[i];
|
|
846
855
|
if (!timedLayer) continue;
|
|
847
856
|
|
|
848
|
-
const { layer, startTime, duration } = timedLayer;
|
|
857
|
+
const {
|
|
858
|
+
layer,
|
|
859
|
+
startTime,
|
|
860
|
+
duration,
|
|
861
|
+
transitionInDuration,
|
|
862
|
+
transitionOutDuration,
|
|
863
|
+
} = timedLayer;
|
|
849
864
|
const outputLabel = `vwithtext${i}`;
|
|
850
865
|
|
|
866
|
+
// Shrink text visibility to avoid overlap during transitions
|
|
867
|
+
const effectiveStart = startTime + transitionInDuration;
|
|
868
|
+
const effectiveStop = startTime + duration - transitionOutDuration;
|
|
869
|
+
|
|
851
870
|
const timedLayerWithEnable = {
|
|
852
871
|
...layer,
|
|
853
|
-
start: layer.start
|
|
854
|
-
stop: layer.stop
|
|
872
|
+
start: layer.start != null ? layer.start + startTime : effectiveStart,
|
|
873
|
+
stop: layer.stop != null ? layer.stop + startTime : effectiveStop,
|
|
855
874
|
};
|
|
856
875
|
|
|
857
876
|
if (layer.type === "title") {
|
package/src/ai-sdk/providers/fal.ts
CHANGED
|
@@ -145,11 +145,21 @@ const VIDEO_MODELS: Record<string, { t2v: string; i2v: string }> = {
|
|
|
145
145
|
t2v: "xai/grok-imagine-video/text-to-video",
|
|
146
146
|
i2v: "xai/grok-imagine-video/image-to-video",
|
|
147
147
|
},
|
|
148
|
+
// Sora 2 - OpenAI's video model via fal (t2v + i2v, with audio)
|
|
149
|
+
"sora-2": {
|
|
150
|
+
t2v: "fal-ai/sora-2/text-to-video",
|
|
151
|
+
i2v: "fal-ai/sora-2/image-to-video",
|
|
152
|
+
},
|
|
153
|
+
"sora-2-pro": {
|
|
154
|
+
t2v: "fal-ai/sora-2/text-to-video/pro",
|
|
155
|
+
i2v: "fal-ai/sora-2/image-to-video/pro",
|
|
156
|
+
},
|
|
148
157
|
};
|
|
149
158
|
|
|
150
159
|
// Video edit models - video-to-video editing
|
|
151
160
|
const VIDEO_EDIT_MODELS: Record<string, string> = {
|
|
152
161
|
"grok-imagine-edit": "xai/grok-imagine-video/edit-video",
|
|
162
|
+
"sora-2-remix": "fal-ai/sora-2/video-to-video/remix",
|
|
153
163
|
};
|
|
154
164
|
|
|
155
165
|
// Motion control models - video-to-video with motion transfer
|
|
@@ -183,6 +193,9 @@ const IMAGE_MODELS: Record<string, string> = {
|
|
|
183
193
|
"qwen-image-2/edit": "fal-ai/qwen-image-2/edit",
|
|
184
194
|
"qwen-image-2-pro": "fal-ai/qwen-image-2/pro/text-to-image",
|
|
185
195
|
"qwen-image-2-pro/edit": "fal-ai/qwen-image-2/pro/edit",
|
|
196
|
+
// Grok Imagine Image - xAI text-to-image and image editing
|
|
197
|
+
"grok-imagine-image": "xai/grok-imagine-image",
|
|
198
|
+
"grok-imagine-image/edit": "xai/grok-imagine-image/edit",
|
|
186
199
|
// Qwen Image Edit 2511 Multiple Angles - camera angle adjustment
|
|
187
200
|
"qwen-angles": "fal-ai/qwen-image-edit-2511-multiple-angles",
|
|
188
201
|
// Recraft V4 Pro - text-to-image
|
|
@@ -479,6 +492,7 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
479
492
|
const isKlingV26 = this.modelId === "kling-v2.6";
|
|
480
493
|
const isLtx2 = this.modelId === "ltx-2-19b-distilled";
|
|
481
494
|
const isGrokImagine = this.modelId === "grok-imagine";
|
|
495
|
+
const isSora2 = this.modelId === "sora-2" || this.modelId === "sora-2-pro";
|
|
482
496
|
|
|
483
497
|
const fileHashes = await computeFileHashes(files as ImageModelV3File[]);
|
|
484
498
|
|
|
@@ -591,6 +605,23 @@ class FalVideoModel implements VideoModelV3 {
|
|
|
591
605
|
if (!input.resolution) {
|
|
592
606
|
input.resolution = "720p";
|
|
593
607
|
}
|
|
608
|
+
} else if (isSora2) {
|
|
609
|
+
// Sora 2: only supports 4, 8, 12, 16, 20 second durations
|
|
610
|
+
const allowedDurations = [4, 8, 12, 16, 20];
|
|
611
|
+
const d = duration ?? 4;
|
|
612
|
+
if (!allowedDurations.includes(d)) {
|
|
613
|
+
warnings.push({
|
|
614
|
+
type: "other",
|
|
615
|
+
message: `Sora 2 only supports durations: ${allowedDurations.join(", ")}s. Got ${d}s, defaulting to 4s.`,
|
|
616
|
+
});
|
|
617
|
+
input.duration = 4;
|
|
618
|
+
} else {
|
|
619
|
+
input.duration = d;
|
|
620
|
+
}
|
|
621
|
+
// Disable video deletion so generated video URLs remain accessible
|
|
622
|
+
if (input.delete_video === undefined) {
|
|
623
|
+
input.delete_video = false;
|
|
624
|
+
}
|
|
594
625
|
} else {
|
|
595
626
|
input.duration = duration ?? 5;
|
|
596
627
|
}
|
package/src/react/renderers/utils.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { existsSync, statSync } from "node:fs";
|
|
2
2
|
import { resolve } from "node:path";
|
|
3
|
+
import { ResolvedElement } from "../resolved-element";
|
|
3
4
|
import type { VargElement, VargNode } from "../types";
|
|
4
5
|
|
|
5
6
|
export function resolvePath(path: string): string {
|
|
@@ -84,11 +85,22 @@ function serializeValue(v: unknown): string {
|
|
|
84
85
|
}
|
|
85
86
|
return v;
|
|
86
87
|
}
|
|
88
|
+
// Never put raw binary data in cache keys — use semantic identity instead.
|
|
89
|
+
// Audio segments can be 48-110KB; base64-encoding them would exceed
|
|
90
|
+
// Upstash Redis' 32KB key size limit.
|
|
87
91
|
if (v instanceof Uint8Array) {
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
+
return `uint8:${v.byteLength}`;
|
|
93
|
+
}
|
|
94
|
+
// ResolvedElement (e.g. a speech segment used as Video audio input):
|
|
95
|
+
// serialize by content identity (type + text + duration), not binary data.
|
|
96
|
+
if (v instanceof ResolvedElement) {
|
|
97
|
+
const parts = [v.type];
|
|
98
|
+
for (const child of v.children) {
|
|
99
|
+
if (typeof child === "string") parts.push(child);
|
|
100
|
+
}
|
|
101
|
+
if (v.meta.duration) parts.push(String(v.meta.duration));
|
|
102
|
+
if (v.meta.file?.url) parts.push(v.meta.file.url);
|
|
103
|
+
return `resolved(${parts.join(",")})`;
|
|
92
104
|
}
|
|
93
105
|
if (isVargElement(v)) {
|
|
94
106
|
return `element:${computeCacheKey(v).join(":")}`;
|
|
@@ -97,6 +109,12 @@ function serializeValue(v: unknown): string {
|
|
|
97
109
|
return `[${v.map(serializeValue).join(",")}]`;
|
|
98
110
|
}
|
|
99
111
|
if (v && typeof v === "object") {
|
|
112
|
+
// Skip File-like objects with binary data — use URL if available
|
|
113
|
+
if ("_data" in v && "_mediaType" in v) {
|
|
114
|
+
const url = (v as { _url?: string | null })._url;
|
|
115
|
+
const mediaType = (v as { _mediaType: string })._mediaType;
|
|
116
|
+
return url ? `file(${url})` : `file(${mediaType})`;
|
|
117
|
+
}
|
|
100
118
|
const entries = Object.entries(v)
|
|
101
119
|
.map(([key, val]) => `${key}:${serializeValue(val)}`)
|
|
102
120
|
.join(",");
|
|
@@ -134,7 +152,7 @@ export function computeCacheKey(element: VargElement): CacheKeyPart[] {
|
|
|
134
152
|
} else if (v === null || v === undefined) {
|
|
135
153
|
key.push(k, v);
|
|
136
154
|
} else if (v instanceof Uint8Array) {
|
|
137
|
-
key.push(k, `uint8:${v.byteLength}
|
|
155
|
+
key.push(k, `uint8:${v.byteLength}`);
|
|
138
156
|
} else if (isVargElement(v)) {
|
|
139
157
|
key.push(k, ...computeCacheKey(v));
|
|
140
158
|
} else if (Array.isArray(v) || typeof v === "object") {
|
package/src/react/resolve.ts
CHANGED
|
@@ -187,6 +187,12 @@ async function sliceSegments(
|
|
|
187
187
|
descriptors.map(async (desc) => {
|
|
188
188
|
const bytes = await sliceAudio(fullFile, desc.start, desc.end);
|
|
189
189
|
const segmentFile = File.fromBuffer(bytes, "audio/mpeg");
|
|
190
|
+
// Upload segment to storage so downstream cache keys use the URL
|
|
191
|
+
// instead of serializing raw audio bytes (which can exceed Redis key limits).
|
|
192
|
+
const ctx = getResolveContext();
|
|
193
|
+
if (ctx?.storage) {
|
|
194
|
+
await segmentFile.upload(ctx.storage);
|
|
195
|
+
}
|
|
190
196
|
|
|
191
197
|
// Rebase word timings relative to the segment's sliced audio (t=0)
|
|
192
198
|
const segmentWords = allWords
|