vargai 0.4.0-alpha104 → 0.4.0-alpha105
- package/package.json +1 -1
- package/src/ai-sdk/providers/elevenlabs.ts +1 -1
- package/src/ai-sdk/providers/fal.ts +21 -11
- package/src/definitions/models/nano-banana-2.ts +6 -5
- package/src/providers/elevenlabs.ts +2 -1
- package/src/providers/fal.ts +6 -2
- package/src/react/renderers/captions.ts +18 -7
- package/src/react/renderers/image.ts +3 -3
- package/src/react/resolve.ts +5 -3
- package/src/react/types.ts +1 -1
package/package.json
CHANGED

@@ -104,7 +104,7 @@
   "license": "Apache-2.0",
   "author": "varg.ai <hello@varg.ai> (https://varg.ai)",
   "sideEffects": false,
-  "version": "0.4.0-alpha104",
+  "version": "0.4.0-alpha105",
   "exports": {
     ".": "./src/index.ts",
     "./ai": "./src/ai-sdk/index.ts",
package/src/ai-sdk/providers/elevenlabs.ts
CHANGED

@@ -89,7 +89,7 @@ class ElevenLabsMusicModel implements MusicModelV3 {
     const elevenLabsOptions = providerOptions?.elevenlabs ?? {};
     const audio = await this.client.music.compose({
       prompt,
-      musicLengthMs: duration ? duration * 1000 : undefined,
+      musicLengthMs: duration ? Math.round(duration * 1000) : undefined,
       modelId: this.modelId,
       ...elevenLabsOptions,
     } as Parameters<typeof this.client.music.compose>[0]);
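Why the Math.round guard: in IEEE-754 arithmetic, a seconds-to-milliseconds conversion can yield a non-integer, which an API expecting integer milliseconds may reject. A minimal sketch of the issue and the fix (toMusicLengthMs is illustrative, not part of the package):

// Illustrative only: fractional durations can produce non-integer ms.
const naiveMs = 1.005 * 1000; // 1004.9999999999999, not 1005
const safeMs = Math.round(1.005 * 1000); // 1005

// Hedged sketch of the conversion above, assuming the ElevenLabs API
// expects musicLengthMs as an integer:
function toMusicLengthMs(duration?: number): number | undefined {
  return duration ? Math.round(duration * 1000) : undefined;
}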
package/src/ai-sdk/providers/fal.ts
CHANGED

@@ -196,7 +196,7 @@ const IMAGE_MODELS: Record<string, string> = {
   "recraft-v3": "fal-ai/recraft/v3/text-to-image",
   "nano-banana-pro": "fal-ai/nano-banana-pro",
   "nano-banana-pro/edit": "fal-ai/nano-banana-pro/edit",
-  "nano-banana-2": "fal-ai/nano-banana-2
+  "nano-banana-2": "fal-ai/nano-banana-2",
   "nano-banana-2/edit": "fal-ai/nano-banana-2/edit",
   "seedream-v4.5/edit": "fal-ai/bytedance/seedream/v4.5/edit",
   // Qwen Image 2 - text-to-image and image-to-image editing (standard + pro)
@@ -924,13 +924,21 @@ class FalImageModel implements ImageModelV3 {
     }

     const hasFiles = files && files.length > 0;
-    const finalEndpoint = this.resolveEndpoint();

     let stableKey: string | undefined;
     if (hasFiles && files) {
       const fileHashes = await computeFileHashes(files);
+      const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
+      // Reve uses singular image_url instead of image_urls array
+      if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
+        input.image_url = imageUrls[0];
+      } else {
+        input.image_urls = imageUrls;
+      }
+      // Compute stable key after files are resolved
+      const finalEndpointForKey = this.resolveEndpoint(hasFiles);
       stableKey = JSON.stringify({
-        endpoint: finalEndpoint,
+        endpoint: finalEndpointForKey,
         prompt,
         n,
         size,
@@ -940,13 +948,6 @@ class FalImageModel implements ImageModelV3 {
         modelId: this.modelId,
         fileHashes,
       });
-      const imageUrls = await pMap(files, fileToUrl, { concurrency: 2 });
-      // Reve uses singular image_url instead of image_urls array
-      if (SINGULAR_IMAGE_URL_MODELS.has(this.modelId)) {
-        input.image_url = imageUrls[0];
-      } else {
-        input.image_urls = imageUrls;
-      }
     }

     if (isQwenAngles && !input.image_urls) {
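For context, a sketch of the branch the moved block implements, assuming SINGULAR_IMAGE_URL_MODELS is a Set of model ids (such as Reve) whose endpoints take a single image_url rather than an image_urls array; attachImages is a hypothetical helper, not the shipped code:

// Attach resolved image URLs to the request payload in whichever
// shape the target endpoint expects.
function attachImages(
  input: Record<string, unknown>,
  modelId: string,
  imageUrls: string[],
  singularModels: Set<string>,
): void {
  if (singularModels.has(modelId)) {
    input.image_url = imageUrls[0]; // Reve-style: single reference image
  } else {
    input.image_urls = imageUrls; // default: array of reference images
  }
}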
@@ -962,6 +963,10 @@ class FalImageModel implements ImageModelV3 {
       }
     }

+    // Resolve endpoint after file processing so dual-endpoint models
+    // (e.g. nano-banana-2 vs nano-banana-2/edit) route correctly
+    const finalEndpoint = this.resolveEndpoint(hasFiles);
+
     const result = await executeWithQueueRecovery<{ data: unknown }>(
       finalEndpoint,
       input,
@@ -998,11 +1003,16 @@ class FalImageModel implements ImageModelV3 {
     };
   }

-  private resolveEndpoint(): string {
+  private resolveEndpoint(hasFiles?: boolean): string {
     if (this.modelId.startsWith("raw:")) {
       return this.modelId.slice(4);
     }

+    // Nano Banana 2: route to /edit when images are provided, base endpoint for t2i
+    if (this.modelId === "nano-banana-2" && hasFiles) {
+      return "fal-ai/nano-banana-2/edit";
+    }
+
     return IMAGE_MODELS[this.modelId] ?? this.modelId;
   }
 }
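The effect of threading hasFiles through resolveEndpoint, sketched as a standalone function (sketchResolveEndpoint is hypothetical; IMAGE_MODELS is the mapping table shown earlier in this file):

// Standalone sketch of the dual-endpoint rule, not the class method itself.
function sketchResolveEndpoint(modelId: string, hasFiles?: boolean): string {
  if (modelId.startsWith("raw:")) return modelId.slice(4);
  if (modelId === "nano-banana-2" && hasFiles) {
    return "fal-ai/nano-banana-2/edit"; // editing: reference images present
  }
  return IMAGE_MODELS[modelId] ?? modelId;
}

sketchResolveEndpoint("nano-banana-2");       // "fal-ai/nano-banana-2" (t2i)
sketchResolveEndpoint("nano-banana-2", true); // "fal-ai/nano-banana-2/edit"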
package/src/definitions/models/nano-banana-2.ts
CHANGED

@@ -1,6 +1,6 @@
 /**
- * Nano Banana 2 image
- *
+ * Nano Banana 2 image model (Google's next-gen image generation/editing)
+ * Supports both text-to-image (no images) and image editing (with image_urls)
  */

 import { z } from "zod";

@@ -35,8 +35,9 @@ const nanoBanana2InputSchema = z.object({
   prompt: z.string().describe("Text description for image editing"),
   image_urls: z
     .array(z.string().url())
+    .optional()
     .describe(
-      "Input image URLs for image
+      "Input image URLs for image editing. When provided, routes to the /edit endpoint. Omit for text-to-image generation.",
     ),
   resolution: nanoBanana2ResolutionSchema
     .default("1K")

@@ -103,11 +104,11 @@ export const definition: ModelDefinition<typeof schema> = {
   type: "model",
   name: "nano-banana-2",
   description:
-    "Google Nano Banana 2 - next-gen image editing model.
+    "Google Nano Banana 2 - next-gen image generation and editing model. Supports text-to-image and image editing (with image_urls).",
   providers: ["fal"],
   defaultProvider: "fal",
   providerModels: {
-    fal: "fal-ai/nano-banana-2
+    fal: "fal-ai/nano-banana-2",
   },
   schema,
 };
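With .optional() on image_urls, both call shapes validate. A sketch using zod's safeParse, assuming the remaining schema fields carry defaults as resolution does (input values are hypothetical):

// Text-to-image: image_urls omitted, now accepted by the schema.
const t2i = nanoBanana2InputSchema.safeParse({
  prompt: "a banana-shaped spaceship over a city at dusk",
});
// t2i.success === true

// Editing: image_urls present, routed to the /edit endpoint downstream.
const edit = nanoBanana2InputSchema.safeParse({
  prompt: "make the sky purple",
  image_urls: ["https://example.com/input.png"],
});
// edit.success === true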
package/src/providers/elevenlabs.ts
CHANGED

@@ -117,7 +117,8 @@ export class ElevenLabsProvider extends BaseProvider {

     const audio = await this.client.music.compose({
       prompt,
-      musicLengthMs,
+      musicLengthMs:
+        musicLengthMs != null ? Math.round(musicLengthMs) : undefined,
       modelId: "music_v1",
     });

package/src/providers/fal.ts
CHANGED

@@ -54,9 +54,13 @@ export class FalProvider extends BaseProvider {
         return "fal-ai/nano-banana-pro/edit";
       }
     }
-    // Nano Banana 2:
+    // Nano Banana 2: route to /edit when image_urls are provided, otherwise use base t2i endpoint
     if (model === "fal-ai/nano-banana-2") {
-
+      const imageUrls = inputs.image_urls as string[] | undefined;
+      if (imageUrls && imageUrls.length > 0) {
+        return "fal-ai/nano-banana-2/edit";
+      }
+      return "fal-ai/nano-banana-2";
     }
     // Qwen Image 2: route to /edit endpoint when image_urls are provided
     if (model === "fal-ai/qwen-image-2/text-to-image") {
package/src/react/renderers/captions.ts
CHANGED

@@ -156,12 +156,17 @@ function parseSrt(content: string): SrtEntry[] {
   return entries;
 }

+/**
+ * Format seconds to ASS timestamp `H:MM:SS.CC`.
+ * Computes from total centiseconds to avoid overflow when rounding
+ * lands on 100 cs (e.g. 1.999s would otherwise produce `0:00:01.100`).
+ */
 function formatAssTime(seconds: number): string {
-  const
-  const
-  const
-  const
-
+  const totalCs = Math.max(0, Math.round(seconds * 100));
+  const h = Math.floor(totalCs / 360000);
+  const m = Math.floor((totalCs % 360000) / 6000);
+  const s = Math.floor((totalCs % 6000) / 100);
+  const cs = totalCs % 100;
   return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
 }

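The overflow case in the doc comment, spelled out: a naive implementation that rounds the fractional part in isolation can emit an impossible centisecond value. This sketch is the bug, not the shipped code:

// Naive sketch: rounding the fraction alone can yield cs === 100.
function naiveAssTime(seconds: number): string {
  const h = Math.floor(seconds / 3600);
  const m = Math.floor((seconds % 3600) / 60);
  const s = Math.floor(seconds % 60);
  const cs = Math.round((seconds % 1) * 100); // 1.999 -> Math.round(99.9) = 100
  return `${h}:${String(m).padStart(2, "0")}:${String(s).padStart(2, "0")}.${String(cs).padStart(2, "0")}`;
}

naiveAssTime(1.999); // "0:00:01.100" (100 centiseconds is not a valid field)
// The centisecond-first version carries the round into the seconds digit:
// Math.round(1.999 * 100) = 200 total cs -> "0:00:02.00"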
@@ -190,9 +195,15 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

   const entries = parseSrt(srtContent);
   const assDialogues = entries
-    .map((entry) => {
+    .map((entry, i) => {
       const start = formatAssTime(entry.start);
-      const end = formatAssTime(entry.end);
+      // Clamp end to next entry's start to prevent overlapping subtitles
+      // (transcription engines often produce overlapping word timestamps)
+      const nextStart =
+        i < entries.length - 1 ? entries[i + 1]!.start : undefined;
+      const clampedEnd =
+        nextStart !== undefined ? Math.min(entry.end, nextStart) : entry.end;
+      const end = formatAssTime(clampedEnd);
       const text = entry.text.replace(/\n/g, "\\N");
       return `Dialogue: 0,${start},${end},Default,,0,0,0,,${text}`;
     })
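A worked example of the clamp, with hypothetical entry timings:

// Two SRT entries whose timestamps overlap by 0.3s (values hypothetical):
const entries = [
  { start: 0.0, end: 1.4, text: "Hello" },
  { start: 1.1, end: 2.0, text: "world" },
];
// Without clamping, both captions render from 1.1s to 1.4s.
const clampedEnd = Math.min(entries[0]!.end, entries[1]!.start); // 1.1
// "Hello" now leaves the screen exactly when "world" appears.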
package/src/react/renderers/image.ts
CHANGED

@@ -37,9 +37,9 @@ async function resolvePrompt(
   if (typeof prompt === "string") {
     return prompt;
   }
-  const resolvedImages = await Promise.all(
-    prompt.images.map((img) => resolveImageInput(img, ctx)),
-  );
+  const resolvedImages = prompt.images
+    ? await Promise.all(prompt.images.map((img) => resolveImageInput(img, ctx)))
+    : [];
   return { text: prompt.text, images: resolvedImages };
 }

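Since ImagePrompt.images is now optional (see the types.ts change below), a text-only object prompt is legal and must not throw. A sketch of the two call shapes (ctx and img are hypothetical):

// Before this change, a text-only object prompt threw when the renderer
// called .map on the missing images array.
await resolvePrompt({ text: "a red fox" }, ctx);
// now resolves to { text: "a red fox", images: [] }

await resolvePrompt({ text: "same fox, snowy field", images: [img] }, ctx);
// resolves each ImageInput via resolveImageInput, as before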
package/src/react/resolve.ts
CHANGED

@@ -577,9 +577,11 @@ async function resolveImagePrompt(
   prompt: ImagePrompt,
 ): Promise<string | { text?: string; images: Uint8Array[] }> {
   if (typeof prompt === "string") return prompt;
-  const resolvedImages = await Promise.all(
-    prompt.images.map((img) => resolveImageInputForStandalone(img)),
-  );
+  const resolvedImages = prompt.images
+    ? await Promise.all(
+        prompt.images.map((img) => resolveImageInputForStandalone(img)),
+      )
+    : [];
   return { text: prompt.text, images: resolvedImages };
 }

package/src/react/types.ts
CHANGED

@@ -129,7 +129,7 @@ export interface OverlayProps extends BaseProps, PositionProps, AudioProps {
 }

 export type ImageInput = Uint8Array | string | VargElement<"image">;
-export type ImagePrompt = string | { text?: string; images: ImageInput[] };
+export type ImagePrompt = string | { text?: string; images?: ImageInput[] };

 export interface ImageProps extends BaseProps, PositionProps {
   prompt?: ImagePrompt;