@mevdragon/vidfarm-devcli 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +11 -4
- package/PLATFORM_SPEC.md +142 -2
- package/README.md +165 -16
- package/SKILL.developer.md +577 -0
- package/dist/infra/cdk/bin/vidfarm-prod.js +59 -0
- package/dist/infra/cdk/lib/vidfarm-prod-stack.js +212 -0
- package/dist/src/account-pages.js +578 -0
- package/dist/src/app.js +887 -66
- package/dist/src/cli.js +284 -5
- package/dist/src/config.js +24 -4
- package/dist/src/db.js +427 -18
- package/dist/src/dev-app.js +59 -12
- package/dist/src/homepage.js +441 -0
- package/dist/src/index.js +12 -7
- package/dist/src/lib/crypto.js +14 -0
- package/dist/src/lib/template-dna.js +542 -0
- package/dist/src/lib/template-style-options.js +49 -0
- package/dist/src/registry.js +54 -7
- package/dist/src/runtime.js +3 -1
- package/dist/src/services/auth.js +69 -5
- package/dist/src/services/jobs.js +23 -4
- package/dist/src/services/providers.js +74 -12
- package/dist/src/services/storage.js +52 -18
- package/dist/src/services/template-certification.js +160 -0
- package/dist/src/services/template-loader.js +37 -0
- package/dist/src/services/template-sources.js +135 -0
- package/dist/src/worker.js +19 -7
- package/dist/templates/template_0000/src/lib/images.js +242 -0
- package/dist/templates/template_0000/src/remotion/Root.js +33 -0
- package/dist/templates/template_0000/src/sdk.js +3 -0
- package/dist/templates/template_0000/src/style-options.js +51 -0
- package/dist/templates/template_0000/src/template-dna.js +9 -0
- package/dist/templates/template_0000/src/template.js +1217 -0
- package/package.json +9 -1
- package/templates/template_0000/README.md +121 -0
- package/templates/template_0000/SKILL.md +193 -0
- package/templates/template_0000/assets/Abel-Regular.ttf +0 -0
- package/templates/template_0000/assets/DMSerifDisplay-Regular.ttf +0 -0
- package/templates/template_0000/assets/Montserrat[wght].ttf +0 -0
- package/templates/template_0000/assets/SourceCodePro[wght].ttf +0 -0
- package/templates/template_0000/assets/TikTokSans-SemiBold.ttf +0 -0
- package/templates/template_0000/assets/Yesteryear-Regular.ttf +0 -0
- package/templates/template_0000/composition.json +11 -0
- package/templates/template_0000/package-lock.json +5137 -0
- package/templates/template_0000/package.json +30 -0
- package/templates/template_0000/research/preview/.gitkeep +1 -0
- package/templates/template_0000/research/source_notes.md +7 -0
- package/templates/template_0000/scripts/create-site.mjs +27 -0
- package/templates/template_0000/scripts/render-cloud.mjs +72 -0
- package/templates/template_0000/src/lib/images.ts +284 -0
- package/templates/template_0000/src/remotion/Root.js +33 -0
- package/templates/template_0000/src/remotion/Root.tsx +75 -0
- package/templates/template_0000/src/remotion/index.tsx +4 -0
- package/templates/template_0000/src/sdk.ts +122 -0
- package/templates/template_0000/src/style-options.js +51 -0
- package/templates/template_0000/src/style-options.ts +60 -0
- package/templates/template_0000/src/template-dna.ts +15 -0
- package/templates/template_0000/src/template.ts +1747 -0
- package/templates/template_0000/template.config.json +26 -0
- package/templates/template_0000/tsconfig.json +19 -0
- package/dist/templates/template_0000/demo-template.js +0 -196
- package/dist/templates/template_0000/remotion/Root.js +0 -66
- /package/dist/templates/template_0000/{remotion → src/remotion}/index.js +0 -0
|
@@ -0,0 +1,1747 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { fileURLToPath } from "node:url";
|
|
5
|
+
import sharp from "sharp";
|
|
6
|
+
import { normalizeToPortraitFrame } from "./lib/images.js";
|
|
7
|
+
import {
|
|
8
|
+
defineTemplate,
|
|
9
|
+
type ProviderType,
|
|
10
|
+
type TemplateJobContext,
|
|
11
|
+
} from "./sdk.js";
|
|
12
|
+
import {
|
|
13
|
+
TEMPLATE_FONT_IDS,
|
|
14
|
+
TEMPLATE_FONT_OPTIONS,
|
|
15
|
+
TEMPLATE_TEXT_BACKGROUND_COLOR_IDS,
|
|
16
|
+
TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS,
|
|
17
|
+
type TemplateFontId,
|
|
18
|
+
type TemplateTextBackgroundColorId,
|
|
19
|
+
} from "./style-options.js";
|
|
20
|
+
import { templateLinkToOriginal, templateVisualDna, templateViralDna } from "./template-dna.js";
|
|
21
|
+
|
|
22
|
+
// Identity of this template: registry UUID, slug, and the default Remotion
// composition id (used as the default for `renderCompositionId`).
const TEMPLATE_ID = "4c7a7e1a-7f35-4f30-9f86-9c8a63c7f2db";
const TEMPLATE_SLUG_ID = "template_0000";
const COMPOSITION_ID = "template-0000";
// Preview media URLs exposed through `about.preview_media`.
const TEMPLATE_PREVIEW_MEDIA = [
  "https://vidfarmprodstack-vidfarmbucket335ee12f-0vsvtd5earqy.s3.us-east-1.amazonaws.com/templates/template-0000/about/preview-01.jpg",
] as const;
// Output frame dimensions; the manifest reports this size with aspect ratio "9:16".
const FRAME = { width: 1080, height: 1920 } as const;
// Safe-area insets written verbatim into the manifest's `safeArea`.
// NOTE(review): presumably pixel margins inside FRAME — confirm against the renderer.
const TIKTOK_SAFE_AREA = {
  left: 78,
  right: 248,
  top: 196,
  bottom: 408,
} as const;
// Min/max bounds for a text center point, expressed as 0..1 values.
// NOTE(review): consumers are outside the visible part of this file — presumably
// used to clamp layout anchors; confirm.
const NATIVE_UI_TEXT_ZONE = {
  minCenterXPercent: 0.38,
  maxCenterXPercent: 0.62,
  minCenterYPercent: 0.2,
  maxCenterYPercent: 0.58,
} as const;
// Sentinel caption-background id meaning "no background colour".
const TEXT_BACKGROUND_NONE = "none" as const;
|
|
42
|
+
|
|
43
|
+
// Legacy tuple form of a slide: two strings (each with its own minimum length)
// optionally followed by an integer in the 500..30000 range.
// NOTE(review): the smoke-test payload suggests [image prompt, caption, duration ms];
// the mapping itself lives in normalizeSlideInput — confirm there.
const legacySlideInputSchema = z.union([
  z.tuple([z.string().min(3), z.string().min(1)]),
  z.tuple([
    z.string().min(3),
    z.string().min(1),
    z.number().int().min(500).max(30000),
  ]),
]);
// Object form of a slide. Attachments default to an empty list (max 12 URLs)
// and duration defaults to 4000.
const structuredSlideInputSchema = z.object({
  image_prompt: z.string().min(3),
  image_prompt_attachments: z.array(z.string().url()).max(12).default([]),
  caption: z.string().min(1),
  duration_ms: z.number().int().min(500).max(30000).default(4000),
});
// A slide may be supplied in either form.
const slideInputSchema = z.union([
  legacySlideInputSchema,
  structuredSlideInputSchema,
]);
// Input of the `create_slideshow` operation: 1..20 slides plus an optional
// free-text prompt that is forwarded to metadata generation.
const createSlideshowInputSchema = z.object({
  slides: z.array(slideInputSchema).min(1).max(20),
  meta_details_prompt: z.string().min(8).max(4000).optional(),
});
// One frame handed to the video render: an image URL and its duration.
const renderSlideSchema = z.object({
  imageUrl: z.string().url(),
  durationMs: z.number().int().min(500).max(30000),
});
// Input of the `render_video` operation: 1..20 frames.
const renderVideoInputSchema = z.object({
  slides: z.array(renderSlideSchema).min(1).max(20),
});
// Provider ids accepted for `defaultProvider`; `ImageProvider` is their union.
const supportedImageProviders = ["openai", "gemini", "openrouter"] as const;
type ImageProvider = (typeof supportedImageProviders)[number];
|
|
74
|
+
|
|
75
|
+
// Caption placement chosen for one slide.
// NOTE(review): defaultLayout() sets maxWidthPercent to 58 but the anchors to
// 0.5 / 0.28, so the "percent" fields appear to use different scales — confirm.
type Layout = {
  zone: "top" | "center" | "bottom";
  align: "left" | "center" | "right";
  maxWidthPercent: number;
  anchorXPercent?: number;
  anchorYPercent?: number;
  justification: string;
};

// Per-slide record accumulated by createSlideshowWorkflow and written to the manifest.
type SlideOutput = {
  index: number;
  imagePrompt: string;
  imagePromptAttachments: string[];
  overlayText: string;
  durationMs: number;
  // URL of the stored normalized background image.
  backgroundImageUrl: string | null;
  // URL of the stored finished frame (background plus caption).
  frameImageUrl: string | null;
  // Prompt that produced the selected image candidate.
  prompt: string;
  revisedPrompt: string | null;
  layout: Layout;
};

// Post metadata returned by generateMetaDetails.
type MetaDetails = {
  title: string;
  description: string;
  pinned_comment: string;
  location: string;
  song: string;
};

// Caption font and background resolved from the template config.
type TextStyleSpec = {
  fontId: TemplateFontId;
  fontFamily: string;
  backgroundColorId:
    | TemplateTextBackgroundColorId
    | typeof TEXT_BACKGROUND_NONE;
  // null when no matching background colour option was resolved.
  backgroundColorHex: string | null;
};

// Shape of the JSON manifest saved at the end of the slideshow image stage.
type SlideshowManifest = {
  templateId: string;
  size: { width: number; height: number; aspectRatio: "9:16" };
  textStyle: {
    fontId: TemplateFontId;
    fontFamily: string;
    fill: string;
    backgroundColorId:
      | TemplateTextBackgroundColorId
      | typeof TEXT_BACKGROUND_NONE;
    backgroundColorHex: string | null;
    shadow: string;
    availableFonts: typeof TEMPLATE_FONT_OPTIONS;
    availableBackgroundColors: typeof TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS;
  };
  safeArea: typeof TIKTOK_SAFE_AREA;
  metaDetails: MetaDetails;
  slides: SlideOutput[];
};
|
|
133
|
+
|
|
134
|
+
// Resolved once when the module is loaded. The resolve* helpers are not among
// the visible imports, so they are presumably defined further down in this
// file — confirm.
const remotionEntryPoint = resolveRemotionEntryPoint();
const templateRuntimeConfig = resolveTemplateRuntimeConfig();
// Optional pre-built Remotion serve URL; undefined when the runtime config has none.
const remotionRuntimeConfig = {
  serveUrl: templateRuntimeConfig.remotion?.serve_url ?? undefined,
};
const skillPath = resolveSkillPath();
// NOTE(review): not referenced in the visible part of the file; presumably
// consumed by the caption renderer — confirm.
const overlayFonts = resolveOverlayFonts();
|
|
141
|
+
|
|
142
|
+
export const template0000Definition = defineTemplate({
|
|
143
|
+
id: TEMPLATE_ID,
|
|
144
|
+
slugId: TEMPLATE_SLUG_ID,
|
|
145
|
+
version: "1.0.0",
|
|
146
|
+
about: {
|
|
147
|
+
title: "Template 0000",
|
|
148
|
+
description:
|
|
149
|
+
"Starter TikTok-safe slideshow template with staged image generation, text compositing, and optional Remotion video rendering.",
|
|
150
|
+
viral_dna: templateViralDna,
|
|
151
|
+
visual_dna: templateVisualDna,
|
|
152
|
+
preview_media: [...TEMPLATE_PREVIEW_MEDIA],
|
|
153
|
+
link_to_original: templateLinkToOriginal,
|
|
154
|
+
},
|
|
155
|
+
skillPath,
|
|
156
|
+
configSchema: z.object({
|
|
157
|
+
defaultProvider: z.enum(supportedImageProviders).default("gemini"),
|
|
158
|
+
textModel: z.string().optional(),
|
|
159
|
+
imageModel: z.string().optional(),
|
|
160
|
+
captionFont: z.enum(TEMPLATE_FONT_IDS).default("montserrat"),
|
|
161
|
+
captionBackgroundColor: z
|
|
162
|
+
.enum([TEXT_BACKGROUND_NONE, ...TEMPLATE_TEXT_BACKGROUND_COLOR_IDS])
|
|
163
|
+
.default(TEXT_BACKGROUND_NONE),
|
|
164
|
+
renderCompositionId: z.string().default(COMPOSITION_ID),
|
|
165
|
+
}),
|
|
166
|
+
operations: {
|
|
167
|
+
create_slideshow: {
|
|
168
|
+
description:
|
|
169
|
+
"Generate TikTok-safe 9:16 background images, composite the exact overlay text into finished slide frames, and save a manifest.",
|
|
170
|
+
inputSchema: createSlideshowInputSchema,
|
|
171
|
+
workflow: "createSlideshowWorkflow",
|
|
172
|
+
webhookSupport: true,
|
|
173
|
+
smokeTestPayload: {
|
|
174
|
+
slides: [
|
|
175
|
+
["a founder at a desk", "Launch faster", 2400],
|
|
176
|
+
{
|
|
177
|
+
image_prompt: "close-up product photo",
|
|
178
|
+
image_prompt_attachments: [
|
|
179
|
+
"https://images.unsplash.com/photo-1520607162513-77705c0f0d4a?auto=format&fit=crop&w=1080&q=80",
|
|
180
|
+
],
|
|
181
|
+
caption: "Built in-house",
|
|
182
|
+
duration_ms: 3200,
|
|
183
|
+
},
|
|
184
|
+
],
|
|
185
|
+
meta_details_prompt:
|
|
186
|
+
"Target US TikTok skincare and startup audiences. Make the hook curiosity-driven, natural, and UGC-native.",
|
|
187
|
+
},
|
|
188
|
+
},
|
|
189
|
+
render_video: {
|
|
190
|
+
description:
|
|
191
|
+
"Turn finished slideshow frames into an auto-advancing vertical video using Remotion.",
|
|
192
|
+
inputSchema: renderVideoInputSchema,
|
|
193
|
+
workflow: "renderVideoWorkflow",
|
|
194
|
+
webhookSupport: true,
|
|
195
|
+
smokeTestPayload: {
|
|
196
|
+
slides: [
|
|
197
|
+
{
|
|
198
|
+
imageUrl:
|
|
199
|
+
"https://images.unsplash.com/photo-1519389950473-47ba0277781c?auto=format&fit=crop&w=1080&q=80",
|
|
200
|
+
durationMs: 2400,
|
|
201
|
+
},
|
|
202
|
+
{
|
|
203
|
+
imageUrl:
|
|
204
|
+
"https://images.unsplash.com/photo-1520607162513-77705c0f0d4a?auto=format&fit=crop&w=1080&q=80",
|
|
205
|
+
durationMs: 3200,
|
|
206
|
+
},
|
|
207
|
+
],
|
|
208
|
+
},
|
|
209
|
+
},
|
|
210
|
+
},
|
|
211
|
+
jobs: {
|
|
212
|
+
/**
 * Image stage of the slideshow job.
 *
 * For every input slide it generates a background image, normalizes it to the
 * 1080x1920 frame, stores it, chooses a caption layout, composites the caption,
 * and stores the finished frame. Afterwards it generates post metadata, writes
 * a JSON manifest, and returns every stored URL plus a ready-made payload for
 * the `render_video` operation.
 *
 * Progress is reported monotonically: 0.04 at start, an evenly divided
 * 0.08..0.70 band for the per-slide work (four stages per slide), then 0.72
 * (metadata), 0.78 (manifest) and 1.
 *
 * @param ctx   Job context supplying logger, providers, storage, billing and template config.
 * @param input Raw operation input; validated with `createSlideshowInputSchema` (throws on invalid input).
 * @returns `{ progress: 1, output }` where `output` carries `files`, the manifest
 *          artifact, the meta details (both spread and nested), the inputs used
 *          for metadata generation, `renderVideoInput`, and the slide records.
 */
async createSlideshowWorkflow(ctx, input) {
  const payload = createSlideshowInputSchema.parse(input);
  ctx.logger.progress(0.04, "Starting template_0000 slideshow image stage");
  const provider = parseImageProvider(ctx.templateConfig.defaultProvider);
  const textModel = String(
    ctx.templateConfig.textModel ?? defaultTextModelForProvider(provider),
  );
  const imageModel = String(
    ctx.templateConfig.imageModel ?? defaultImageModelForProvider(provider),
  );
  const textStyle = resolveTextStyleSpec(ctx.templateConfig);

  // Each slide owns an equal share of the 0.08..0.70 band and each of its four
  // stages a quarter of that share, so reported progress never moves backwards.
  // The schema guarantees at least one slide, so the division is safe.
  const slideCount = payload.slides.length;
  const slideStageProgress = (slideIndex: number, stageFraction: number) =>
    0.08 + ((slideIndex + stageFraction) / slideCount) * 0.62;

  const slides: SlideOutput[] = [];
  for (const [index, rawSlide] of payload.slides.entries()) {
    const { imagePrompt, imagePromptAttachments, overlayText, durationMs } =
      normalizeSlideInput(rawSlide);
    const slideLabel = index + 1;

    ctx.logger.progress(
      slideStageProgress(index, 0),
      `Generating slide ${slideLabel} image`,
    );
    const image = await generatePortraitCandidateImage(ctx, {
      provider,
      imageModel,
      imagePrompt,
      imagePromptAttachments,
      overlayText,
    });
    // NOTE(review): a single fixed charge is recorded per slide even though
    // candidate generation may call the provider several times — confirm intended.
    await ctx.billing.record({
      type: "ai_generation",
      costUsd: 0.04,
      metadata: {
        stage: "image_generation",
        slideIndex: index,
        model: imageModel,
      },
    });

    ctx.logger.progress(
      slideStageProgress(index, 0.25),
      `Normalizing slide ${slideLabel} to strict 9:16 portrait`,
    );
    const normalizedImage = await normalizeToPortraitFrame(
      image.bytes,
      FRAME,
    );

    const backgroundArtifact = await ctx.storage.putBuffer(
      `backgrounds/slide-${pad2(slideLabel)}.png`,
      normalizedImage.bytes,
      {
        contentType: normalizedImage.contentType,
        kind: "image",
        metadata: {
          slideIndex: index,
          prompt: image.prompt,
          revisedPrompt: image.revisedPrompt,
          width: normalizedImage.width,
          height: normalizedImage.height,
          aspectRatio: "9:16",
        },
      },
    );

    ctx.logger.progress(
      slideStageProgress(index, 0.5),
      `Scoring TikTok-safe text layout for slide ${slideLabel}`,
    );
    const layout = await chooseSlideLayout(ctx, {
      provider,
      model: textModel,
      imageBytes: normalizedImage.bytes,
      imageUrl: backgroundArtifact.url,
      overlayText,
    });

    ctx.logger.progress(
      slideStageProgress(index, 0.75),
      `Compositing caption into slide ${slideLabel}`,
    );
    const frameBytes = await renderFinishedSlide(
      normalizedImage.bytes,
      overlayText,
      layout,
      textStyle,
    );
    const frameArtifact = await ctx.storage.putBuffer(
      `slides/slide-${pad2(slideLabel)}.png`,
      frameBytes,
      {
        contentType: "image/png",
        kind: "image",
        metadata: {
          slideIndex: index,
          overlayText,
          layout,
          aspectRatio: "9:16",
          kind: "finished_slide_frame",
        },
      },
    );

    slides.push({
      index,
      imagePrompt,
      imagePromptAttachments,
      overlayText,
      durationMs,
      backgroundImageUrl: backgroundArtifact.url,
      frameImageUrl: frameArtifact.url,
      prompt: image.prompt,
      revisedPrompt: image.revisedPrompt,
      layout,
    });
  }

  ctx.logger.progress(0.72, "Generating post metadata");
  const metaDetails = await generateMetaDetails(ctx, {
    provider,
    textModel,
    slides,
    metaDetailsPrompt: payload.meta_details_prompt,
  });
  await ctx.billing.record({
    type: "ai_generation",
    costUsd: 0.01,
    metadata: { stage: "meta_details_generation", model: textModel },
  });

  ctx.logger.progress(0.78, "Saving slideshow manifest");
  const manifest: SlideshowManifest = {
    templateId: TEMPLATE_ID,
    size: { width: FRAME.width, height: FRAME.height, aspectRatio: "9:16" },
    textStyle: {
      fontId: textStyle.fontId,
      fontFamily: textStyle.fontFamily,
      fill: "#ffffff",
      backgroundColorId: textStyle.backgroundColorId,
      backgroundColorHex: textStyle.backgroundColorHex,
      shadow: "soft native TikTok-style black drop shadow",
      availableFonts: TEMPLATE_FONT_OPTIONS,
      availableBackgroundColors: TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS,
    },
    safeArea: TIKTOK_SAFE_AREA,
    metaDetails,
    slides,
  };
  const manifestArtifact = await ctx.storage.putJson(
    `manifests/${TEMPLATE_ID}.json`,
    manifest,
  );

  // Flat list of every stored URL: background and frame per slide, then the manifest.
  const files = slides
    .flatMap((slide) => [slide.backgroundImageUrl, slide.frameImageUrl])
    .filter((value): value is string => Boolean(value));
  if (manifestArtifact.url) {
    files.push(manifestArtifact.url);
  }

  ctx.logger.progress(1, "Slideshow images complete", {
    fileCount: files.length,
  });

  return {
    progress: 1,
    output: {
      files,
      manifest: manifestArtifact,
      // Meta details are exposed both flattened and nested.
      ...metaDetails,
      metaDetails,
      metaDetailsInput: {
        meta_details_prompt: payload.meta_details_prompt ?? null,
        slides: slides.map((slide) => ({
          index: slide.index,
          imagePrompt: slide.imagePrompt,
          imagePromptAttachments: slide.imagePromptAttachments,
          overlayText: slide.overlayText,
          durationMs: slide.durationMs,
        })),
      },
      // Payload shaped for the render_video operation; slides without a
      // stored frame URL are left out.
      renderVideoInput: {
        slides: slides.flatMap((slide) =>
          slide.frameImageUrl
            ? [{ imageUrl: slide.frameImageUrl, durationMs: slide.durationMs }]
            : [],
        ),
      },
      slides,
    },
  };
},
|
|
409
|
+
/**
 * Video stage: validates the frame list, submits one Remotion render at 30 fps
 * to the output key "renders/final.mp4", records a render billing entry, and
 * returns the frame URLs followed by the rendered video URL (when present).
 *
 * @param ctx   Job context supplying logger, remotion, billing and template config.
 * @param input Raw operation input; validated with `renderVideoInputSchema` (throws on invalid input).
 */
async renderVideoWorkflow(ctx, input) {
  const request = renderVideoInputSchema.parse(input);
  const configuredComposition =
    ctx.templateConfig.renderCompositionId ?? COMPOSITION_ID;
  const compositionId = String(configuredComposition);

  ctx.logger.progress(0.08, "Preparing slideshow frames for video render");
  const frames = await resolveSlidesForRender(request);

  ctx.logger.progress(0.56, "Submitting Remotion render");
  const renderRequest = {
    compositionId,
    serveUrl: remotionRuntimeConfig.serveUrl,
    entryPoint: remotionEntryPoint,
    outputKey: "renders/final.mp4",
    inputProps: { slides: frames, fps: 30 },
  };
  const render = await ctx.remotion.render(renderRequest);

  const renderCharge = {
    type: "render",
    costUsd: 0.35,
    metadata: render.metadata,
  };
  await ctx.billing.record(renderCharge);

  // Frame URLs first, then the rendered video when the render produced a URL.
  const files = [
    ...frames.map((frame) => frame.imageUrl),
    ...(render.outputUrl ? [render.outputUrl] : []),
  ];

  ctx.logger.progress(1, "Slideshow video complete", {
    fileCount: files.length,
    renderId: render.renderId,
  });

  const output = { files, render, slides: frames };
  return { progress: 1, output };
},
|
|
453
|
+
},
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
// Fixed instruction preamble that buildImagePrompt places in front of every
// user image prompt. The lines are joined with "\n" and sent verbatim to the
// image provider, so any edit to the wording changes generation behaviour.
// NOTE(review): "Do not render any words ... signage ..." and "zero visible
// text unless part of the scene itself" pull in different directions — confirm
// which rule is intended to win.
const IMAGE_PROMPT_SYSTEM_WRAPPER = [
  "Create an exact 9:16 portrait slideshow image for a TikTok-style vertical video.",
  "The composition must be designed for strict full-frame mobile portrait output at 1080x1920.",
  "Hold the camera vertically. The generated image itself must be tall portrait, not a landscape image placed inside a vertical canvas.",
  "Return only the background scene or subject image. This is not a phone screenshot, social app mockup, or UI composition.",
  "Do not produce square, landscape, or loose portrait framing.",
  "Do not letterbox, pillarbox, add borders, or leave empty margins.",
  "Crop tighter if needed so the frame reads as a native vertical shot. Avoid fake poster framing, inset postcards, or giant blank padding.",
  "Do not sacrifice the vertical composition just to keep the entire subject visible.",
  "Leave less important negative space near the top edge, right edge, and lower portion of the frame so later video overlays can sit there without covering the subject.",
  "Keep the right edge visually quieter and avoid placing important details there.",
  "Keep the top edge visually quieter and avoid placing important details there.",
  "Keep the lower portion visually quieter and avoid placing important details there.",
  "Reserve one clean editorial text lane in the upper-left or center-left portion of the frame for the overlay.",
  "That reserved lane is empty space only. Do not render the overlay or any placeholder text.",
  "Do not render any phone UI, social media UI, app chrome, search bars, tabs, buttons, likes, comments, avatars, battery icons, status bars, frames, stickers, or interface elements.",
  "Do not render any words, letters, captions, subtitles, titles, logos, signage, labels, watermarks, or typography inside the image.",
  "The final image must contain zero visible text unless part of the scene itself.",
  "Use cinematic lighting, crisp detail, and framing that already fits an exact 9:16 portrait frame, unless otherwise specified.",
  "Avoid giant blank ceiling, wall, floor, or sky bands that make the frame feel padded or zoomed out.",
].join("\n");
|
|
477
|
+
|
|
478
|
+
/**
 * Assembles the full provider prompt for one slide: the fixed system wrapper,
 * the user's visual prompt, an optional hint about attached reference media,
 * and a note on how much room to reserve for the caption.
 *
 * @param imagePrompt            User-supplied description of the scene.
 * @param overlayText            Caption text; only its length is embedded in the prompt.
 * @param imagePromptAttachments Reference media; a non-empty list adds the reference-media line.
 * @returns The prompt sections joined with newlines.
 */
function buildImagePrompt(
  imagePrompt: string,
  overlayText: string,
  imagePromptAttachments: string[] = [],
) {
  const sections = [
    IMAGE_PROMPT_SYSTEM_WRAPPER,
    `User visual prompt: ${imagePrompt}`,
  ];

  if (imagePromptAttachments.length > 0) {
    sections.push(
      `Use the attached reference media as visual guidance for subject matter, style, composition, or product details, but still return one fresh final image.`,
    );
  }

  sections.push(
    `Reserve room for a short editorial overlay approximately ${overlayText.length} characters long, but do not render the overlay text itself.`,
  );

  return sections.join("\n");
}
|
|
494
|
+
|
|
495
|
+
/**
 * Requests up to four image candidates, each with a progressively more
 * insistent "make it truly vertical" addendum, and keeps the candidate with
 * the lowest `portraitScore` (lower is better). Stops early as soon as a
 * candidate scores 0.16 or less.
 *
 * @param ctx   Job context; only `ctx.providers.generateImage` is used here.
 * @param input Provider, model, user prompt, reference attachments and caption text.
 * @returns The winning provider image together with the prompt that produced it and its score.
 * @throws Error("No image candidate was generated.") if no candidate was recorded.
 */
async function generatePortraitCandidateImage(
  ctx: Parameters<
    typeof template0000Definition.jobs.createSlideshowWorkflow
  >[0],
  input: {
    provider: ImageProvider;
    imageModel: string;
    imagePrompt: string;
    imagePromptAttachments: string[];
    overlayText: string;
  },
) {
  type Candidate = {
    bytes: Uint8Array;
    contentType: string;
    revisedPrompt: string | null;
    prompt: string;
    score: number;
  };

  // Score at or below which a candidate is accepted without further attempts.
  const acceptableScore = 0.16;

  // Every attempt starts from the same base prompt.
  const basePrompt = buildImagePrompt(
    input.imagePrompt,
    input.overlayText,
    input.imagePromptAttachments,
  );

  // Extra lines appended per attempt; the first attempt uses the base prompt alone.
  const correctionsPerAttempt: string[][] = [
    [],
    [
      "Critical correction: the previous output risked being too wide.",
      "Return a true full-frame mobile portrait composition.",
      "Do not create a landscape scene floating inside a vertical canvas.",
      "The subject and horizon should already be composed for 9:16 viewing.",
      "Tighter framing is preferred over wide empty margins.",
    ],
    [
      "Critical correction: return an unmistakably vertical composition.",
      "Push the camera closer if needed so the frame reads as native 9:16 portrait.",
      "No inset landscape window, no postcard composition, no wide aerial framing.",
      "Use a clean magazine-style composition with one obvious text lane.",
    ],
    [
      "Critical correction: the previous output likely had empty padding or weak mobile framing.",
      "Fill the frame with a real handheld-phone portrait composition.",
      "No white bars, no blank top or bottom bands, and no subject floating in the middle of excess space.",
      "Crop decisively and favor a native social-video camera distance.",
    ],
  ];
  const attemptPrompts = correctionsPerAttempt.map((extraLines) =>
    [basePrompt, ...extraLines].join("\n"),
  );

  let best: Candidate | null = null;

  for (const prompt of attemptPrompts) {
    const candidate = await ctx.providers.generateImage({
      provider: input.provider,
      model: input.imageModel,
      prompt,
      promptAttachments: input.imagePromptAttachments,
      size: sourceImageSizeForProvider(input.provider),
      aspectRatio: "9:16",
      imageSize: sourceImageOutputSizeForProvider(
        input.provider,
        input.imageModel,
      ),
    });
    const score = await portraitScore(candidate.bytes);

    const improves = best === null || score < best.score;
    if (improves) {
      best = { ...candidate, prompt, score };
    }
    if (score <= acceptableScore) {
      break;
    }
  }

  if (best === null) {
    throw new Error("No image candidate was generated.");
  }
  return best;
}
|
|
585
|
+
|
|
586
|
+
/**
 * Fallback caption layout: center zone, center-aligned, anchored at
 * x = 0.5 / y = 0.28 with a max width of 58.
 *
 * @returns A fresh layout object on every call.
 */
function defaultLayout(): Layout {
  const justification =
    "Default placement uses a centered TikTok-native text zone in the middle third of the safe frame.";
  const fallback: Layout = {
    zone: "center",
    align: "center",
    maxWidthPercent: 58,
    anchorXPercent: 0.5,
    anchorYPercent: 0.28,
    justification,
  };
  return fallback;
}
|
|
597
|
+
|
|
598
|
+
/**
 * Coerces an arbitrary config value into a supported image provider.
 *
 * @param value Anything read from the template config.
 * @returns `value` when it is a supported provider id, otherwise "gemini".
 */
function parseImageProvider(value: unknown): ImageProvider {
  if (typeof value !== "string") {
    return "gemini";
  }
  return isImageProvider(value) ? value : "gemini";
}
|
|
604
|
+
|
|
605
|
+
/**
 * Type guard: true only for strings listed in `supportedImageProviders`.
 */
function isImageProvider(value: unknown): value is ImageProvider {
  if (typeof value !== "string") {
    return false;
  }
  return supportedImageProviders.some((providerId) => providerId === value);
}
|
|
611
|
+
|
|
612
|
+
/**
 * Pixel size string requested from the image provider.
 *
 * NOTE(review): the OpenAI default model in this file is "gpt-image-1" while
 * "1024x1792" is the portrait size documented for DALL·E 3 — confirm the
 * provider layer accepts or maps this value for the configured model.
 *
 * @returns "1024x1792" for OpenAI, "1080x1920" for every other provider.
 */
function sourceImageSizeForProvider(provider: ImageProvider) {
  return provider === "openai" ? "1024x1792" : "1080x1920";
}
|
|
618
|
+
|
|
619
|
+
/**
 * Image model used when the template config does not set `imageModel`.
 * Any provider other than OpenAI or OpenRouter gets the Gemini default.
 */
function defaultImageModelForProvider(provider: ImageProvider) {
  switch (provider) {
    case "openai":
      return "gpt-image-1";
    case "openrouter":
      return "bytedance-seed/seedream-4.5";
    default:
      return "gemini-3.1-flash-image-preview";
  }
}
|
|
628
|
+
|
|
629
|
+
/**
 * Text model used when the template config does not set `textModel`.
 * Any provider other than OpenAI or OpenRouter gets the Gemini default.
 */
function defaultTextModelForProvider(provider: ImageProvider) {
  switch (provider) {
    case "openai":
      return "gpt-4.1-mini";
    case "openrouter":
      return "openai/gpt-4.1-mini";
    default:
      return "gemini-3.1-flash-lite";
  }
}
|
|
638
|
+
|
|
639
|
+
/**
 * Optional coarse output-size hint passed to the provider as `imageSize`.
 *
 * @returns "2K" for OpenRouter; "1K" for Gemini when the model is one of the
 *          two listed preview image models; otherwise undefined.
 */
function sourceImageOutputSizeForProvider(
  provider: ImageProvider,
  model: string,
) {
  if (provider === "openrouter") {
    return "2K" as const;
  }
  if (provider === "gemini") {
    const isSizedGeminiModel =
      model === "gemini-3.1-flash-image-preview" ||
      model === "gemini-3-pro-image-preview";
    if (isSizedGeminiModel) {
      return "1K" as const;
    }
  }
  return undefined;
}
|
|
657
|
+
|
|
658
|
+
/**
 * Derives the caption text style from the template config.
 *
 * Unknown or missing `captionFont` falls back to "montserrat"; unknown or
 * missing `captionBackgroundColor` falls back to the "none" sentinel, in which
 * case no background hex is reported.
 *
 * @param config Template config values (untyped).
 * @returns Font id and family plus background colour id and hex (hex is null without a match).
 */
function resolveTextStyleSpec(config: Record<string, unknown>): TextStyleSpec {
  const { captionFont, captionBackgroundColor } = config;

  const fontId: TemplateFontId = isTemplateFontId(captionFont)
    ? captionFont
    : "montserrat";
  // If no option matches the id, the second entry of the option list is used.
  const matchedFont =
    TEMPLATE_FONT_OPTIONS.find((candidate) => candidate.id === fontId) ??
    TEMPLATE_FONT_OPTIONS[1];

  let backgroundColorId:
    | TemplateTextBackgroundColorId
    | typeof TEXT_BACKGROUND_NONE = TEXT_BACKGROUND_NONE;
  if (isTemplateBackgroundColorId(captionBackgroundColor)) {
    backgroundColorId = captionBackgroundColor;
  }

  let backgroundColorHex: string | null = null;
  if (backgroundColorId && backgroundColorId !== TEXT_BACKGROUND_NONE) {
    const matchedColor = TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS.find(
      (candidate) => candidate.id === backgroundColorId,
    );
    backgroundColorHex = matchedColor?.hex ?? null;
  }

  return {
    fontId,
    fontFamily: matchedFont.family,
    backgroundColorId,
    backgroundColorHex,
  };
}
|
|
684
|
+
|
|
685
|
+
/**
 * Type guard: true only for strings listed in `TEMPLATE_FONT_IDS`.
 */
function isTemplateFontId(value: unknown): value is TemplateFontId {
  if (typeof value !== "string") {
    return false;
  }
  return TEMPLATE_FONT_IDS.some((fontId) => fontId === value);
}
|
|
691
|
+
|
|
692
|
+
/**
 * Type guard: true only for strings listed in `TEMPLATE_TEXT_BACKGROUND_COLOR_IDS`.
 */
function isTemplateBackgroundColorId(
  value: unknown,
): value is TemplateTextBackgroundColorId {
  if (typeof value !== "string") {
    return false;
  }
  return TEMPLATE_TEXT_BACKGROUND_COLOR_IDS.some(
    (colorId) => colorId === value,
  );
}
|
|
702
|
+
|
|
703
|
+
/**
 * Formats a number as a string left-padded with zeros to at least two characters.
 */
function pad2(value: number) {
  const digits = `${value}`;
  return digits.padStart(2, "0");
}
|
|
706
|
+
|
|
707
|
+
/**
 * Returns the slides of an already-validated render request unchanged.
 * Async, so callers await it.
 */
async function resolveSlidesForRender(
  input: z.infer<typeof renderVideoInputSchema>,
) {
  const { slides } = input;
  return slides;
}
|
|
712
|
+
|
|
713
|
+
/**
 * Zod schema describing a slideshow manifest: frame size, caption text
 * style, safe-area insets, post metadata, and the per-slide entries.
 */
const slideshowManifestSchema = z.object({
  templateId: z.string(),
  // Frame dimensions; only the 9:16 aspect ratio is accepted.
  size: z.object({
    width: z.number(),
    height: z.number(),
    aspectRatio: z.literal("9:16"),
  }),
  // Caption styling that was applied, plus the catalog of selectable options.
  textStyle: z.object({
    fontId: z.enum(TEMPLATE_FONT_IDS),
    fontFamily: z.string(),
    fill: z.string(),
    // Either the "none" sentinel or one of the known background color ids.
    backgroundColorId: z.enum([
      TEXT_BACKGROUND_NONE,
      ...TEMPLATE_TEXT_BACKGROUND_COLOR_IDS,
    ]),
    backgroundColorHex: z.string().nullable(),
    shadow: z.string(),
    availableFonts: z.array(
      z.object({
        id: z.enum(TEMPLATE_FONT_IDS),
        label: z.string(),
        family: z.string(),
        assetFile: z.string(),
      }),
    ),
    availableBackgroundColors: z.array(
      z.object({
        id: z.enum(TEMPLATE_TEXT_BACKGROUND_COLOR_IDS),
        label: z.string(),
        hex: z.string(),
      }),
    ),
  }),
  // Insets applied on each side of the frame.
  safeArea: z.object({
    left: z.number(),
    right: z.number(),
    top: z.number(),
    bottom: z.number(),
  }),
  // Post metadata fields (all plain strings).
  metaDetails: z.object({
    title: z.string(),
    description: z.string(),
    pinned_comment: z.string(),
    location: z.string(),
    song: z.string(),
  }),
  slides: z.array(
    z.object({
      index: z.number(),
      imagePrompt: z.string(),
      imagePromptAttachments: z.array(z.string().url()),
      overlayText: z.string(),
      durationMs: z.number().int(),
      backgroundImageUrl: z.string().url().nullable(),
      frameImageUrl: z.string().url().nullable(),
      prompt: z.string(),
      revisedPrompt: z.string().nullable(),
      // Caption placement chosen for this slide.
      layout: z.object({
        zone: z.enum(["top", "center", "bottom"]),
        align: z.enum(["left", "center", "right"]),
        maxWidthPercent: z.number(),
        anchorXPercent: z.number().optional(),
        anchorYPercent: z.number().optional(),
        justification: z.string(),
      }),
    }),
  ),
});
|
|
781
|
+
|
|
782
|
+
/**
 * Converts either accepted slide input shape into one camelCase record.
 *
 * - Tuple form `[imagePrompt, caption, durationMs?]`: attachments default to
 *   an empty list and a missing duration defaults to 4000 ms.
 * - Object form: the snake_case fields are copied across under their
 *   camelCase names.
 */
function normalizeSlideInput(input: z.infer<typeof slideInputSchema>) {
  if (!Array.isArray(input)) {
    return {
      imagePrompt: input.image_prompt,
      imagePromptAttachments: input.image_prompt_attachments,
      overlayText: input.caption,
      durationMs: input.duration_ms,
    };
  }

  const tupleDuration = input[2];
  return {
    imagePrompt: input[0],
    imagePromptAttachments: [],
    overlayText: input[1],
    durationMs: tupleDuration ?? 4000,
  };
}
|
|
799
|
+
|
|
800
|
+
/**
 * Asks the configured text provider for post metadata describing the slides.
 *
 * Builds the prompt from the slides (plus optional user guidance), calls
 * `ctx.providers.generateText` at temperature 0.7, and passes the returned
 * text to `parseMetaDetailsResponse` together with the slides.
 *
 * @param ctx - Job context providing the `providers` facade.
 * @param input - Provider, text model, slides, and optional extra guidance.
 * @returns The parsed metadata.
 */
async function generateMetaDetails(
  ctx: TemplateJobContext,
  input: {
    provider: ImageProvider;
    textModel: string;
    slides: SlideOutput[];
    metaDetailsPrompt?: string;
  },
): Promise<MetaDetails> {
  const { provider, textModel, slides, metaDetailsPrompt } = input;
  const prompt = buildMetaDetailsPrompt(slides, metaDetailsPrompt);
  const { text } = await ctx.providers.generateText({
    provider,
    model: textModel,
    prompt,
    temperature: 0.7,
  });
  return parseMetaDetailsResponse(text, slides);
}
|
|
818
|
+
|
|
819
|
+
/**
 * Assembles the text prompt used to generate post metadata.
 *
 * The prompt is a list of sections separated by blank lines: the fixed
 * instructions, the optional user guidance (or a note that none was given),
 * a "Slides:" header, and one section per slide listing its caption, image
 * prompt, attachments, and duration.
 *
 * @param slides - Slides to describe in the prompt.
 * @param metaDetailsPrompt - Optional extra guidance; an empty string is
 *   treated the same as no guidance.
 */
function buildMetaDetailsPrompt(
  slides: SlideOutput[],
  metaDetailsPrompt?: string,
) {
  const instructions = [
    "You are writing TikTok post metadata for a vertical slideshow video.",
    "Return only valid JSON with exactly these string keys: title, description, pinned_comment, location, song.",
    "Do not include markdown fences or commentary.",
    "Keep title under 90 characters.",
    "Keep description under 300 characters.",
    "Keep pinned_comment under 120 characters.",
    "Keep location under 80 characters.",
    "Keep song under 120 characters.",
    "Write concise, high-performing, native-feeling TikTok social copy.",
    "Use the slide captions and visual prompts to infer the post theme.",
  ];

  let guidance = "No extra user guidance was supplied.";
  if (metaDetailsPrompt) {
    guidance = `Additional user guidance:\n${metaDetailsPrompt}`;
  }

  const slideSections: string[] = [];
  for (const slide of slides) {
    const attachments = slide.imagePromptAttachments.length
      ? `attachments=${slide.imagePromptAttachments.join(", ")}`
      : "attachments=none";
    slideSections.push(
      [
        `Slide ${slide.index + 1}:`,
        `caption=${slide.overlayText}`,
        `image_prompt=${slide.imagePrompt}`,
        attachments,
        `duration_ms=${slide.durationMs}`,
      ].join("\n"),
    );
  }

  return [...instructions, guidance, "Slides:", ...slideSections].join("\n\n");
}
|
|
851
|
+
|
|
852
|
+
/**
 * Parses the model's raw text into `MetaDetails`.
 *
 * Leading/trailing whitespace and a surrounding markdown code fence
 * (with or without a `json` tag) are stripped before `JSON.parse`. The result
 * must be an object whose five keys are all non-empty strings; on any parse or
 * validation failure the slide-derived fallback is returned instead.
 *
 * @param raw - Text returned by the model.
 * @param slides - Slides used to build the fallback metadata.
 */
function parseMetaDetailsResponse(
  raw: string,
  slides: SlideOutput[],
): MetaDetails {
  // Computed up front, exactly as before, so failures in the fallback builder
  // surface regardless of whether parsing succeeds.
  const fallback = buildFallbackMetaDetails(slides);

  const fencePatterns = [/^```json\s*/i, /^```\s*/i, /\s*```$/i];
  const cleaned = fencePatterns.reduce(
    (text, pattern) => text.replace(pattern, ""),
    raw.trim(),
  );

  const nonEmpty = () => z.string().min(1);
  try {
    return z
      .object({
        title: nonEmpty(),
        description: nonEmpty(),
        pinned_comment: nonEmpty(),
        location: nonEmpty(),
        song: nonEmpty(),
      })
      .parse(JSON.parse(cleaned));
  } catch {
    return fallback;
  }
}
|
|
877
|
+
|
|
878
|
+
/**
 * Builds deterministic metadata from the slides themselves, used when the
 * model's response cannot be parsed.
 *
 * The lead caption is the first slide's overlay text ("Watch this" if there
 * is none). The supporting caption is the second slide's overlay text, else
 * the first slide's image prompt, else "See the full story". Location and song
 * are fixed placeholder values.
 */
function buildFallbackMetaDetails(slides: SlideOutput[]): MetaDetails {
  const first = slides[0];
  const second = slides[1];
  const leadCaption = first?.overlayText ?? "Watch this";
  const supportingCaption =
    second?.overlayText ?? first?.imagePrompt ?? "See the full story";

  const description = `${leadCaption}. ${supportingCaption}.`;
  const pinnedComment = `Which slide hit hardest: "${leadCaption}" or "${supportingCaption}"?`;

  return {
    title: truncateText(leadCaption, 90),
    description: truncateText(description, 300),
    pinned_comment: truncateText(pinnedComment, 120),
    location: "United States",
    song: "Original Sound",
  };
}
|
|
893
|
+
|
|
894
|
+
/**
 * Shortens `value` to at most `maxLength` UTF-16 code units, ending with an
 * ellipsis ("…") when anything was removed.
 *
 * Strings that already fit are returned unchanged. Otherwise the first
 * `maxLength - 1` code units are kept, trailing whitespace is trimmed, and the
 * ellipsis is appended. The cut never lands between the two halves of a
 * surrogate pair (e.g. an emoji), so the result never contains a dangling high
 * surrogate.
 *
 * @param value - Text to shorten.
 * @param maxLength - Maximum length in UTF-16 code units, ellipsis included.
 */
function truncateText(value: string, maxLength: number) {
  if (value.length <= maxLength) {
    return value;
  }

  let end = Math.max(0, maxLength - 1);
  if (end > 0) {
    // If the last kept unit is a high surrogate, its partner would be cut off;
    // drop it as well rather than emit half a character.
    const lastKept = value.charCodeAt(end - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
      end -= 1;
    }
  }

  return `${value.slice(0, end).trimEnd()}…`;
}
|
|
900
|
+
|
|
901
|
+
/**
 * Composites the caption layers onto a slide image and returns the result as
 * a PNG buffer.
 *
 * @param imageBytes - Encoded background image.
 * @param overlayText - Caption to draw.
 * @param layout - Placement of the caption block.
 * @param textStyle - Font and background styling for the caption.
 */
async function renderFinishedSlide(
  imageBytes: Uint8Array,
  overlayText: string,
  layout: Layout,
  textStyle: TextStyleSpec,
) {
  const captionLayers = await buildTextLayers(overlayText, layout, textStyle);
  const background = sharp(Buffer.from(imageBytes));
  return background.composite(captionLayers).png().toBuffer();
}
|
|
910
|
+
|
|
911
|
+
/**
 * Produces the composite layers for a caption block.
 *
 * The block width is a clamped fraction (0.4–0.62) of the safe-area width.
 * The text is wrapped and sized by `fitLinesToBlock`, positioned through
 * `resolveTextBlockRect`, and every line is rendered to its own layers
 * concurrently. Each layer is then clipped to the frame; layers that end up
 * with no visible area are dropped.
 */
async function buildTextLayers(
  overlayText: string,
  layout: Layout,
  textStyle: TextStyleSpec,
) {
  const safeWidth =
    FRAME.width - TIKTOK_SAFE_AREA.left - TIKTOK_SAFE_AREA.right;
  const widthFraction = clamp(layout.maxWidthPercent / 100, 0.4, 0.62);
  const blockWidth = Math.round(safeWidth * widthFraction);

  const fitted = fitLinesToBlock(overlayText, blockWidth, textStyle.fontId);
  const { fontSize, lines, lineHeight } = fitted;

  // 10 extra pixels are added to the stacked line heights.
  const blockHeight = Math.round(lines.length * lineHeight + 10);
  const origin = resolveTextBlockRect(layout, blockWidth, blockHeight);

  const perLine = lines.map(async (line, lineIndex) =>
    buildLineLayers({
      line,
      lineIndex,
      lineHeight,
      fontSize,
      blockWidth,
      blockX: origin.left,
      blockY: origin.top,
      layout,
      textStyle,
    }),
  );
  const unclipped = (await Promise.all(perLine)).flat();

  const clipped = await Promise.all(
    unclipped.map((layer) => clipCompositeLayer(layer)),
  );
  return clipped.filter((layer): layer is NonNullable<typeof layer> =>
    Boolean(layer),
  );
}
|
|
957
|
+
|
|
958
|
+
/**
 * Chooses a font size and line wrapping for `text` inside `blockWidth`.
 *
 * Starts from the size suggested by `pickFontSize` and shrinks it by 3 at a
 * time while the wrap yields more than 6 lines, or a line exceeds the
 * estimated character budget by more than one, stopping once the size is no
 * longer above 34.
 *
 * @returns The final font size, the wrapped lines, and the matching line height.
 */
function fitLinesToBlock(
  text: string,
  blockWidth: number,
  fontId: TemplateFontId,
) {
  // Wraps the text for a given font size and reports the budget it used.
  const wrapAt = (size: number) => {
    const budget = approximateMaxChars(blockWidth, size);
    return { budget, wrapped: wrapText(text, budget) };
  };

  let fontSize = pickFontSize(text, fontId);
  let attempt = wrapAt(fontSize);

  while (
    fontSize > 34 &&
    (attempt.wrapped.length > 6 ||
      longestLine(attempt.wrapped) > attempt.budget + 1)
  ) {
    fontSize -= 3;
    attempt = wrapAt(fontSize);
  }

  return {
    fontSize,
    lines: attempt.wrapped,
    lineHeight: pickLineHeight(fontSize, fontId),
  };
}
|
|
980
|
+
|
|
981
|
+
/**
 * Picks the starting font size from the caption length and font.
 *
 * Each font has a list of `[maxLength, size]` tiers; the first tier whose
 * `maxLength` is not exceeded wins, otherwise the font's smallest size is
 * used. "yesteryear" and "dm_serif_display" have their own scales; every other
 * font uses the default five-step scale.
 */
function pickFontSize(text: string, fontId: TemplateFontId) {
  type Tier = [maxLength: number, size: number];

  let tiers: Tier[];
  let smallest: number;
  if (fontId === "yesteryear") {
    tiers = [
      [22, 74],
      [38, 64],
    ];
    smallest = 54;
  } else if (fontId === "dm_serif_display") {
    tiers = [
      [22, 66],
      [38, 58],
    ];
    smallest = 48;
  } else {
    tiers = [
      [22, 58],
      [38, 52],
      [62, 46],
      [96, 40],
    ];
    smallest = 36;
  }

  const length = text.length;
  for (const [maxLength, size] of tiers) {
    if (length <= maxLength) {
      return size;
    }
  }
  return smallest;
}
|
|
1014
|
+
|
|
1015
|
+
/**
 * Returns the rounded line height for a font size.
 *
 * The multiplier is 1.04 for "yesteryear", 1.08 for "dm_serif_display", and
 * 1.12 for every other font.
 */
function pickLineHeight(fontSize: number, fontId: TemplateFontId) {
  const multiplier =
    fontId === "yesteryear"
      ? 1.04
      : fontId === "dm_serif_display"
        ? 1.08
        : 1.12;
  return Math.round(fontSize * multiplier);
}
|
|
1024
|
+
|
|
1025
|
+
/**
 * Estimates how many characters fit on one line of `blockWidth` pixels.
 *
 * Assumes an average glyph advance of 0.42 × font size (never less than 20
 * pixels) and clamps the result to the range 12–34.
 */
function approximateMaxChars(blockWidth: number, fontSize: number) {
  const glyphAdvance = Math.max(20, fontSize * 0.42);
  const rawCount = Math.floor(blockWidth / glyphAdvance);
  return clamp(rawCount, 12, 34);
}
|
|
1028
|
+
|
|
1029
|
+
/**
 * Renders one caption line into its composite layers.
 *
 * Layers are returned back-to-front: an optional rounded background chip
 * (only when the text style has a background color), a wide soft shadow
 * (blur 8, offset 8 px down), a tight shadow (blur 2.4, offset 4 px down),
 * and finally the white text itself. The line is positioned horizontally
 * inside the block according to `layout.align`, using the trimmed width of
 * the rendered text.
 */
async function buildLineLayers(input: {
  line: string;
  lineIndex: number;
  lineHeight: number;
  fontSize: number;
  blockWidth: number;
  blockX: number;
  blockY: number;
  layout: Layout;
  textStyle: TextStyleSpec;
}) {
  const { blockX, blockWidth, lineHeight, textStyle } = input;
  const shadowPadding = 40;
  const lineTop = input.blockY + input.lineIndex * lineHeight;

  const glyphs = await renderTrimmedTextBuffer({
    text: input.line,
    blockWidth,
    lineHeight,
    fontSize: input.fontSize,
    fontFamily: textStyle.fontFamily,
    fontPath: overlayFonts[textStyle.fontId].path,
    color: "#ffffff",
  });

  let lineLeft: number;
  switch (input.layout.align) {
    case "center":
      lineLeft = blockX + Math.round((blockWidth - glyphs.width) / 2);
      break;
    case "right":
      lineLeft = blockX + blockWidth - glyphs.width;
      break;
    default:
      lineLeft = blockX;
  }

  const layers: Array<{ input: Buffer; left: number; top: number }> = [];

  if (textStyle.backgroundColorHex) {
    const chip = await buildTextChip({
      width: glyphs.width,
      height: glyphs.height,
      fill: textStyle.backgroundColorHex,
    });
    // The chip extends 14 px left and 8 px above the text.
    layers.push({ input: chip, left: lineLeft - 14, top: lineTop - 8 });
  }

  // Both shadows come from the same text bitmap and differ only in blur.
  const shadowWithBlur = (blur: number) =>
    renderShadowLayer({
      textBuffer: glyphs.buffer,
      width: glyphs.width,
      height: glyphs.height,
      shadowPadding,
      blur,
    });
  const shadowFar = await shadowWithBlur(8);
  const shadowNear = await shadowWithBlur(2.4);

  // Shadow bitmaps are padded on every side, so shift them back by the padding.
  const shadowLeft = lineLeft - shadowPadding;
  layers.push(
    { input: shadowFar, left: shadowLeft, top: lineTop - shadowPadding + 8 },
    { input: shadowNear, left: shadowLeft, top: lineTop - shadowPadding + 4 },
    { input: glyphs.buffer, left: lineLeft, top: lineTop },
  );

  return layers;
}
|
|
1105
|
+
|
|
1106
|
+
/**
 * Renders a single line of text to a trimmed RGBA PNG and measures it.
 *
 * The text is escaped for Pango markup and wrapped in a `<span>` that sets
 * the foreground color. It is drawn on a canvas slightly larger than the
 * block (40 px wider, 28 px taller than the line height), then trimmed.
 *
 * @returns The PNG buffer and its pixel size; if the size cannot be read, the
 *   block width and line height are reported instead.
 */
async function renderTrimmedTextBuffer(input: {
  text: string;
  blockWidth: number;
  lineHeight: number;
  fontSize: number;
  fontFamily: string;
  fontPath: string;
  color: string;
}) {
  const { blockWidth, lineHeight } = input;
  const markup = `<span foreground="${input.color}">${escapePango(input.text)}</span>`;

  const buffer = await sharp({
    text: {
      text: markup,
      rgba: true,
      width: blockWidth + 40,
      height: lineHeight + 28,
      align: "left",
      spacing: 0,
      font: `${input.fontFamily} ${input.fontSize}`,
      fontfile: input.fontPath,
    },
  })
    .trim()
    .png()
    .toBuffer();

  const size = await sharp(buffer).metadata();
  return {
    buffer,
    width: size.width ?? blockWidth,
    height: size.height ?? lineHeight,
  };
}
|
|
1138
|
+
|
|
1139
|
+
/**
 * Builds a blurred shadow bitmap from a rendered text bitmap.
 *
 * The text is placed on a transparent canvas that is `shadowPadding` pixels
 * larger on every side, its color is driven to zero via `modulate` and
 * `linear`, and the result is blurred by `blur` and encoded as PNG.
 */
async function renderShadowLayer(input: {
  textBuffer: Buffer;
  width: number;
  height: number;
  shadowPadding: number;
  blur: number;
}) {
  const { shadowPadding: pad } = input;
  const canvas = sharp({
    create: {
      width: input.width + pad * 2,
      height: input.height + pad * 2,
      channels: 4,
      background: { r: 0, g: 0, b: 0, alpha: 0 },
    },
  });

  const darkened = canvas
    .composite([{ input: input.textBuffer, left: pad, top: pad }])
    .modulate({ brightness: 0 })
    .linear(0, 0);

  return darkened.blur(input.blur).png().toBuffer();
}
|
|
1167
|
+
|
|
1168
|
+
/**
 * Renders the rounded background chip placed behind a caption line.
 *
 * The chip is 28 px wider and 16 px taller than the text it backs, has
 * 12 px corner radii, and is filled with `fill` at 0.86 opacity.
 *
 * @returns The chip as a PNG buffer.
 */
async function buildTextChip(input: {
  width: number;
  height: number;
  fill: string;
}) {
  const chipWidth = input.width + 28;
  const chipHeight = input.height + 16;
  const svg = `<svg width="${chipWidth}" height="${chipHeight}" xmlns="http://www.w3.org/2000/svg">
<rect x="0" y="0" width="${chipWidth}" height="${chipHeight}" rx="12" ry="12" fill="${input.fill}" fill-opacity="0.86"/>
</svg>`;
  return sharp(Buffer.from(svg)).png().toBuffer();
}
|
|
1185
|
+
|
|
1186
|
+
/**
 * Greedily wraps `text` into lines of at most `maxChars` characters.
 *
 * Words are split on whitespace and never broken: a single word longer than
 * `maxChars` is placed on a line by itself. When the text contains no words,
 * the original text is returned as the only line.
 */
function wrapText(text: string, maxChars: number) {
  const wrapped: string[] = [];
  let pending = "";

  for (const word of text.trim().split(/\s+/)) {
    if (!word) {
      continue;
    }
    if (!pending) {
      // The first word on a line is always accepted, whatever its length.
      pending = word;
      continue;
    }
    const extended = `${pending} ${word}`;
    if (extended.length <= maxChars) {
      pending = extended;
    } else {
      wrapped.push(pending);
      pending = word;
    }
  }

  if (pending) {
    wrapped.push(pending);
  }
  return wrapped.length ? wrapped : [text];
}
|
|
1204
|
+
|
|
1205
|
+
/**
 * Returns the length of the longest string in `lines` (0 for an empty list).
 */
function longestLine(lines: string[]) {
  let longest = 0;
  for (const line of lines) {
    longest = Math.max(longest, line.length);
  }
  return longest;
}
|
|
1208
|
+
|
|
1209
|
+
/**
 * Scores an image by how far its aspect ratio is from 9:16 plus how much
 * horizontal detail sits in its top and bottom bands.
 *
 * Two fixes over the previous version:
 * - `metadata()` reports the stored dimensions, not the EXIF-rotated ones, so
 *   width and height are swapped for EXIF orientations 5–8 (quarter turns).
 * - The thumbnail is forced to a single 8-bit channel with no alpha, because
 *   the band scan indexes the raw buffer as one byte per pixel.
 *
 * @param input - Encoded image bytes.
 * @returns `|aspect - 9/16|` plus the edge-band contrast term.
 */
async function portraitScore(input: Uint8Array) {
  const source = Buffer.from(input);
  const metadata = await sharp(source).metadata();

  // Orientations 5-8 are 90/270 degree rotations: displayed width and height
  // are the stored height and width. Only swap when both are known.
  const quarterTurned =
    (metadata.orientation ?? 0) >= 5 &&
    metadata.width !== undefined &&
    metadata.height !== undefined;
  const width =
    (quarterTurned ? metadata.height : metadata.width) ?? FRAME.width;
  const height =
    (quarterTurned ? metadata.width : metadata.height) ?? FRAME.height;
  const aspect = width / height;

  const thumbnailWidth = 96;
  const thumbnailHeight = 170;
  const sample = await sharp(source)
    .rotate()
    .removeAlpha()
    .resize(thumbnailWidth, thumbnailHeight, { fit: "fill" })
    .grayscale()
    // Guarantee exactly thumbnailWidth * thumbnailHeight bytes of raw output.
    .toColourspace("b-w")
    .raw()
    .toBuffer();

  const edgeContrast = edgeBandContrast(
    sample,
    thumbnailWidth,
    thumbnailHeight,
  );
  return Math.abs(aspect - 9 / 16) + edgeContrast;
}
|
|
1229
|
+
|
|
1230
|
+
/**
 * Picks a caption layout by scoring candidate regions on a small greyscale
 * thumbnail of the image; the candidate with the lowest score wins.
 *
 * Each candidate's score is the region score from `scoreLayoutRegion` plus
 * the candidate's own `stylePenalty`. If there are no candidates the default
 * layout is used.
 *
 * The thumbnail is forced to a single 8-bit channel with no alpha, because
 * the scorer indexes the raw buffer as one byte per pixel.
 *
 * @param imageBytes - Encoded image to analyze.
 * @param overlayText - Caption that will be placed on the image.
 */
async function chooseLayoutFromImage(
  imageBytes: Uint8Array,
  overlayText: string,
): Promise<Layout> {
  const thumbnailWidth = 108;
  const thumbnailHeight = 192;
  const sample = await sharp(Buffer.from(imageBytes))
    .removeAlpha()
    .resize(thumbnailWidth, thumbnailHeight, { fit: "fill" })
    .grayscale()
    // Guarantee exactly thumbnailWidth * thumbnailHeight bytes of raw output.
    .toColourspace("b-w")
    .raw()
    .toBuffer();

  const candidates = buildLayoutCandidates(overlayText);

  let best = defaultLayout();
  let bestScore = Number.POSITIVE_INFINITY;
  for (const candidate of candidates) {
    const estimate = estimateTextBlock(overlayText, candidate);
    const region = regionForLayout(
      candidate,
      estimate.blockWidth,
      estimate.blockHeight,
    );
    const score =
      scoreLayoutRegion(sample, thumbnailWidth, thumbnailHeight, region) +
      candidate.stylePenalty;
    // Strict comparison: on ties the earliest candidate is kept.
    if (score < bestScore) {
      best = candidate;
      bestScore = score;
    }
  }

  // Copy only the Layout fields so scoring-only properties are not returned.
  return {
    zone: best.zone,
    align: best.align,
    maxWidthPercent: best.maxWidthPercent,
    anchorXPercent: best.anchorXPercent,
    anchorYPercent: best.anchorYPercent,
    justification: `${best.justification} Selected by local safe-zone scoring to minimize busy backgrounds and avoid TikTok UI chrome.`,
  };
}
|
|
1271
|
+
|
|
1272
|
+
/**
 * Chooses the caption layout for a slide.
 *
 * When an image URL is available, the provider's layout analysis is tried
 * first and its answer is normalized. If there is no URL, or the analysis
 * throws, a warning is logged (in the failure case) and the local image scorer
 * is used instead.
 */
async function chooseSlideLayout(
  ctx: TemplateJobContext,
  input: {
    provider: ImageProvider;
    model: string;
    imageBytes: Uint8Array;
    imageUrl: string | null;
    overlayText: string;
  },
) {
  const { provider, model, imageUrl, overlayText } = input;
  const scoreLocally = () =>
    chooseLayoutFromImage(input.imageBytes, overlayText);

  if (!imageUrl) {
    return scoreLocally();
  }

  try {
    const analysis = await ctx.providers.analyzeImageLayout({
      provider,
      model,
      imageUrl,
      overlayText,
    });
    return normalizeAiLayout(analysis);
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    ctx.logger.warn(
      "AI layout analysis failed, falling back to local scorer",
      { provider, model, message },
    );
  }

  return scoreLocally();
}
|
|
1304
|
+
|
|
1305
|
+
/**
 * Normalizes a provider-suggested layout toward centered caption placement.
 *
 * - "right" alignment becomes "center"; "left" and "center" are kept.
 * - The horizontal anchor is 0.5 for center and 0.38 for left.
 * - The vertical anchor is 0.18 (top), 0.28 (center) or 0.38 (bottom).
 * - The "bottom" zone is reported as "center".
 * - `maxWidthPercent` is clamped to 46–62.
 *
 * The previous version also had a 0.62 horizontal anchor for right alignment,
 * but it could never be reached because "right" is rewritten to "center"
 * first; that dead branch is removed with no change in results.
 */
function normalizeAiLayout(input: {
  zone: "top" | "center" | "bottom";
  align: "left" | "center" | "right";
  maxWidthPercent: number;
  justification: string;
}): Layout {
  const normalizedAlign = input.align === "right" ? "center" : input.align;
  const anchorXPercent = normalizedAlign === "center" ? 0.5 : 0.38;
  const anchorYPercent =
    input.zone === "top" ? 0.18 : input.zone === "bottom" ? 0.38 : 0.28;

  return {
    zone: input.zone === "bottom" ? "center" : input.zone,
    align: normalizedAlign,
    maxWidthPercent: clamp(input.maxWidthPercent, 46, 62),
    anchorXPercent,
    anchorYPercent,
    justification: `${input.justification} Normalized toward centered TikTok-native caption placement.`,
  };
}
|
|
1330
|
+
|
|
1331
|
+
/**
 * Estimates the pixel size of the caption block for a candidate layout.
 *
 * For centered layouts the available width is the frame width minus the
 * native-UI padding on both sides; otherwise it is the safe-area width. The
 * block takes a clamped fraction (0.4–0.62) of that width, and the text is
 * fitted using the "montserrat" metrics.
 *
 * NOTE(review): `buildTextLayers` always derives its block width from the
 * safe-area width and uses the actual font id, so this estimate may differ
 * from what is finally rendered — confirm whether that is intended.
 *
 * @returns The block width, block height, and fitted font size.
 */
function estimateTextBlock(text: string, layout: Layout) {
  let availableWidth: number;
  const safeWidth =
    FRAME.width - TIKTOK_SAFE_AREA.left - TIKTOK_SAFE_AREA.right;
  const centeredBounds = resolveNativeUiTextPaddingBounds();
  if (layout.align === "center") {
    availableWidth = FRAME.width - centeredBounds.minLeft * 2;
  } else {
    availableWidth = safeWidth;
  }

  const widthFraction = clamp(layout.maxWidthPercent / 100, 0.4, 0.62);
  const blockWidth = Math.round(availableWidth * widthFraction);
  const fitted = fitLinesToBlock(text, blockWidth, "montserrat");

  return {
    blockWidth,
    blockHeight: Math.round(fitted.lines.length * fitted.lineHeight + 10),
    fontSize: fitted.fontSize,
  };
}
|
|
1353
|
+
|
|
1354
|
+
/**
 * Returns the frame-space rectangle a caption block of the given size would
 * occupy under `layout`. Delegates to `resolveTextBlockRect`.
 */
function regionForLayout(
  layout: Layout,
  blockWidth: number,
  blockHeight: number,
) {
  const rect = resolveTextBlockRect(layout, blockWidth, blockHeight);
  return rect;
}
|
|
1361
|
+
|
|
1362
|
+
/**
 * Computes where a caption block of the given size sits in the frame.
 *
 * Anchored mode (both anchor percents present):
 * - horizontal: centered layouts use the clamped frame-centered position and
 *   ignore the X anchor; other alignments interpolate across the free
 *   safe-area width by the X anchor (clamped to 0–1).
 * - vertical: interpolates across the free safe-area height by the Y anchor
 *   (clamped to 0–1).
 *
 * Zone mode (otherwise):
 * - horizontal: left edge of the safe area, clamped frame center, or right
 *   edge of the safe area, according to `align`.
 * - vertical: 72 px below the safe-area top, 20% into the safe height, or
 *   120 px above the safe-area bottom, according to `zone`.
 *
 * @returns `{ left, top, width, height }` in frame pixels.
 */
function resolveTextBlockRect(
  layout: Layout,
  blockWidth: number,
  blockHeight: number,
) {
  const safeWidth =
    FRAME.width - TIKTOK_SAFE_AREA.left - TIKTOK_SAFE_AREA.right;
  const safeHeight =
    FRAME.height - TIKTOK_SAFE_AREA.top - TIKTOK_SAFE_AREA.bottom;
  const centeredBounds = resolveNativeUiTextPaddingBounds(blockWidth);

  // Frame-centered left edge, kept inside the native-UI padding bounds.
  const centeredLeft = () =>
    clamp(
      Math.round((FRAME.width - blockWidth) / 2),
      centeredBounds.minLeft,
      centeredBounds.maxLeft,
    );

  const { anchorXPercent, anchorYPercent } = layout;
  if (anchorXPercent !== undefined && anchorYPercent !== undefined) {
    const centeredX = centeredLeft();
    const anchoredLeft =
      layout.align === "center"
        ? centeredX
        : TIKTOK_SAFE_AREA.left +
          Math.round((safeWidth - blockWidth) * clamp(anchorXPercent, 0, 1));
    const anchoredTop =
      TIKTOK_SAFE_AREA.top +
      Math.round((safeHeight - blockHeight) * clamp(anchorYPercent, 0, 1));
    return {
      left: anchoredLeft,
      top: anchoredTop,
      width: blockWidth,
      height: blockHeight,
    };
  }

  let left: number;
  if (layout.align === "left") {
    left = TIKTOK_SAFE_AREA.left;
  } else if (layout.align === "center") {
    left = centeredLeft();
  } else {
    left = FRAME.width - TIKTOK_SAFE_AREA.right - blockWidth;
  }

  let top: number;
  if (layout.zone === "top") {
    top = TIKTOK_SAFE_AREA.top + 72;
  } else if (layout.zone === "center") {
    top = TIKTOK_SAFE_AREA.top + Math.round(safeHeight * 0.2);
  } else {
    top = FRAME.height - TIKTOK_SAFE_AREA.bottom - blockHeight - 120;
  }

  return { left, top, width: blockWidth, height: blockHeight };
}
|
|
1423
|
+
|
|
1424
|
+
/**
 * Returns the allowed range for the left edge of a centered caption block.
 *
 * The horizontal padding is half of the native-UI text zone's center-X span,
 * scaled by the frame width and rounded. `minLeft` is that padding; `maxLeft`
 * is the right-most left edge that still leaves the padding on the right, but
 * never less than `minLeft`.
 *
 * @param blockWidth - Width of the block being placed (defaults to 0).
 */
function resolveNativeUiTextPaddingBounds(blockWidth = 0) {
  const halfZoneSpan =
    (NATIVE_UI_TEXT_ZONE.maxCenterXPercent -
      NATIVE_UI_TEXT_ZONE.minCenterXPercent) /
    2;
  const horizontalPadding = Math.round(FRAME.width * halfZoneSpan);
  const rightLimitedLeft = FRAME.width - horizontalPadding - blockWidth;

  return {
    minLeft: horizontalPadding,
    maxLeft: Math.max(horizontalPadding, rightLimitedLeft),
  };
}
|
|
1439
|
+
|
|
1440
|
+
/**
 * Scores how suitable a frame-space region is for caption text.
 *
 * Three sets of statistics are sampled from the thumbnail: the region itself,
 * a 44 px ring around it, and a 110 px ring around it. The score adds
 * weighted detail and variance terms to penalties for very bright interiors
 * (mean above 205), for interiors whose mean is within 10 of the near ring,
 * and for regions close to the left, top or bottom frame edges.
 *
 * @param sample - Raw thumbnail pixels, read as one byte per pixel.
 * @param width - Thumbnail width in pixels.
 * @param height - Thumbnail height in pixels.
 * @param region - Candidate rectangle in frame pixels.
 */
function scoreLayoutRegion(
  sample: Buffer,
  width: number,
  height: number,
  region: { left: number; top: number; width: number; height: number },
) {
  const statsWithPadding = (padding: number) =>
    sampleStats(sample, width, height, region, padding);
  const inner = statsWithPadding(0);
  const outer = statsWithPadding(44);
  const halo = statsWithPadding(110);

  const nearLeftEdge = region.left < 112 ? 6 : 0;
  const nearTopEdge = region.top < 340 ? 18 : 0;
  const nearBottomEdge =
    region.top + region.height > FRAME.height - 560 ? 18 : 0;
  const edgePenalty = nearLeftEdge + nearTopEdge + nearBottomEdge;

  const readabilityPenalty = Math.max(0, inner.mean - 205) * 0.24;
  const lowContrastPenalty = Math.abs(inner.mean - outer.mean) < 10 ? 6 : 0;
  const haloPenalty = halo.detail * 0.35 + halo.variance * 0.01;

  // Terms are added in a fixed order so floating-point results stay stable.
  let score = inner.detail * 1.35;
  score += inner.variance * 0.022;
  score += outer.detail * 0.45;
  score += readabilityPenalty;
  score += lowContrastPenalty;
  score += haloPenalty;
  score += edgePenalty;
  return score;
}
|
|
1467
|
+
|
|
1468
|
+
/**
 * Computes brightness statistics for a frame-space region on the thumbnail.
 *
 * The region is scaled from frame pixels to thumbnail pixels and clamped so it
 * always covers at least one pixel. With `padding === 0` the region itself is
 * measured; with `padding > 0` the region is grown by the (scaled) padding and
 * only the surrounding ring is measured — pixels inside the original region
 * are skipped.
 *
 * @param sample - Raw thumbnail pixels, read as one byte per pixel, row-major.
 * @param width - Thumbnail width in pixels.
 * @param height - Thumbnail height in pixels.
 * @param region - Rectangle in frame pixels.
 * @param padding - Ring thickness in frame pixels (0 measures the interior).
 * @returns `mean` brightness, `variance`, and `detail` (average absolute
 *   difference to the left and upper neighbours).
 */
function sampleStats(
  sample: Buffer,
  width: number,
  height: number,
  region: { left: number; top: number; width: number; height: number },
  padding: number,
) {
  // Frame pixels -> thumbnail pixels.
  const toThumbX = (framePx: number) => (framePx / FRAME.width) * width;
  const toThumbY = (framePx: number) => (framePx / FRAME.height) * height;

  const left = clamp(Math.floor(toThumbX(region.left)), 0, width - 1);
  const top = clamp(Math.floor(toThumbY(region.top)), 0, height - 1);
  const right = clamp(
    Math.ceil(toThumbX(region.left + region.width)),
    left + 1,
    width,
  );
  const bottom = clamp(
    Math.ceil(toThumbY(region.top + region.height)),
    top + 1,
    height,
  );

  const padX = Math.round(toThumbX(padding));
  const padY = Math.round(toThumbY(padding));
  const outerLeft = clamp(left - padX, 0, width - 1);
  const outerTop = clamp(top - padY, 0, height - 1);
  const outerRight = clamp(right + padX, outerLeft + 1, width);
  const outerBottom = clamp(bottom + padY, outerTop + 1, height);

  const ringOnly = padding > 0;
  let sum = 0;
  let sumOfSquares = 0;
  let gradient = 0;
  let visited = 0;

  for (let row = outerTop; row < outerBottom; row += 1) {
    const rowInsideRegion = row >= top && row < bottom;
    for (let col = outerLeft; col < outerRight; col += 1) {
      if (ringOnly && rowInsideRegion && col >= left && col < right) {
        continue;
      }
      const offset = row * width + col;
      const value = sample[offset] ?? 0;
      // Neighbours outside the image fall back to the pixel itself.
      const west = col > 0 ? (sample[offset - 1] ?? value) : value;
      const north = row > 0 ? (sample[offset - width] ?? value) : value;
      sum += value;
      sumOfSquares += value * value;
      gradient += Math.abs(value - west) + Math.abs(value - north);
      visited += 1;
    }
  }

  // Guard against division by zero when nothing was visited.
  const divisor = Math.max(visited, 1);
  const mean = sum / divisor;
  return {
    mean,
    variance: Math.max(0, sumOfSquares / divisor - mean * mean),
    detail: gradient / divisor,
  };
}
|
|
1530
|
+
|
|
1531
|
+
/**
 * Measures horizontal detail in the top and bottom bands of a thumbnail.
 *
 * Each band is 16% of the height (at least 8 rows). The two band activities
 * are added and divided by 220.
 */
function edgeBandContrast(sample: Buffer, width: number, height: number) {
  const bandHeight = Math.max(8, Math.floor(height * 0.16));
  const topActivity = bandActivity(sample, width, height, 0, bandHeight);
  const bottomStart = height - bandHeight;
  const bottomActivity = bandActivity(
    sample,
    width,
    height,
    bottomStart,
    height,
  );
  return (topActivity + bottomActivity) / 220;
}
|
|
1543
|
+
|
|
1544
|
+
/**
 * Average absolute difference between horizontally adjacent pixels in the
 * rows `startY` (inclusive) to `endY` (exclusive).
 *
 * The buffer is read as one byte per pixel in row-major order; missing bytes
 * count as 0. Returns 0 when no pixel pairs are visited. The `height`
 * parameter is accepted but not used.
 */
function bandActivity(
  sample: Buffer,
  width: number,
  height: number,
  startY: number,
  endY: number,
) {
  let totalDifference = 0;
  let pairs = 0;
  for (let row = startY; row < endY; row += 1) {
    const rowStart = row * width;
    // Start at column 1 so every pixel has a left neighbour in the same row.
    for (let col = 1; col < width; col += 1) {
      const value = sample[rowStart + col] ?? 0;
      const leftNeighbour = sample[rowStart + col - 1] ?? value;
      totalDifference += Math.abs(value - leftNeighbour);
      pairs += 1;
    }
  }
  return totalDifference / Math.max(pairs, 1);
}
|
|
1564
|
+
|
|
1565
|
+
/**
 * Restricts `value` to the range [`min`, `max`].
 *
 * The lower bound is applied first and the upper bound second, so when
 * `min > max` the result is `max`.
 */
function clamp(value: number, min: number, max: number) {
  const raised = Math.max(value, min);
  return Math.min(raised, max);
}
|
|
1568
|
+
|
|
1569
|
+
/**
 * Escapes text so it can be embedded inside Pango markup.
 *
 * Replaces `&`, `<`, `>` and `"` with their entity forms. The ampersand is
 * handled first so the entities produced by the later replacements are not
 * escaped a second time.
 *
 * The previous body replaced each character with itself (the entity names had
 * been lost) and contained an unterminated string literal, so captions with
 * these characters were passed to the markup parser unescaped.
 */
function escapePango(text: string) {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;");
}
|
|
1576
|
+
|
|
1577
|
+
/**
 * Clips a composite layer to the frame.
 *
 * Layers positioned partly outside the frame are cropped to the visible part
 * and moved to a non-negative offset. Layers whose size cannot be read, or
 * that have no visible area, yield `null`. Layers already fully inside the
 * frame are returned with their original buffer.
 *
 * @param layer - Layer bitmap with its intended `left`/`top` offset.
 * @returns The clipped layer, or `null` when nothing is visible.
 */
async function clipCompositeLayer(layer: {
  input: Buffer;
  left: number;
  top: number;
}) {
  const size = await sharp(layer.input).metadata();
  const width = size.width ?? 0;
  const height = size.height ?? 0;
  if (!width || !height) {
    return null;
  }

  // Where the layer lands in the frame once negative offsets are pulled to 0.
  const left = Math.max(0, layer.left);
  const top = Math.max(0, layer.top);
  // How much of the layer is cut away on its left/top side.
  const cropLeft = Math.max(0, -layer.left);
  const cropTop = Math.max(0, -layer.top);
  // Visible extent: what remains of the layer, limited by the frame space left.
  const cropWidth = Math.min(width - cropLeft, FRAME.width - left);
  const cropHeight = Math.min(height - cropTop, FRAME.height - top);

  if (cropWidth <= 0 || cropHeight <= 0) {
    return null;
  }

  const needsCrop =
    cropLeft !== 0 ||
    cropTop !== 0 ||
    cropWidth !== width ||
    cropHeight !== height;
  if (!needsCrop) {
    return { input: layer.input, left, top };
  }

  const input = await sharp(layer.input)
    .extract({
      left: cropLeft,
      top: cropTop,
      width: cropWidth,
      height: cropHeight,
    })
    .png()
    .toBuffer();

  return { input, left, top };
}
|
|
1631
|
+
|
|
1632
|
+
/**
 * Enumerates the caption layouts to be evaluated for an overlay text.
 *
 * Captions of at most 42 characters (after trimming) use a narrower set of
 * widths and horizontal anchors; longer captions use a wider set and carry an
 * extra style penalty of 4. Every combination of width, vertical anchor and
 * horizontal anchor is produced, in that nesting order (3 × 5 × 3 = 45
 * candidates).
 *
 * @param overlayText - The caption text that will be rendered.
 * @returns Centre-aligned layout candidates, each with a `stylePenalty`.
 */
function buildLayoutCandidates(
  overlayText: string,
): Array<Layout & { stylePenalty: number }> {
  type Candidate = Layout & { stylePenalty: number };

  const isShortCaption = overlayText.trim().length <= 42;
  const widthOptions = isShortCaption ? [48, 52, 56] : [50, 54, 58];
  const horizontalAnchors = isShortCaption
    ? [0.44, 0.5, 0.56]
    : [0.42, 0.5, 0.58];
  const verticalAnchors = [0.16, 0.22, 0.28, 0.34, 0.4];
  const lengthPenalty = isShortCaption ? 0 : 4;

  return widthOptions.flatMap((maxWidthPercent) =>
    verticalAnchors.flatMap((anchorYPercent) =>
      horizontalAnchors.map(
        (anchorXPercent): Candidate => ({
          // Only anchors above 0.22 are labelled as the "top" zone.
          zone: anchorYPercent < 0.22 ? "top" : "center",
          align: "center",
          maxWidthPercent,
          anchorXPercent,
          anchorYPercent,
          justification: "Image-aware centered native TikTok caption zone.",
          stylePenalty:
            scoreNativeUiAnchor(anchorXPercent, anchorYPercent) +
            lengthPenalty,
        }),
      ),
    ),
  );
}
|
|
1663
|
+
|
|
1664
|
+
/**
 * Scores how far an anchor point strays from the preferred caption position.
 *
 * A flat 10 is charged for each axis on which the anchor lies outside the
 * bounds in `NATIVE_UI_TEXT_ZONE`. On top of that, the distance from x = 0.5
 * is weighted by 24 and the distance from y = 0.28 by 18.
 *
 * @param anchorXPercent - Horizontal anchor of the caption.
 * @param anchorYPercent - Vertical anchor of the caption.
 * @returns The accumulated penalty; lower is better.
 */
function scoreNativeUiAnchor(anchorXPercent: number, anchorYPercent: number) {
  const zone = NATIVE_UI_TEXT_ZONE;

  const outsideHorizontally =
    anchorXPercent < zone.minCenterXPercent ||
    anchorXPercent > zone.maxCenterXPercent;
  const outsideVertically =
    anchorYPercent < zone.minCenterYPercent ||
    anchorYPercent > zone.maxCenterYPercent;

  const zonePenalty =
    (outsideHorizontally ? 10 : 0) + (outsideVertically ? 10 : 0);
  const horizontalDrift = Math.abs(anchorXPercent - 0.5) * 24;
  const verticalDrift = Math.abs(anchorYPercent - 0.28) * 18;

  return zonePenalty + horizontalDrift + verticalDrift;
}
|
|
1682
|
+
|
|
1683
|
+
/**
 * Locates the Remotion entry file relative to this module.
 *
 * @returns The path of `./remotion/index.js` when that file exists on disk,
 *   otherwise the path of `./remotion/index.tsx` (returned without checking
 *   that it exists).
 */
function resolveRemotionEntryPoint() {
  const compiledEntry = fileURLToPath(
    new URL("./remotion/index.js", import.meta.url),
  );
  return existsSync(compiledEntry)
    ? compiledEntry
    : fileURLToPath(new URL("./remotion/index.tsx", import.meta.url));
}
|
|
1692
|
+
|
|
1693
|
+
/**
 * Loads `template.config.json` for this template.
 *
 * Two locations are tried in order: one directory above this module, then
 * `templates/template_0000/` under the current working directory. The first
 * file that exists is read and parsed.
 *
 * @returns The parsed configuration, or an empty object when neither file
 *   exists. The parsed JSON is cast to the expected shape, not validated.
 */
function resolveTemplateRuntimeConfig() {
  type RuntimeConfig = {
    remotion?: {
      serve_url?: string;
    };
  };

  const searchPaths = [
    fileURLToPath(new URL("../template.config.json", import.meta.url)),
    path.resolve(process.cwd(), "templates/template_0000/template.config.json"),
  ];

  const configPath = searchPaths.find((location) => existsSync(location));
  if (configPath !== undefined) {
    return JSON.parse(readFileSync(configPath, "utf8")) as RuntimeConfig;
  }

  return {};
}
|
|
1712
|
+
|
|
1713
|
+
/**
 * Locates this template's `SKILL.md`.
 *
 * @returns The path one directory above this module when the file exists
 *   there, otherwise `templates/template_0000/SKILL.md` under the current
 *   working directory (returned without checking that it exists).
 */
function resolveSkillPath() {
  const besideModule = fileURLToPath(new URL("../SKILL.md", import.meta.url));
  return existsSync(besideModule)
    ? besideModule
    : path.resolve(process.cwd(), "templates/template_0000/SKILL.md");
}
|
|
1720
|
+
|
|
1721
|
+
/**
 * Resolves the on-disk font file for every entry in `TEMPLATE_FONT_OPTIONS`.
 *
 * Each font is looked up first in `../assets/` relative to this module and
 * then in `templates/template_0000/assets/` under the current working
 * directory.
 *
 * @returns A map from font id to the resolved file path.
 * @throws Error when a font file is found in neither location.
 */
function resolveOverlayFonts() {
  const fontsById = Object.fromEntries(
    TEMPLATE_FONT_OPTIONS.map((option) => {
      const besideModule = fileURLToPath(
        new URL(`../assets/${option.assetFile}`, import.meta.url),
      );
      const underCwd = path.resolve(
        process.cwd(),
        `templates/template_0000/assets/${option.assetFile}`,
      );

      const fontPath = [besideModule, underCwd].find((location) =>
        existsSync(location),
      );
      if (fontPath === undefined) {
        throw new Error(
          `Could not resolve local overlay font asset for ${option.label}.`,
        );
      }

      return [option.id, { path: fontPath }];
    }),
  );

  return fontsById as Record<TemplateFontId, { path: string }>;
}
|