@mevdragon/vidfarm-devcli 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/.env.example +6 -39
  2. package/GETTING_STARTED.developers.md +87 -0
  3. package/README.md +94 -238
  4. package/SKILL.developer.md +430 -104
  5. package/dist/src/account-pages.js +1 -1
  6. package/dist/src/app.js +93 -5
  7. package/dist/src/cli.js +456 -8
  8. package/dist/src/config.js +3 -2
  9. package/dist/src/context.js +30 -11
  10. package/dist/src/db.js +2 -57
  11. package/dist/src/dev-app.js +0 -1
  12. package/dist/src/index.js +4 -2
  13. package/dist/src/lib/template-paths.js +21 -0
  14. package/dist/src/runtime.js +3 -1
  15. package/dist/src/services/auth.js +4 -4
  16. package/dist/src/services/job-logs.js +186 -0
  17. package/dist/src/services/jobs.js +3 -2
  18. package/dist/src/services/providers.js +14 -6
  19. package/dist/src/services/storage.js +85 -2
  20. package/dist/src/services/template-sources.js +29 -3
  21. package/dist/templates/template_0000/src/lib/images.js +46 -86
  22. package/dist/templates/template_0000/src/template.js +277 -53
  23. package/package.json +5 -6
  24. package/templates/template_0000/README.md +8 -52
  25. package/templates/template_0000/SKILL.md +35 -3
  26. package/templates/template_0000/package.json +3 -6
  27. package/templates/template_0000/src/lib/images.js +46 -86
  28. package/templates/template_0000/src/lib/images.ts +55 -98
  29. package/templates/template_0000/src/template-dna.js +9 -0
  30. package/templates/template_0000/src/template.js +523 -199
  31. package/templates/template_0000/src/template.ts +356 -61
  32. package/templates/template_0000/template.config.json +7 -12
  33. package/AWS_REMOTION_HANDOFF.md +0 -311
  34. package/PLATFORM_SPEC.md +0 -1039
  35. package/SKILL.director.md +0 -599
  36. package/dist/infra/cdk/bin/vidfarm-prod.js +0 -59
  37. package/dist/infra/cdk/lib/vidfarm-prod-stack.js +0 -212
  38. package/templates/template_0000/package-lock.json +0 -5505
  39. package/templates/template_0000/scripts/create-site.mjs +0 -27
  40. package/templates/template_0000/scripts/render-cloud.mjs +0 -72
@@ -4,63 +4,113 @@ import path from "node:path";
4
4
  import { fileURLToPath } from "node:url";
5
5
  import sharp from "sharp";
6
6
  import { normalizeToPortraitFrame } from "./lib/images.js";
7
- import { defineTemplate } from "./sdk.js";
8
- import { TEMPLATE_FONT_IDS, TEMPLATE_FONT_OPTIONS, TEMPLATE_TEXT_BACKGROUND_COLOR_IDS, TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS } from "./style-options.js";
9
- const TEMPLATE_ID = "template-0000";
7
+ import { defineTemplate, } from "./sdk.js";
8
+ import { TEMPLATE_FONT_IDS, TEMPLATE_FONT_OPTIONS, TEMPLATE_TEXT_BACKGROUND_COLOR_IDS, TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS, } from "./style-options.js";
9
+ import { templateLinkToOriginal, templateVisualDna, templateViralDna } from "./template-dna.js";
10
+ const TEMPLATE_ID = "4c7a7e1a-7f35-4f30-9f86-9c8a63c7f2db";
11
+ const TEMPLATE_SLUG_ID = "template_0000";
10
12
  const COMPOSITION_ID = "template-0000";
13
+ const TEMPLATE_PREVIEW_MEDIA = [
14
+ "templates/template-0000/about/preview-01.jpg",
15
+ ];
11
16
  const FRAME = { width: 1080, height: 1920 };
12
17
  const TIKTOK_SAFE_AREA = {
13
18
  left: 78,
14
19
  right: 248,
15
20
  top: 196,
16
- bottom: 408
21
+ bottom: 408,
17
22
  };
18
23
  const NATIVE_UI_TEXT_ZONE = {
19
24
  minCenterXPercent: 0.38,
20
25
  maxCenterXPercent: 0.62,
21
26
  minCenterYPercent: 0.2,
22
- maxCenterYPercent: 0.58
27
+ maxCenterYPercent: 0.58,
23
28
  };
24
29
  const TEXT_BACKGROUND_NONE = "none";
30
+ // Documentation-only dependency map for humans and agents.
31
+ // The current template standard has no first-class dependencies field, so
32
+ // provider/model requirements must be declared in source and SKILL.md.
33
+ const TEMPLATE_PROVIDER_REQUIREMENTS = {
34
+ image: [
35
+ { provider: "openai", models: ["gpt-image-1", "gpt-image-2"], strict: false },
36
+ { provider: "gemini", models: ["gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"], strict: false },
37
+ { provider: "openrouter", models: ["bytedance/seedance-2.0", "bytedance-seed/seedream-4.5"], strict: false, planned: true },
38
+ ],
39
+ text: [
40
+ { provider: "openai", models: ["gpt-5.4"], strict: false },
41
+ { provider: "gemini", models: ["gemini-3.1-flash-lite", "gemini-2.5-flash-lite"], strict: false },
42
+ { provider: "openrouter", models: ["qwen/qwen3.6-flash"], strict: false },
43
+ ],
44
+ layout_analysis: [
45
+ { provider: "openai", models: ["gpt-5.4"], strict: false },
46
+ { provider: "gemini", models: ["gemini-3.1-flash-lite", "gemini-2.5-flash-lite"], strict: false },
47
+ { provider: "openrouter", models: ["qwen/qwen3.6-flash"], strict: false },
48
+ ],
49
+ video: [
50
+ { provider: "openai", models: ["sora-2"], strict: false, planned: true },
51
+ { provider: "gemini", models: ["veo-3.0-generate-001"], strict: false, planned: true },
52
+ ],
53
+ };
54
+ void TEMPLATE_PROVIDER_REQUIREMENTS;
25
55
  const legacySlideInputSchema = z.union([
26
56
  z.tuple([z.string().min(3), z.string().min(1)]),
27
- z.tuple([z.string().min(3), z.string().min(1), z.number().int().min(500).max(30000)])
57
+ z.tuple([
58
+ z.string().min(3),
59
+ z.string().min(1),
60
+ z.number().int().min(500).max(30000),
61
+ ]),
28
62
  ]);
29
63
  const structuredSlideInputSchema = z.object({
30
64
  image_prompt: z.string().min(3),
31
65
  image_prompt_attachments: z.array(z.string().url()).max(12).default([]),
32
66
  caption: z.string().min(1),
33
- duration_ms: z.number().int().min(500).max(30000).default(4000)
67
+ duration_ms: z.number().int().min(500).max(30000).default(4000),
34
68
  });
35
- const slideInputSchema = z.union([legacySlideInputSchema, structuredSlideInputSchema]);
69
+ const slideInputSchema = z.union([
70
+ legacySlideInputSchema,
71
+ structuredSlideInputSchema,
72
+ ]);
36
73
  const createSlideshowInputSchema = z.object({
37
74
  slides: z.array(slideInputSchema).min(1).max(20),
38
- meta_details_prompt: z.string().min(8).max(4000).optional()
75
+ meta_details_prompt: z.string().min(8).max(4000).optional(),
39
76
  });
40
77
  const renderSlideSchema = z.object({
41
78
  imageUrl: z.string().url(),
42
- durationMs: z.number().int().min(500).max(30000)
79
+ durationMs: z.number().int().min(500).max(30000),
43
80
  });
44
81
  const renderVideoInputSchema = z.object({
45
- slides: z.array(renderSlideSchema).min(1).max(20)
82
+ slides: z.array(renderSlideSchema).min(1).max(20),
46
83
  });
47
84
  const supportedImageProviders = ["openai", "gemini", "openrouter"];
48
85
  const remotionEntryPoint = resolveRemotionEntryPoint();
49
- const remotionRuntimeConfig = resolveTemplateRemotionRuntimeConfig();
86
+ const templateRuntimeConfig = resolveTemplateRuntimeConfig();
87
+ const remotionRuntimeConfig = {
88
+ serveUrl: templateRuntimeConfig.remotion?.serve_url ?? undefined,
89
+ };
50
90
  const skillPath = resolveSkillPath();
51
91
  const overlayFonts = resolveOverlayFonts();
52
92
  export const template0000Definition = defineTemplate({
53
93
  id: TEMPLATE_ID,
94
+ slugId: TEMPLATE_SLUG_ID,
54
95
  version: "1.0.0",
55
- description: "Starter TikTok-safe slideshow template with staged image generation, text compositing, and optional Remotion video rendering.",
96
+ about: {
97
+ title: "Template 0000",
98
+ description: "Starter TikTok-safe slideshow template with staged image generation, text compositing, and optional Remotion video rendering.",
99
+ viral_dna: templateViralDna,
100
+ visual_dna: templateVisualDna,
101
+ preview_media: [...TEMPLATE_PREVIEW_MEDIA],
102
+ link_to_original: templateLinkToOriginal,
103
+ },
56
104
  skillPath,
57
105
  configSchema: z.object({
58
106
  defaultProvider: z.enum(supportedImageProviders).default("gemini"),
59
107
  textModel: z.string().optional(),
60
108
  imageModel: z.string().optional(),
61
109
  captionFont: z.enum(TEMPLATE_FONT_IDS).default("montserrat"),
62
- captionBackgroundColor: z.enum([TEXT_BACKGROUND_NONE, ...TEMPLATE_TEXT_BACKGROUND_COLOR_IDS]).default(TEXT_BACKGROUND_NONE),
63
- renderCompositionId: z.string().default(COMPOSITION_ID)
110
+ captionBackgroundColor: z
111
+ .enum([TEXT_BACKGROUND_NONE, ...TEMPLATE_TEXT_BACKGROUND_COLOR_IDS])
112
+ .default(TEXT_BACKGROUND_NONE),
113
+ renderCompositionId: z.string().default(COMPOSITION_ID),
64
114
  }),
65
115
  operations: {
66
116
  create_slideshow: {
@@ -73,13 +123,15 @@ export const template0000Definition = defineTemplate({
73
123
  ["a founder at a desk", "Launch faster", 2400],
74
124
  {
75
125
  image_prompt: "close-up product photo",
76
- image_prompt_attachments: ["https://images.unsplash.com/photo-1520607162513-77705c0f0d4a?auto=format&fit=crop&w=1080&q=80"],
126
+ image_prompt_attachments: [
127
+ "https://images.unsplash.com/photo-1520607162513-77705c0f0d4a?auto=format&fit=crop&w=1080&q=80",
128
+ ],
77
129
  caption: "Built in-house",
78
- duration_ms: 3200
79
- }
130
+ duration_ms: 3200,
131
+ },
80
132
  ],
81
- meta_details_prompt: "Target US TikTok skincare and startup audiences. Make the hook curiosity-driven, natural, and UGC-native."
82
- }
133
+ meta_details_prompt: "Target US TikTok skincare and startup audiences. Make the hook curiosity-driven, natural, and UGC-native.",
134
+ },
83
135
  },
84
136
  render_video: {
85
137
  description: "Turn finished slideshow frames into an auto-advancing vertical video using Remotion.",
@@ -88,36 +140,60 @@ export const template0000Definition = defineTemplate({
88
140
  webhookSupport: true,
89
141
  smokeTestPayload: {
90
142
  slides: [
91
- { imageUrl: "https://images.unsplash.com/photo-1519389950473-47ba0277781c?auto=format&fit=crop&w=1080&q=80", durationMs: 2400 },
92
- { imageUrl: "https://images.unsplash.com/photo-1520607162513-77705c0f0d4a?auto=format&fit=crop&w=1080&q=80", durationMs: 3200 }
93
- ]
94
- }
95
- }
143
+ {
144
+ imageUrl: "https://images.unsplash.com/photo-1519389950473-47ba0277781c?auto=format&fit=crop&w=1080&q=80",
145
+ durationMs: 2400,
146
+ },
147
+ {
148
+ imageUrl: "https://images.unsplash.com/photo-1520607162513-77705c0f0d4a?auto=format&fit=crop&w=1080&q=80",
149
+ durationMs: 3200,
150
+ },
151
+ ],
152
+ },
153
+ },
96
154
  },
97
155
  jobs: {
98
156
  async createSlideshowWorkflow(ctx, input) {
99
157
  const payload = createSlideshowInputSchema.parse(input);
100
158
  ctx.logger.progress(0.04, "Starting template_0000 slideshow image stage");
101
159
  const provider = parseImageProvider(ctx.templateConfig.defaultProvider);
102
- const textModel = String(ctx.templateConfig.textModel ?? defaultTextModelForProvider(provider));
103
- const imageModel = String(ctx.templateConfig.imageModel ?? defaultImageModelForProvider(provider));
160
+ const configuredTextModel = typeof ctx.templateConfig.textModel === "string"
161
+ ? ctx.templateConfig.textModel
162
+ : null;
163
+ const configuredImageModel = typeof ctx.templateConfig.imageModel === "string"
164
+ ? ctx.templateConfig.imageModel
165
+ : null;
166
+ const textModel = String(configuredTextModel ?? defaultTextModelForProvider(provider));
167
+ const imageModel = String(configuredImageModel ?? defaultImageModelForProvider(provider));
104
168
  const textStyle = resolveTextStyleSpec(ctx.templateConfig);
105
169
  const slides = [];
106
170
  for (const [index, rawSlide] of payload.slides.entries()) {
107
- const { imagePrompt, imagePromptAttachments, overlayText, durationMs } = normalizeSlideInput(rawSlide);
108
- ctx.logger.progress(0.08 + (index / payload.slides.length) * 0.42, `Generating slide ${index + 1} image`);
109
- const image = await generatePortraitCandidateImage(ctx, {
110
- provider,
111
- imageModel,
112
- imagePrompt,
113
- imagePromptAttachments,
114
- overlayText
115
- });
116
- await ctx.billing.record({
117
- type: "ai_generation",
118
- costUsd: 0.04,
119
- metadata: { stage: "image_generation", slideIndex: index, model: imageModel }
120
- });
171
+ const { sourceType, sourceValue, imagePrompt, imagePromptAttachments, overlayText, durationMs, } = normalizeSlideInput(rawSlide);
172
+ ctx.logger.progress(0.08 + (index / payload.slides.length) * 0.42, sourceType === "image_source"
173
+ ? `Loading slide ${index + 1} source image`
174
+ : `Generating slide ${index + 1} image`);
175
+ const image = sourceType === "image_source"
176
+ ? await loadExistingSlideImage(sourceValue)
177
+ : await generatePortraitCandidateImage(ctx, {
178
+ provider,
179
+ imageModel,
180
+ configuredProvider: provider,
181
+ configuredImageModel,
182
+ imagePrompt,
183
+ imagePromptAttachments,
184
+ overlayText,
185
+ });
186
+ if (sourceType === "ai_prompt") {
187
+ await ctx.billing.record({
188
+ type: "ai_generation",
189
+ costUsd: 0.04,
190
+ metadata: {
191
+ stage: "image_generation",
192
+ slideIndex: index,
193
+ model: imageModel,
194
+ },
195
+ });
196
+ }
121
197
  ctx.logger.progress(0.13 + (index / payload.slides.length) * 0.3, `Normalizing slide ${index + 1} to strict 9:16 portrait`);
122
198
  const normalizedImage = await normalizeToPortraitFrame(image.bytes, FRAME);
123
199
  const backgroundArtifact = await ctx.storage.putBuffer(`backgrounds/slide-${pad2(index + 1)}.png`, normalizedImage.bytes, {
@@ -129,8 +205,8 @@ export const template0000Definition = defineTemplate({
129
205
  revisedPrompt: image.revisedPrompt,
130
206
  width: normalizedImage.width,
131
207
  height: normalizedImage.height,
132
- aspectRatio: "9:16"
133
- }
208
+ aspectRatio: "9:16",
209
+ },
134
210
  });
135
211
  ctx.logger.progress(0.18 + (index / payload.slides.length) * 0.28, `Scoring TikTok-safe text layout for slide ${index + 1}`);
136
212
  const layout = await chooseSlideLayout(ctx, {
@@ -138,7 +214,7 @@ export const template0000Definition = defineTemplate({
138
214
  model: textModel,
139
215
  imageBytes: normalizedImage.bytes,
140
216
  imageUrl: backgroundArtifact.url,
141
- overlayText
217
+ overlayText,
142
218
  });
143
219
  ctx.logger.progress(0.24 + (index / payload.slides.length) * 0.28, `Compositing caption into slide ${index + 1}`);
144
220
  const frameBytes = await renderFinishedSlide(normalizedImage.bytes, overlayText, layout, textStyle);
@@ -150,11 +226,13 @@ export const template0000Definition = defineTemplate({
150
226
  overlayText,
151
227
  layout,
152
228
  aspectRatio: "9:16",
153
- kind: "finished_slide_frame"
154
- }
229
+ kind: "finished_slide_frame",
230
+ },
155
231
  });
156
232
  slides.push({
157
233
  index,
234
+ sourceType,
235
+ sourceValue,
158
236
  imagePrompt,
159
237
  imagePromptAttachments,
160
238
  overlayText,
@@ -163,20 +241,22 @@ export const template0000Definition = defineTemplate({
163
241
  frameImageUrl: frameArtifact.url,
164
242
  prompt: image.prompt,
165
243
  revisedPrompt: image.revisedPrompt,
166
- layout
244
+ layout,
167
245
  });
168
246
  }
169
247
  ctx.logger.progress(0.72, "Generating post metadata");
170
248
  const metaDetails = await generateMetaDetails(ctx, {
171
249
  provider,
172
250
  textModel,
251
+ configuredProvider: provider,
252
+ configuredTextModel,
173
253
  slides,
174
- metaDetailsPrompt: payload.meta_details_prompt
254
+ metaDetailsPrompt: payload.meta_details_prompt,
175
255
  });
176
256
  await ctx.billing.record({
177
257
  type: "ai_generation",
178
258
  costUsd: 0.01,
179
- metadata: { stage: "meta_details_generation", model: textModel }
259
+ metadata: { stage: "meta_details_generation", model: textModel },
180
260
  });
181
261
  ctx.logger.progress(0.78, "Saving slideshow manifest");
182
262
  const manifest = {
@@ -190,19 +270,21 @@ export const template0000Definition = defineTemplate({
190
270
  backgroundColorHex: textStyle.backgroundColorHex,
191
271
  shadow: "soft native TikTok-style black drop shadow",
192
272
  availableFonts: TEMPLATE_FONT_OPTIONS,
193
- availableBackgroundColors: TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS
273
+ availableBackgroundColors: TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS,
194
274
  },
195
275
  safeArea: TIKTOK_SAFE_AREA,
196
276
  metaDetails,
197
- slides
277
+ slides,
198
278
  };
199
279
  const manifestArtifact = await ctx.storage.putJson(`manifests/${TEMPLATE_ID}.json`, manifest);
200
- const files = slides.flatMap((slide) => [slide.backgroundImageUrl, slide.frameImageUrl]).filter((value) => Boolean(value));
280
+ const files = slides
281
+ .flatMap((slide) => [slide.backgroundImageUrl, slide.frameImageUrl])
282
+ .filter((value) => Boolean(value));
201
283
  if (manifestArtifact.url) {
202
284
  files.push(manifestArtifact.url);
203
285
  }
204
286
  ctx.logger.progress(1, "Slideshow images complete", {
205
- fileCount: files.length
287
+ fileCount: files.length,
206
288
  });
207
289
  return {
208
290
  progress: 1,
@@ -215,19 +297,26 @@ export const template0000Definition = defineTemplate({
215
297
  meta_details_prompt: payload.meta_details_prompt ?? null,
216
298
  slides: slides.map((slide) => ({
217
299
  index: slide.index,
300
+ sourceType: slide.sourceType,
301
+ sourceValue: slide.sourceValue,
218
302
  imagePrompt: slide.imagePrompt,
219
303
  imagePromptAttachments: slide.imagePromptAttachments,
220
304
  overlayText: slide.overlayText,
221
- durationMs: slide.durationMs
222
- }))
305
+ durationMs: slide.durationMs,
306
+ })),
223
307
  },
224
308
  renderVideoInput: {
225
309
  slides: slides
226
- .map((slide) => slide.frameImageUrl ? { imageUrl: slide.frameImageUrl, durationMs: slide.durationMs } : null)
227
- .filter((slide) => Boolean(slide))
310
+ .map((slide) => slide.frameImageUrl
311
+ ? {
312
+ imageUrl: slide.frameImageUrl,
313
+ durationMs: slide.durationMs,
314
+ }
315
+ : null)
316
+ .filter((slide) => Boolean(slide)),
228
317
  },
229
- slides
230
- }
318
+ slides,
319
+ },
231
320
  };
232
321
  },
233
322
  async renderVideoWorkflow(ctx, input) {
@@ -243,13 +332,13 @@ export const template0000Definition = defineTemplate({
243
332
  outputKey: "renders/final.mp4",
244
333
  inputProps: {
245
334
  slides,
246
- fps: 30
247
- }
335
+ fps: 30,
336
+ },
248
337
  });
249
338
  await ctx.billing.record({
250
339
  type: "render",
251
340
  costUsd: 0.35,
252
- metadata: render.metadata
341
+ metadata: render.metadata,
253
342
  });
254
343
  const files = slides.map((slide) => slide.imageUrl);
255
344
  if (render.outputUrl) {
@@ -257,18 +346,18 @@ export const template0000Definition = defineTemplate({
257
346
  }
258
347
  ctx.logger.progress(1, "Slideshow video complete", {
259
348
  fileCount: files.length,
260
- renderId: render.renderId
349
+ renderId: render.renderId,
261
350
  });
262
351
  return {
263
352
  progress: 1,
264
353
  output: {
265
354
  files,
266
355
  render,
267
- slides
268
- }
356
+ slides,
357
+ },
269
358
  };
270
- }
271
- }
359
+ },
360
+ },
272
361
  });
273
362
  const IMAGE_PROMPT_SYSTEM_WRAPPER = [
274
363
  "Create an exact 9:16 portrait slideshow image for a TikTok-style vertical video.",
@@ -287,18 +376,20 @@ const IMAGE_PROMPT_SYSTEM_WRAPPER = [
287
376
  "That reserved lane is empty space only. Do not render the overlay or any placeholder text.",
288
377
  "Do not render any phone UI, social media UI, app chrome, search bars, tabs, buttons, likes, comments, avatars, battery icons, status bars, frames, stickers, or interface elements.",
289
378
  "Do not render any words, letters, captions, subtitles, titles, logos, signage, labels, watermarks, or typography inside the image.",
290
- "The final image must contain zero visible text.",
291
- "Use cinematic lighting, crisp detail, and framing that already fits an exact 9:16 portrait frame.",
292
- "Avoid giant blank ceiling, wall, floor, or sky bands that make the frame feel padded or zoomed out."
379
+ "The final image must contain zero visible text unless part of the scene itself.",
380
+ "Use cinematic lighting, crisp detail, and framing that already fits an exact 9:16 portrait frame, unless otherwise specified.",
381
+ "Avoid giant blank ceiling, wall, floor, or sky bands that make the frame feel padded or zoomed out.",
293
382
  ].join("\n");
294
383
  function buildImagePrompt(imagePrompt, overlayText, imagePromptAttachments = []) {
295
384
  return [
296
385
  IMAGE_PROMPT_SYSTEM_WRAPPER,
297
386
  `User visual prompt: ${imagePrompt}`,
298
387
  ...(imagePromptAttachments.length
299
- ? [`Use the attached reference media as visual guidance for subject matter, style, composition, or product details, but still return one fresh final image.`]
388
+ ? [
389
+ `Use the attached reference media as visual guidance for subject matter, style, composition, or product details, but still return one fresh final image.`,
390
+ ]
300
391
  : []),
301
- `Reserve room for a short editorial overlay approximately ${overlayText.length} characters long, but do not render the overlay text itself.`
392
+ `Reserve room for a short editorial overlay approximately ${overlayText.length} characters long, but do not render the overlay text itself.`,
302
393
  ].join("\n");
303
394
  }
304
395
  async function generatePortraitCandidateImage(ctx, input) {
@@ -310,44 +401,61 @@ async function generatePortraitCandidateImage(ctx, input) {
310
401
  "Return a true full-frame mobile portrait composition.",
311
402
  "Do not create a landscape scene floating inside a vertical canvas.",
312
403
  "The subject and horizon should already be composed for 9:16 viewing.",
313
- "Tighter framing is preferred over wide empty margins."
404
+ "Tighter framing is preferred over wide empty margins.",
314
405
  ].join("\n"),
315
406
  [
316
407
  buildImagePrompt(input.imagePrompt, input.overlayText, input.imagePromptAttachments),
317
408
  "Critical correction: return an unmistakably vertical composition.",
318
409
  "Push the camera closer if needed so the frame reads as native 9:16 portrait.",
319
410
  "No inset landscape window, no postcard composition, no wide aerial framing.",
320
- "Use a clean magazine-style composition with one obvious text lane."
411
+ "Use a clean magazine-style composition with one obvious text lane.",
321
412
  ].join("\n"),
322
413
  [
323
414
  buildImagePrompt(input.imagePrompt, input.overlayText, input.imagePromptAttachments),
324
415
  "Critical correction: the previous output likely had empty padding or weak mobile framing.",
325
416
  "Fill the frame with a real handheld-phone portrait composition.",
326
417
  "No white bars, no blank top or bottom bands, and no subject floating in the middle of excess space.",
327
- "Crop decisively and favor a native social-video camera distance."
328
- ].join("\n")
418
+ "Crop decisively and favor a native social-video camera distance.",
419
+ ].join("\n"),
329
420
  ];
330
421
  let best = null;
331
- for (const prompt of attempts) {
332
- const image = await ctx.providers.generateImage({
333
- provider: input.provider,
334
- model: input.imageModel,
335
- prompt,
336
- promptAttachments: input.imagePromptAttachments,
337
- size: sourceImageSizeForProvider(input.provider),
338
- aspectRatio: "9:16",
339
- imageSize: sourceImageOutputSizeForProvider(input.provider, input.imageModel)
340
- });
341
- const score = await portraitScore(image.bytes);
342
- if (!best || score < best.score) {
343
- best = { ...image, prompt, score };
422
+ let lastError = null;
423
+ for (const provider of providerFailoverOrder(input.provider)) {
424
+ const imageModel = resolveImageModelForAttempt(provider, input.configuredProvider, input.configuredImageModel);
425
+ try {
426
+ for (const prompt of attempts) {
427
+ const image = await ctx.providers.generateImage({
428
+ provider,
429
+ model: imageModel,
430
+ prompt,
431
+ promptAttachments: input.imagePromptAttachments,
432
+ size: sourceImageSizeForProvider(provider),
433
+ aspectRatio: "9:16",
434
+ imageSize: sourceImageOutputSizeForProvider(provider, imageModel),
435
+ });
436
+ const score = await portraitScore(image.bytes);
437
+ if (!best || score < best.score) {
438
+ best = { ...image, prompt, score };
439
+ }
440
+ if (score <= 0.16) {
441
+ return best;
442
+ }
443
+ }
444
+ if (best) {
445
+ return best;
446
+ }
344
447
  }
345
- if (score <= 0.16) {
346
- break;
448
+ catch (error) {
449
+ lastError = error instanceof Error ? error : new Error(String(error));
450
+ ctx.logger.warn("Image generation provider failed, trying next provider", {
451
+ provider,
452
+ model: imageModel,
453
+ message: lastError.message,
454
+ });
347
455
  }
348
456
  }
349
457
  if (!best) {
350
- throw new Error("No image candidate was generated.");
458
+ throw lastError ?? new Error("No image candidate was generated.");
351
459
  }
352
460
  return best;
353
461
  }
@@ -358,7 +466,7 @@ function defaultLayout() {
358
466
  maxWidthPercent: 58,
359
467
  anchorXPercent: 0.5,
360
468
  anchorYPercent: 0.28,
361
- justification: "Default placement uses a centered TikTok-native text zone in the middle third of the safe frame."
469
+ justification: "Default placement uses a centered TikTok-native text zone in the middle third of the safe frame.",
362
470
  };
363
471
  }
364
472
  function parseImageProvider(value) {
@@ -368,11 +476,12 @@ function parseImageProvider(value) {
368
476
  return "gemini";
369
477
  }
370
478
  function isImageProvider(value) {
371
- return typeof value === "string" && supportedImageProviders.includes(value);
479
+ return (typeof value === "string" &&
480
+ supportedImageProviders.includes(value));
372
481
  }
373
482
  function sourceImageSizeForProvider(provider) {
374
483
  if (provider === "openai") {
375
- return "1024x1792";
484
+ return "1024x1536";
376
485
  }
377
486
  return "1080x1920";
378
487
  }
@@ -383,7 +492,7 @@ function defaultImageModelForProvider(provider) {
383
492
  if (provider === "openrouter") {
384
493
  return "bytedance-seed/seedream-4.5";
385
494
  }
386
- return "gemini-3.1-flash-image-preview";
495
+ return "gemini-2.5-flash-image";
387
496
  }
388
497
  function defaultTextModelForProvider(provider) {
389
498
  if (provider === "openai") {
@@ -401,32 +510,56 @@ function sourceImageOutputSizeForProvider(provider, model) {
401
510
  if (provider !== "gemini") {
402
511
  return undefined;
403
512
  }
404
- if (model === "gemini-3.1-flash-image-preview" || model === "gemini-3-pro-image-preview") {
513
+ if (model === "gemini-3-pro-image-preview") {
405
514
  return "1K";
406
515
  }
407
516
  return undefined;
408
517
  }
518
+ function providerFailoverOrder(primary) {
519
+ return [
520
+ primary,
521
+ ...supportedImageProviders.filter((provider) => provider !== primary),
522
+ ];
523
+ }
524
+ function resolveImageModelForAttempt(provider, configuredProvider, configuredImageModel) {
525
+ if (provider === configuredProvider && configuredImageModel) {
526
+ return configuredImageModel;
527
+ }
528
+ return defaultImageModelForProvider(provider);
529
+ }
530
+ function resolveTextModelForAttempt(provider, configuredProvider, configuredTextModel) {
531
+ if (provider === configuredProvider && configuredTextModel) {
532
+ return configuredTextModel;
533
+ }
534
+ return defaultTextModelForProvider(provider);
535
+ }
409
536
  function resolveTextStyleSpec(config) {
410
- const fontId = isTemplateFontId(config.captionFont) ? config.captionFont : "montserrat";
411
- const backgroundColorId = config.captionBackgroundColor === TEXT_BACKGROUND_NONE || isTemplateBackgroundColorId(config.captionBackgroundColor)
537
+ const fontId = isTemplateFontId(config.captionFont)
538
+ ? config.captionFont
539
+ : "montserrat";
540
+ const backgroundColorId = config.captionBackgroundColor === TEXT_BACKGROUND_NONE ||
541
+ isTemplateBackgroundColorId(config.captionBackgroundColor)
412
542
  ? config.captionBackgroundColor
413
543
  : TEXT_BACKGROUND_NONE;
414
- const fontOption = TEMPLATE_FONT_OPTIONS.find((option) => option.id === fontId) ?? TEMPLATE_FONT_OPTIONS[1];
544
+ const fontOption = TEMPLATE_FONT_OPTIONS.find((option) => option.id === fontId) ??
545
+ TEMPLATE_FONT_OPTIONS[1];
415
546
  const backgroundOption = backgroundColorId && backgroundColorId !== TEXT_BACKGROUND_NONE
416
- ? TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS.find((option) => option.id === backgroundColorId) ?? null
547
+ ? (TEMPLATE_TEXT_BACKGROUND_COLOR_OPTIONS.find((option) => option.id === backgroundColorId) ?? null)
417
548
  : null;
418
549
  return {
419
550
  fontId,
420
551
  fontFamily: fontOption.family,
421
552
  backgroundColorId: backgroundColorId ?? TEXT_BACKGROUND_NONE,
422
- backgroundColorHex: backgroundOption?.hex ?? null
553
+ backgroundColorHex: backgroundOption?.hex ?? null,
423
554
  };
424
555
  }
425
556
  function isTemplateFontId(value) {
426
- return typeof value === "string" && TEMPLATE_FONT_IDS.includes(value);
557
+ return (typeof value === "string" &&
558
+ TEMPLATE_FONT_IDS.includes(value));
427
559
  }
428
560
  function isTemplateBackgroundColorId(value) {
429
- return typeof value === "string" && TEMPLATE_TEXT_BACKGROUND_COLOR_IDS.includes(value);
561
+ return (typeof value === "string" &&
562
+ TEMPLATE_TEXT_BACKGROUND_COLOR_IDS.includes(value));
430
563
  }
431
564
  function pad2(value) {
432
565
  return String(value).padStart(2, "0");
@@ -439,42 +572,47 @@ const slideshowManifestSchema = z.object({
439
572
  size: z.object({
440
573
  width: z.number(),
441
574
  height: z.number(),
442
- aspectRatio: z.literal("9:16")
575
+ aspectRatio: z.literal("9:16"),
443
576
  }),
444
577
  textStyle: z.object({
445
578
  fontId: z.enum(TEMPLATE_FONT_IDS),
446
579
  fontFamily: z.string(),
447
580
  fill: z.string(),
448
- backgroundColorId: z.enum([TEXT_BACKGROUND_NONE, ...TEMPLATE_TEXT_BACKGROUND_COLOR_IDS]),
581
+ backgroundColorId: z.enum([
582
+ TEXT_BACKGROUND_NONE,
583
+ ...TEMPLATE_TEXT_BACKGROUND_COLOR_IDS,
584
+ ]),
449
585
  backgroundColorHex: z.string().nullable(),
450
586
  shadow: z.string(),
451
587
  availableFonts: z.array(z.object({
452
588
  id: z.enum(TEMPLATE_FONT_IDS),
453
589
  label: z.string(),
454
590
  family: z.string(),
455
- assetFile: z.string()
591
+ assetFile: z.string(),
456
592
  })),
457
593
  availableBackgroundColors: z.array(z.object({
458
594
  id: z.enum(TEMPLATE_TEXT_BACKGROUND_COLOR_IDS),
459
595
  label: z.string(),
460
- hex: z.string()
461
- }))
596
+ hex: z.string(),
597
+ })),
462
598
  }),
463
599
  safeArea: z.object({
464
600
  left: z.number(),
465
601
  right: z.number(),
466
602
  top: z.number(),
467
- bottom: z.number()
603
+ bottom: z.number(),
468
604
  }),
469
605
  metaDetails: z.object({
470
606
  title: z.string(),
471
607
  description: z.string(),
472
608
  pinned_comment: z.string(),
473
609
  location: z.string(),
474
- song: z.string()
610
+ song: z.string(),
475
611
  }),
476
612
  slides: z.array(z.object({
477
613
  index: z.number(),
614
+ sourceType: z.enum(["ai_prompt", "image_source"]),
615
+ sourceValue: z.string(),
478
616
  imagePrompt: z.string(),
479
617
  imagePromptAttachments: z.array(z.string().url()),
480
618
  overlayText: z.string(),
@@ -489,35 +627,160 @@ const slideshowManifestSchema = z.object({
489
627
  maxWidthPercent: z.number(),
490
628
  anchorXPercent: z.number().optional(),
491
629
  anchorYPercent: z.number().optional(),
492
- justification: z.string()
493
- })
494
- }))
630
+ justification: z.string(),
631
+ }),
632
+ })),
495
633
  });
496
634
  function normalizeSlideInput(input) {
497
635
  if (Array.isArray(input)) {
498
- return {
499
- imagePrompt: input[0],
636
+ return normalizeSlideFields({
637
+ primarySource: input[0],
500
638
  imagePromptAttachments: [],
501
639
  overlayText: input[1],
502
- durationMs: input[2] ?? 4000
503
- };
640
+ durationMs: input[2] ?? 4000,
641
+ });
504
642
  }
505
- return {
506
- imagePrompt: input.image_prompt,
643
+ return normalizeSlideFields({
644
+ primarySource: input.image_prompt,
507
645
  imagePromptAttachments: input.image_prompt_attachments,
508
646
  overlayText: input.caption,
509
- durationMs: input.duration_ms
647
+ durationMs: input.duration_ms,
648
+ });
649
+ }
650
/**
 * Turns loosely shaped slide fields into the normalized slide record.
 *
 * The primary source is trimmed and probed with resolveExistingImageSource.
 * A truthy result marks the slide as an "image_source" and becomes the
 * source value; otherwise the trimmed text is kept and the slide is an
 * "ai_prompt". The trimmed text is always stored as `imagePrompt`.
 *
 * @param {{primarySource: string, imagePromptAttachments: *, overlayText: *, durationMs: *}} input
 * @returns {{sourceType: string, sourceValue: string, imagePrompt: string, imagePromptAttachments: *, overlayText: *, durationMs: *}}
 */
function normalizeSlideFields(input) {
  const { primarySource, imagePromptAttachments, overlayText, durationMs } = input;
  const trimmedSource = primarySource.trim();
  const imageSource = resolveExistingImageSource(trimmedSource);
  const sourceType = imageSource ? "image_source" : "ai_prompt";
  return {
    sourceType,
    sourceValue: imageSource ?? trimmedSource,
    imagePrompt: trimmedSource,
    imagePromptAttachments,
    overlayText,
    durationMs,
  };
}
662
/**
 * Loads the bytes of an already existing slide image and reports its
 * content type.
 *
 * The source is first resolved with resolveExistingImageSource. Local
 * sources are read synchronously from disk; every other source is fetched.
 *
 * @param {string} sourceValue - URL or file path of the image.
 * @returns {Promise<{bytes: Uint8Array, contentType: string, revisedPrompt: null, prompt: string}>}
 * @throws {Error} When the source does not resolve to a supported image
 *   URL or an existing file path.
 */
async function loadExistingSlideImage(sourceValue) {
  const location = resolveExistingImageSource(sourceValue);
  if (!location) {
    throw new Error(`Slide source is not a supported image URL or file path: ${sourceValue}`);
  }
  let bytes;
  if (isLocalFileSource(location)) {
    bytes = readFileSync(resolveLocalFileSourcePath(location));
  } else {
    bytes = await fetchExternalImageBytes(location);
  }
  return {
    bytes,
    contentType: await detectImageContentType(bytes),
    revisedPrompt: null,
    // The caller's original string is echoed back, not the resolved location.
    prompt: sourceValue,
  };
}
678
/**
 * Decides whether a slide source string points at an existing image.
 *
 * @param {string} value - Raw slide source (URL, file path, or prompt text).
 * @returns {string|null} The trimmed value when it parses as an http:,
 *   https:, file: or data: URL; otherwise whatever
 *   resolveLocalPathIfPresent yields for it; null for blank input or when
 *   neither applies.
 */
function resolveExistingImageSource(value) {
  const candidate = value.trim();
  if (candidate.length === 0) {
    return null;
  }
  const protocol = parseUrl(candidate)?.protocol;
  const isSupportedUrl = protocol !== undefined &&
    ["http:", "https:", "file:", "data:"].includes(protocol);
  if (isSupportedUrl) {
    return candidate;
  }
  // Not a supported URL: fall back to probing the local file system.
  return resolveLocalPathIfPresent(candidate) || null;
}
694
/**
 * Parses a string as an absolute URL without throwing.
 *
 * @param {string} value - Candidate URL text.
 * @returns {URL|null} The parsed URL, or null when the URL constructor
 *   rejects the input.
 */
function parseUrl(value) {
  let parsed = null;
  try {
    parsed = new URL(value);
  }
  catch {
    // Not parseable as a URL; callers treat null as "not a URL".
  }
  return parsed;
}
702
/**
 * Resolves a string to an absolute local path if something exists there.
 *
 * A leading "~/" is expanded against the user's home directory, taken from
 * HOME and then USERPROFILE (the variable Windows sets). When neither is
 * available the "~/" path cannot be located and null is returned, instead
 * of silently resolving it against the current working directory.
 *
 * NOTE: existsSync is also true for directories, so a directory path is
 * reported as present here.
 *
 * @param {string} value - Candidate path, absolute, relative, or "~/"-prefixed.
 * @returns {string|null} The absolute path when it exists, otherwise null.
 */
function resolveLocalPathIfPresent(value) {
  let expandedPath = value;
  if (value.startsWith("~/")) {
    // `||` on purpose: an empty HOME is as unusable as an unset one.
    const homeDirectory = process.env.HOME || process.env.USERPROFILE;
    if (!homeDirectory) {
      return null;
    }
    expandedPath = path.join(homeDirectory, value.slice(2));
  }
  const candidate = path.resolve(expandedPath);
  return existsSync(candidate) ? candidate : null;
}
709
/**
 * Tells whether a resolved slide source must be read from the local disk.
 *
 * Only http:, https: and data: URLs count as remote. Everything else is
 * local, including file: URLs, strings that do not parse as URLs, and
 * strings whose "scheme" is really a Windows drive letter: the URL parser
 * reads "C:\dir\a.png" as a URL with protocol "c:", and such a path must
 * not be handed to fetch.
 *
 * @param {string} sourceValue - Source string, typically already resolved.
 * @returns {boolean} True when the source should be read from disk.
 */
function isLocalFileSource(sourceValue) {
  const protocol = parseUrl(sourceValue)?.protocol;
  return !["http:", "https:", "data:"].includes(protocol);
}
713
/**
 * Converts a local slide source into a file system path.
 *
 * @param {string} sourceValue - A file: URL or a plain path.
 * @returns {string} The path for a file: URL, otherwise the input resolved
 *   to an absolute path.
 */
function resolveLocalFileSourcePath(sourceValue) {
  const url = parseUrl(sourceValue);
  if (url?.protocol !== "file:") {
    return path.resolve(sourceValue);
  }
  return fileURLToPath(url);
}
719
/**
 * Downloads a remote slide image and returns its raw bytes.
 *
 * @param {string} sourceValue - URL passed straight to fetch.
 * @returns {Promise<Uint8Array>} The response body.
 * @throws {Error} When the response status is not ok; the message carries
 *   the status code and status text.
 */
async function fetchExternalImageBytes(sourceValue) {
  const response = await fetch(sourceValue);
  if (response.ok) {
    const payload = await response.arrayBuffer();
    return new Uint8Array(payload);
  }
  throw new Error(`Could not fetch slide source image: ${response.status} ${response.statusText}`);
}
726
/**
 * Determines the MIME type of image bytes by asking sharp for their metadata.
 *
 * @param {Uint8Array|Buffer} bytes - Encoded image data.
 * @returns {Promise<string>} The content type mapped from the reported
 *   format, or "image/png" when the metadata carries no format.
 * @throws {Error} When the metadata lookup fails. The original failure is
 *   attached as `cause` so its stack is not lost.
 */
async function detectImageContentType(bytes) {
  let metadata;
  try {
    metadata = await sharp(bytes).metadata();
  }
  catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Slide source could not be decoded as an image: ${reason}`, { cause: error });
  }
  return metadata.format
    ? contentTypeForSharpFormat(metadata.format)
    : "image/png";
}
738
/**
 * Maps a sharp format name to its MIME type.
 *
 * @param {string} format - Format name as reported in sharp metadata.
 * @returns {string} The known content type, or `image/<format>` for any
 *   format without an explicit entry.
 */
function contentTypeForSharpFormat(format) {
  // A Map (not a plain object) so names like "constructor" cannot hit
  // inherited properties.
  const knownTypes = new Map([
    ["jpeg", "image/jpeg"],
    ["png", "image/png"],
    ["webp", "image/webp"],
    ["gif", "image/gif"],
    ["avif", "image/avif"],
    ["tiff", "image/tiff"],
    ["svg", "image/svg+xml"],
    ["heif", "image/heif"],
  ]);
  return knownTypes.has(format) ? knownTypes.get(format) : `image/${format}`;
}
512
760
  async function generateMetaDetails(ctx, input) {
513
761
  const prompt = buildMetaDetailsPrompt(input.slides, input.metaDetailsPrompt);
514
- const response = await ctx.providers.generateText({
515
- provider: input.provider,
516
- model: input.textModel,
517
- prompt,
518
- temperature: 0.7
519
- });
520
- return parseMetaDetailsResponse(response.text, input.slides);
762
+ let lastError = null;
763
+ for (const provider of providerFailoverOrder(input.provider)) {
764
+ const textModel = resolveTextModelForAttempt(provider, input.configuredProvider, input.configuredTextModel);
765
+ try {
766
+ const response = await ctx.providers.generateText({
767
+ provider,
768
+ model: textModel,
769
+ prompt,
770
+ temperature: 0.7,
771
+ });
772
+ return parseMetaDetailsResponse(response.text, input.slides);
773
+ }
774
+ catch (error) {
775
+ lastError = error instanceof Error ? error : new Error(String(error));
776
+ ctx.logger.warn("Metadata provider failed, trying next provider", {
777
+ provider,
778
+ model: textModel,
779
+ message: lastError.message,
780
+ });
781
+ }
782
+ }
783
+ throw lastError ?? new Error("No metadata provider succeeded.");
521
784
  }
522
785
  function buildMetaDetailsPrompt(slides, metaDetailsPrompt) {
523
786
  return [
@@ -531,28 +794,38 @@ function buildMetaDetailsPrompt(slides, metaDetailsPrompt) {
531
794
  "Keep song under 120 characters.",
532
795
  "Write concise, high-performing, native-feeling TikTok social copy.",
533
796
  "Use the slide captions and visual prompts to infer the post theme.",
534
- metaDetailsPrompt ? `Additional user guidance:\n${metaDetailsPrompt}` : "No extra user guidance was supplied.",
797
+ metaDetailsPrompt
798
+ ? `Additional user guidance:\n${metaDetailsPrompt}`
799
+ : "No extra user guidance was supplied.",
535
800
  "Slides:",
536
801
  ...slides.map((slide) => [
537
802
  `Slide ${slide.index + 1}:`,
538
803
  `caption=${slide.overlayText}`,
539
804
  `image_prompt=${slide.imagePrompt}`,
540
- slide.imagePromptAttachments.length ? `attachments=${slide.imagePromptAttachments.join(", ")}` : "attachments=none",
541
- `duration_ms=${slide.durationMs}`
542
- ].join("\n"))
805
+ slide.imagePromptAttachments.length
806
+ ? `attachments=${slide.imagePromptAttachments.join(", ")}`
807
+ : "attachments=none",
808
+ `duration_ms=${slide.durationMs}`,
809
+ ].join("\n")),
543
810
  ].join("\n\n");
544
811
  }
545
812
  function parseMetaDetailsResponse(raw, slides) {
546
813
  const fallback = buildFallbackMetaDetails(slides);
547
- const cleaned = raw.trim().replace(/^```json\s*/i, "").replace(/^```\s*/i, "").replace(/\s*```$/i, "");
814
+ const cleaned = raw
815
+ .trim()
816
+ .replace(/^```json\s*/i, "")
817
+ .replace(/^```\s*/i, "")
818
+ .replace(/\s*```$/i, "");
548
819
  try {
549
- const parsed = z.object({
820
+ const parsed = z
821
+ .object({
550
822
  title: z.string().min(1),
551
823
  description: z.string().min(1),
552
824
  pinned_comment: z.string().min(1),
553
825
  location: z.string().min(1),
554
- song: z.string().min(1)
555
- }).parse(JSON.parse(cleaned));
826
+ song: z.string().min(1),
827
+ })
828
+ .parse(JSON.parse(cleaned));
556
829
  return parsed;
557
830
  }
558
831
  catch {
@@ -567,7 +840,7 @@ function buildFallbackMetaDetails(slides) {
567
840
  description: truncateText(`${leadCaption}. ${supportingCaption}.`, 300),
568
841
  pinned_comment: truncateText(`Which slide hit hardest: "${leadCaption}" or "${supportingCaption}"?`, 120),
569
842
  location: "United States",
570
- song: "Original Sound"
843
+ song: "Original Sound",
571
844
  };
572
845
  }
573
846
  function truncateText(value, maxLength) {
@@ -578,10 +851,7 @@ function truncateText(value, maxLength) {
578
851
  }
579
852
  async function renderFinishedSlide(imageBytes, overlayText, layout, textStyle) {
580
853
  const layers = await buildTextLayers(overlayText, layout, textStyle);
581
- return sharp(Buffer.from(imageBytes))
582
- .composite(layers)
583
- .png()
584
- .toBuffer();
854
+ return sharp(Buffer.from(imageBytes)).composite(layers).png().toBuffer();
585
855
  }
586
856
  async function buildTextLayers(overlayText, layout, textStyle) {
587
857
  const safeWidth = FRAME.width - TIKTOK_SAFE_AREA.left - TIKTOK_SAFE_AREA.right;
@@ -598,7 +868,7 @@ async function buildTextLayers(overlayText, layout, textStyle) {
598
868
  blockX: x,
599
869
  blockY: y,
600
870
  layout,
601
- textStyle
871
+ textStyle,
602
872
  })))).flat();
603
873
  const clippedLayers = await Promise.all(rawLayers.map((layer) => clipCompositeLayer(layer)));
604
874
  return clippedLayers.filter((layer) => Boolean(layer));
@@ -608,7 +878,8 @@ function fitLinesToBlock(text, blockWidth, fontId) {
608
878
  let lineHeight = pickLineHeight(fontSize, fontId);
609
879
  let maxChars = approximateMaxChars(blockWidth, fontSize);
610
880
  let lines = wrapText(text, maxChars);
611
- while ((lines.length > 6 || longestLine(lines) > maxChars + 1) && fontSize > 34) {
881
+ while ((lines.length > 6 || longestLine(lines) > maxChars + 1) &&
882
+ fontSize > 34) {
612
883
  fontSize -= 3;
613
884
  lineHeight = pickLineHeight(fontSize, fontId);
614
885
  maxChars = approximateMaxChars(blockWidth, fontSize);
@@ -651,19 +922,19 @@ function pickFontSize(text, fontId) {
651
922
  }
652
923
  function pickLineHeight(fontSize, fontId) {
653
924
  if (fontId === "yesteryear") {
654
- return Math.round(fontSize * 0.88);
925
+ return Math.round(fontSize * 1.04);
655
926
  }
656
927
  if (fontId === "dm_serif_display") {
657
- return Math.round(fontSize * 0.98);
928
+ return Math.round(fontSize * 1.08);
658
929
  }
659
- return Math.round(fontSize * 0.94);
930
+ return Math.round(fontSize * 1.12);
660
931
  }
661
932
  function approximateMaxChars(blockWidth, fontSize) {
662
933
  return clamp(Math.floor(blockWidth / Math.max(20, fontSize * 0.42)), 12, 34);
663
934
  }
664
935
  async function buildLineLayers(input) {
665
936
  const lineTop = input.blockY + input.lineIndex * input.lineHeight;
666
- const shadowPadding = 24;
937
+ const shadowPadding = 40;
667
938
  const fontAsset = overlayFonts[input.textStyle.fontId];
668
939
  const measured = await renderTrimmedTextBuffer({
669
940
  text: input.line,
@@ -672,7 +943,7 @@ async function buildLineLayers(input) {
672
943
  fontSize: input.fontSize,
673
944
  fontFamily: input.textStyle.fontFamily,
674
945
  fontPath: fontAsset.path,
675
- color: "#ffffff"
946
+ color: "#ffffff",
676
947
  });
677
948
  const lineLeft = input.layout.align === "center"
678
949
  ? input.blockX + Math.round((input.blockWidth - measured.width) / 2)
@@ -684,12 +955,12 @@ async function buildLineLayers(input) {
684
955
  const chip = await buildTextChip({
685
956
  width: measured.width,
686
957
  height: measured.height,
687
- fill: input.textStyle.backgroundColorHex
958
+ fill: input.textStyle.backgroundColorHex,
688
959
  });
689
960
  layers.push({
690
961
  input: chip,
691
962
  left: lineLeft - 14,
692
- top: lineTop - 8
963
+ top: lineTop - 8,
693
964
  });
694
965
  }
695
966
  const shadowFar = await renderShadowLayer({
@@ -697,16 +968,24 @@ async function buildLineLayers(input) {
697
968
  width: measured.width,
698
969
  height: measured.height,
699
970
  shadowPadding,
700
- blur: 3.1
971
+ blur: 8,
701
972
  });
702
973
  const shadowNear = await renderShadowLayer({
703
974
  textBuffer: measured.buffer,
704
975
  width: measured.width,
705
976
  height: measured.height,
706
977
  shadowPadding,
707
- blur: 1.15
978
+ blur: 2.4,
708
979
  });
709
- layers.push({ input: shadowFar, left: lineLeft - shadowPadding, top: lineTop - shadowPadding + 2 }, { input: shadowNear, left: lineLeft - shadowPadding, top: lineTop - shadowPadding + 1 }, { input: measured.buffer, left: lineLeft, top: lineTop });
980
+ layers.push({
981
+ input: shadowFar,
982
+ left: lineLeft - shadowPadding,
983
+ top: lineTop - shadowPadding + 8,
984
+ }, {
985
+ input: shadowNear,
986
+ left: lineLeft - shadowPadding,
987
+ top: lineTop - shadowPadding + 4,
988
+ }, { input: measured.buffer, left: lineLeft, top: lineTop });
710
989
  return layers;
711
990
  }
712
991
  async function renderTrimmedTextBuffer(input) {
@@ -720,8 +999,8 @@ async function renderTrimmedTextBuffer(input) {
720
999
  align: "left",
721
1000
  spacing: 0,
722
1001
  font: `${input.fontFamily} ${input.fontSize}`,
723
- fontfile: input.fontPath
724
- }
1002
+ fontfile: input.fontPath,
1003
+ },
725
1004
  })
726
1005
  .trim()
727
1006
  .png()
@@ -730,7 +1009,7 @@ async function renderTrimmedTextBuffer(input) {
730
1009
  return {
731
1010
  buffer: rendered,
732
1011
  width: metadata.width ?? input.blockWidth,
733
- height: metadata.height ?? input.lineHeight
1012
+ height: metadata.height ?? input.lineHeight,
734
1013
  };
735
1014
  }
736
1015
  async function renderShadowLayer(input) {
@@ -739,10 +1018,16 @@ async function renderShadowLayer(input) {
739
1018
  width: input.width + input.shadowPadding * 2,
740
1019
  height: input.height + input.shadowPadding * 2,
741
1020
  channels: 4,
742
- background: { r: 0, g: 0, b: 0, alpha: 0 }
743
- }
1021
+ background: { r: 0, g: 0, b: 0, alpha: 0 },
1022
+ },
744
1023
  })
745
- .composite([{ input: input.textBuffer, left: input.shadowPadding, top: input.shadowPadding }])
1024
+ .composite([
1025
+ {
1026
+ input: input.textBuffer,
1027
+ left: input.shadowPadding,
1028
+ top: input.shadowPadding,
1029
+ },
1030
+ ])
746
1031
  .modulate({ brightness: 0 })
747
1032
  .linear(0, 0)
748
1033
  .blur(input.blur)
@@ -793,7 +1078,8 @@ async function portraitScore(input) {
793
1078
  .raw()
794
1079
  .toBuffer();
795
1080
  const edgeContrast = edgeBandContrast(sample, thumbnailWidth, thumbnailHeight);
796
- return Math.abs(aspect - (9 / 16)) + edgeContrast;
1081
+ const edgeFlatnessPenalty = edgeBandFlatnessPenalty(sample, thumbnailWidth, thumbnailHeight);
1082
+ return Math.abs(aspect - 9 / 16) + edgeContrast + edgeFlatnessPenalty;
797
1083
  }
798
1084
  async function chooseLayoutFromImage(imageBytes, overlayText) {
799
1085
  const thumbnailWidth = 108;
@@ -809,7 +1095,8 @@ async function chooseLayoutFromImage(imageBytes, overlayText) {
809
1095
  for (const candidate of candidates) {
810
1096
  const estimate = estimateTextBlock(overlayText, candidate);
811
1097
  const region = regionForLayout(candidate, estimate.blockWidth, estimate.blockHeight);
812
- const score = scoreLayoutRegion(sample, thumbnailWidth, thumbnailHeight, region) + candidate.stylePenalty;
1098
+ const score = scoreLayoutRegion(sample, thumbnailWidth, thumbnailHeight, region) +
1099
+ candidate.stylePenalty;
813
1100
  if (score < bestScore) {
814
1101
  best = candidate;
815
1102
  bestScore = score;
@@ -821,7 +1108,7 @@ async function chooseLayoutFromImage(imageBytes, overlayText) {
821
1108
  maxWidthPercent: best.maxWidthPercent,
822
1109
  anchorXPercent: best.anchorXPercent,
823
1110
  anchorYPercent: best.anchorYPercent,
824
- justification: `${best.justification} Selected by local safe-zone scoring to minimize busy backgrounds and avoid TikTok UI chrome.`
1111
+ justification: `${best.justification} Selected by local safe-zone scoring to minimize busy backgrounds and avoid TikTok UI chrome.`,
825
1112
  };
826
1113
  }
827
1114
  async function chooseSlideLayout(ctx, input) {
@@ -831,7 +1118,7 @@ async function chooseSlideLayout(ctx, input) {
831
1118
  provider: input.provider,
832
1119
  model: input.model,
833
1120
  imageUrl: input.imageUrl,
834
- overlayText: input.overlayText
1121
+ overlayText: input.overlayText,
835
1122
  });
836
1123
  return normalizeAiLayout(analysis);
837
1124
  }
@@ -839,7 +1126,7 @@ async function chooseSlideLayout(ctx, input) {
839
1126
  ctx.logger.warn("AI layout analysis failed, falling back to local scorer", {
840
1127
  provider: input.provider,
841
1128
  model: input.model,
842
- message: error instanceof Error ? error.message : String(error)
1129
+ message: error instanceof Error ? error.message : String(error),
843
1130
  });
844
1131
  }
845
1132
  }
@@ -852,28 +1139,28 @@ function normalizeAiLayout(input) {
852
1139
  : normalizedAlign === "left"
853
1140
  ? 0.38
854
1141
  : 0.62;
855
- const anchorYPercent = input.zone === "top"
856
- ? 0.18
857
- : input.zone === "bottom"
858
- ? 0.38
859
- : 0.28;
1142
+ const anchorYPercent = input.zone === "top" ? 0.18 : input.zone === "bottom" ? 0.38 : 0.28;
860
1143
  return {
861
1144
  zone: input.zone === "bottom" ? "center" : input.zone,
862
1145
  align: normalizedAlign,
863
1146
  maxWidthPercent: clamp(input.maxWidthPercent, 46, 62),
864
1147
  anchorXPercent,
865
1148
  anchorYPercent,
866
- justification: `${input.justification} Normalized toward centered TikTok-native caption placement.`
1149
+ justification: `${input.justification} Normalized toward centered TikTok-native caption placement.`,
867
1150
  };
868
1151
  }
869
1152
  function estimateTextBlock(text, layout) {
870
1153
  const safeWidth = FRAME.width - TIKTOK_SAFE_AREA.left - TIKTOK_SAFE_AREA.right;
871
- const blockWidth = Math.round(safeWidth * clamp(layout.maxWidthPercent / 100, 0.4, 0.62));
1154
+ const centeredBounds = resolveNativeUiTextPaddingBounds();
1155
+ const availableWidth = layout.align === "center"
1156
+ ? FRAME.width - centeredBounds.minLeft * 2
1157
+ : safeWidth;
1158
+ const blockWidth = Math.round(availableWidth * clamp(layout.maxWidthPercent / 100, 0.4, 0.62));
872
1159
  const { fontSize, lines, lineHeight } = fitLinesToBlock(text, blockWidth, "montserrat");
873
1160
  return {
874
1161
  blockWidth,
875
1162
  blockHeight: Math.round(lines.length * lineHeight + 10),
876
- fontSize
1163
+ fontSize,
877
1164
  };
878
1165
  }
879
1166
  function regionForLayout(layout, blockWidth, blockHeight) {
@@ -882,18 +1169,25 @@ function regionForLayout(layout, blockWidth, blockHeight) {
882
1169
  function resolveTextBlockRect(layout, blockWidth, blockHeight) {
883
1170
  const safeWidth = FRAME.width - TIKTOK_SAFE_AREA.left - TIKTOK_SAFE_AREA.right;
884
1171
  const safeHeight = FRAME.height - TIKTOK_SAFE_AREA.top - TIKTOK_SAFE_AREA.bottom;
885
- if (layout.anchorXPercent !== undefined && layout.anchorYPercent !== undefined) {
1172
+ const centeredBounds = resolveNativeUiTextPaddingBounds(blockWidth);
1173
+ if (layout.anchorXPercent !== undefined &&
1174
+ layout.anchorYPercent !== undefined) {
1175
+ const centeredX = clamp(Math.round((FRAME.width - blockWidth) / 2), centeredBounds.minLeft, centeredBounds.maxLeft);
886
1176
  return {
887
- left: TIKTOK_SAFE_AREA.left + Math.round((safeWidth - blockWidth) * clamp(layout.anchorXPercent, 0, 1)),
888
- top: TIKTOK_SAFE_AREA.top + Math.round((safeHeight - blockHeight) * clamp(layout.anchorYPercent, 0, 1)),
1177
+ left: layout.align === "center"
1178
+ ? centeredX
1179
+ : TIKTOK_SAFE_AREA.left +
1180
+ Math.round((safeWidth - blockWidth) * clamp(layout.anchorXPercent, 0, 1)),
1181
+ top: TIKTOK_SAFE_AREA.top +
1182
+ Math.round((safeHeight - blockHeight) * clamp(layout.anchorYPercent, 0, 1)),
889
1183
  width: blockWidth,
890
- height: blockHeight
1184
+ height: blockHeight,
891
1185
  };
892
1186
  }
893
1187
  const x = layout.align === "left"
894
1188
  ? TIKTOK_SAFE_AREA.left
895
1189
  : layout.align === "center"
896
- ? TIKTOK_SAFE_AREA.left + Math.round((safeWidth - blockWidth) / 2)
1190
+ ? clamp(Math.round((FRAME.width - blockWidth) / 2), centeredBounds.minLeft, centeredBounds.maxLeft)
897
1191
  : FRAME.width - TIKTOK_SAFE_AREA.right - blockWidth;
898
1192
  const y = layout.zone === "top"
899
1193
  ? TIKTOK_SAFE_AREA.top + 72
@@ -904,7 +1198,17 @@ function resolveTextBlockRect(layout, blockWidth, blockHeight) {
904
1198
  left: x,
905
1199
  top: y,
906
1200
  width: blockWidth,
907
- height: blockHeight
1201
+ height: blockHeight,
1202
+ };
1203
+ }
1204
/**
 * Computes the allowed range for the left edge of a text block.
 *
 * The horizontal padding is the frame width times half the span between
 * NATIVE_UI_TEXT_ZONE's min and max centre-X percentages, rounded.
 *
 * @param {number} [blockWidth=0] - Width of the text block.
 * @returns {{minLeft: number, maxLeft: number}} `minLeft` is the padding;
 *   `maxLeft` is the frame width minus the padding and the block width,
 *   never less than `minLeft`.
 */
function resolveNativeUiTextPaddingBounds(blockWidth = 0) {
  const { minCenterXPercent, maxCenterXPercent } = NATIVE_UI_TEXT_ZONE;
  const halfZoneSpan = (maxCenterXPercent - minCenterXPercent) / 2;
  const minLeft = Math.round(FRAME.width * halfZoneSpan);
  const rightmostLeft = FRAME.width - minLeft - blockWidth;
  return {
    minLeft,
    maxLeft: Math.max(minLeft, rightmostLeft),
  };
}
910
1214
  function scoreLayoutRegion(sample, width, height, region) {
@@ -947,8 +1251,8 @@ function sampleStats(sample, width, height, region, padding) {
947
1251
  }
948
1252
  const index = y * width + x;
949
1253
  const current = sample[index] ?? 0;
950
- const leftPixel = x > 0 ? sample[index - 1] ?? current : current;
951
- const upPixel = y > 0 ? sample[index - width] ?? current : current;
1254
+ const leftPixel = x > 0 ? (sample[index - 1] ?? current) : current;
1255
+ const upPixel = y > 0 ? (sample[index - width] ?? current) : current;
952
1256
  total += current;
953
1257
  totalSq += current * current;
954
1258
  detail += Math.abs(current - leftPixel) + Math.abs(current - upPixel);
@@ -960,7 +1264,7 @@ function sampleStats(sample, width, height, region, padding) {
960
1264
  return {
961
1265
  mean,
962
1266
  variance,
963
- detail: detail / Math.max(count, 1)
1267
+ detail: detail / Math.max(count, 1),
964
1268
  };
965
1269
  }
966
1270
  function edgeBandContrast(sample, width, height) {
@@ -969,6 +1273,23 @@ function edgeBandContrast(sample, width, height) {
969
1273
  const bottom = bandActivity(sample, width, height, height - bandHeight, height);
970
1274
  return (top + bottom) / 220;
971
1275
  }
1276
/**
 * Penalizes samples whose top and bottom bands show much less activity
 * than the central band.
 *
 * Band activity comes from bandActivity: the top and bottom bands each span
 * 16% of the height (at least 8 rows) and the centre band spans 34%..66%.
 *
 * @param {*} sample - Pixel sample, passed through to bandActivity.
 * @param {number} width - Sample width.
 * @param {number} height - Sample height.
 * @returns {number} 0 when the centre activity is not positive or the
 *   edge-to-centre ratio is at least 0.72; otherwise the shortfall below
 *   0.72 scaled by 3.4.
 */
function edgeBandFlatnessPenalty(sample, width, height) {
  const activityBetween = (startY, endY) => bandActivity(sample, width, height, startY, endY);
  const edgeRows = Math.max(8, Math.floor(height * 0.16));
  const middleStart = Math.max(0, Math.floor(height * 0.34));
  const middleEnd = Math.min(height, Math.ceil(height * 0.66));
  const topActivity = activityBetween(0, edgeRows);
  const bottomActivity = activityBetween(height - edgeRows, height);
  const middleActivity = activityBetween(middleStart, middleEnd);
  const edgeMean = (topActivity + bottomActivity) / 2;
  if (middleActivity <= 0) {
    return 0;
  }
  const edgeToMiddle = edgeMean / middleActivity;
  if (edgeToMiddle >= 0.72) {
    return 0;
  }
  return (0.72 - edgeToMiddle) * 3.4;
}
972
1293
  function bandActivity(sample, width, height, startY, endY) {
973
1294
  let detail = 0;
974
1295
  let count = 0;
@@ -1011,11 +1332,14 @@ async function clipCompositeLayer(layer) {
1011
1332
  if (cropWidth <= 0 || cropHeight <= 0) {
1012
1333
  return null;
1013
1334
  }
1014
- if (cropLeft === 0 && cropTop === 0 && cropWidth === width && cropHeight === height) {
1335
+ if (cropLeft === 0 &&
1336
+ cropTop === 0 &&
1337
+ cropWidth === width &&
1338
+ cropHeight === height) {
1015
1339
  return {
1016
1340
  input: layer.input,
1017
1341
  left,
1018
- top
1342
+ top,
1019
1343
  };
1020
1344
  }
1021
1345
  const input = await sharp(layer.input)
@@ -1023,20 +1347,22 @@ async function clipCompositeLayer(layer) {
1023
1347
  left: cropLeft,
1024
1348
  top: cropTop,
1025
1349
  width: cropWidth,
1026
- height: cropHeight
1350
+ height: cropHeight,
1027
1351
  })
1028
1352
  .png()
1029
1353
  .toBuffer();
1030
1354
  return {
1031
1355
  input,
1032
1356
  left,
1033
- top
1357
+ top,
1034
1358
  };
1035
1359
  }
1036
1360
  function buildLayoutCandidates(overlayText) {
1037
1361
  const prefersCenteredText = overlayText.trim().length <= 42;
1038
1362
  const widthPercents = prefersCenteredText ? [48, 52, 56] : [50, 54, 58];
1039
- const centerAnchors = prefersCenteredText ? [0.44, 0.5, 0.56] : [0.42, 0.5, 0.58];
1363
+ const centerAnchors = prefersCenteredText
1364
+ ? [0.44, 0.5, 0.56]
1365
+ : [0.42, 0.5, 0.58];
1040
1366
  const yAnchors = [0.16, 0.22, 0.28, 0.34, 0.4];
1041
1367
  const candidates = [];
1042
1368
  for (const maxWidthPercent of widthPercents) {
@@ -1049,7 +1375,8 @@ function buildLayoutCandidates(overlayText) {
1049
1375
  anchorXPercent,
1050
1376
  anchorYPercent,
1051
1377
  justification: "Image-aware centered native TikTok caption zone.",
1052
- stylePenalty: scoreNativeUiAnchor(anchorXPercent, anchorYPercent) + (prefersCenteredText ? 0 : 4)
1378
+ stylePenalty: scoreNativeUiAnchor(anchorXPercent, anchorYPercent) +
1379
+ (prefersCenteredText ? 0 : 4),
1053
1380
  });
1054
1381
  }
1055
1382
  }
@@ -1058,10 +1385,12 @@ function buildLayoutCandidates(overlayText) {
1058
1385
  }
1059
1386
  function scoreNativeUiAnchor(anchorXPercent, anchorYPercent) {
1060
1387
  let penalty = 0;
1061
- if (anchorXPercent < NATIVE_UI_TEXT_ZONE.minCenterXPercent || anchorXPercent > NATIVE_UI_TEXT_ZONE.maxCenterXPercent) {
1388
+ if (anchorXPercent < NATIVE_UI_TEXT_ZONE.minCenterXPercent ||
1389
+ anchorXPercent > NATIVE_UI_TEXT_ZONE.maxCenterXPercent) {
1062
1390
  penalty += 10;
1063
1391
  }
1064
- if (anchorYPercent < NATIVE_UI_TEXT_ZONE.minCenterYPercent || anchorYPercent > NATIVE_UI_TEXT_ZONE.maxCenterYPercent) {
1392
+ if (anchorYPercent < NATIVE_UI_TEXT_ZONE.minCenterYPercent ||
1393
+ anchorYPercent > NATIVE_UI_TEXT_ZONE.maxCenterYPercent) {
1065
1394
  penalty += 10;
1066
1395
  }
1067
1396
  penalty += Math.abs(anchorXPercent - 0.5) * 24;
@@ -1075,23 +1404,18 @@ function resolveRemotionEntryPoint() {
1075
1404
  }
1076
1405
  return fileURLToPath(new URL("./remotion/index.tsx", import.meta.url));
1077
1406
  }
1078
- function resolveTemplateRemotionRuntimeConfig() {
1407
+ function resolveTemplateRuntimeConfig() {
1079
1408
  const candidates = [
1080
1409
  fileURLToPath(new URL("../template.config.json", import.meta.url)),
1081
- path.resolve(process.cwd(), "templates/template_0000/template.config.json")
1410
+ path.resolve(process.cwd(), "templates/template_0000/template.config.json"),
1082
1411
  ];
1083
1412
  for (const candidate of candidates) {
1084
1413
  if (!existsSync(candidate)) {
1085
1414
  continue;
1086
1415
  }
1087
- const parsed = JSON.parse(readFileSync(candidate, "utf8"));
1088
- return {
1089
- serveUrl: parsed.remotion?.serve_url ?? undefined
1090
- };
1416
+ return JSON.parse(readFileSync(candidate, "utf8"));
1091
1417
  }
1092
- return {
1093
- serveUrl: undefined
1094
- };
1418
+ return {};
1095
1419
  }
1096
1420
  function resolveSkillPath() {
1097
1421
  const builtPath = fileURLToPath(new URL("../SKILL.md", import.meta.url));
@@ -1104,7 +1428,7 @@ function resolveOverlayFonts() {
1104
1428
  const resolved = Object.fromEntries(TEMPLATE_FONT_OPTIONS.map((option) => {
1105
1429
  const candidates = [
1106
1430
  fileURLToPath(new URL(`../assets/${option.assetFile}`, import.meta.url)),
1107
- path.resolve(process.cwd(), `templates/template_0000/assets/${option.assetFile}`)
1431
+ path.resolve(process.cwd(), `templates/template_0000/assets/${option.assetFile}`),
1108
1432
  ];
1109
1433
  for (const candidate of candidates) {
1110
1434
  if (existsSync(candidate)) {