@mevdragon/vidfarm-devcli 0.2.2 → 0.2.3

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import { promisify } from "node:util";
5
5
  import { config } from "../config.js";
6
6
  import { database } from "../db.js";
7
7
  import { createId } from "../lib/ids.js";
8
+ import { defaultSkillPathForTemplateModule, deriveTemplateRootDirFromModulePath } from "../lib/template-paths.js";
8
9
  import { nowIso } from "../lib/time.js";
9
10
  import { loadTemplateFromModule } from "./template-loader.js";
10
11
  import { TemplateCertificationService } from "./template-certification.js";
@@ -18,6 +19,7 @@ export class TemplateSourceService {
18
19
  return database.listTemplateReleases(templateId);
19
20
  }
20
21
  registerSource(input) {
22
+ deriveTemplateRootDirFromModulePath(input.templateModulePath);
21
23
  const existingByTemplateId = database.getTemplateSourceByTemplateId(input.templateId);
22
24
  if (existingByTemplateId) {
23
25
  throw new Error("A template with this template_id already exists. Generate a new UUIDv4 and try again.");
@@ -33,7 +35,7 @@ export class TemplateSourceService {
33
35
  repoUrl: input.repoUrl,
34
36
  branch: input.branch ?? "production",
35
37
  templateModulePath: input.templateModulePath,
36
- skillPath: input.skillPath ?? "SKILL.md",
38
+ skillPath: input.skillPath ?? defaultSkillPathForTemplateModule(input.templateModulePath),
37
39
  installCommand: input.installCommand ?? "npm install",
38
40
  buildCommand: input.buildCommand ?? "npm run build",
39
41
  status: "active"
@@ -47,7 +49,6 @@ export class TemplateSourceService {
47
49
  const commitSha = input.commitSha ?? await this.resolveBranchHead(source.repoUrl, source.branch);
48
50
  const checkoutPath = path.join(config.TEMPLATE_SOURCE_ROOT, source.templateId, commitSha);
49
51
  const skillPath = path.join(checkoutPath, source.skillPath);
50
- const modulePath = path.join(checkoutPath, source.templateModulePath);
51
52
  if (!existsSync(checkoutPath)) {
52
53
  mkdirSync(path.dirname(checkoutPath), { recursive: true });
53
54
  await this.runShell(["git", "clone", "--branch", source.branch, source.repoUrl, checkoutPath], process.cwd());
@@ -59,6 +60,7 @@ export class TemplateSourceService {
59
60
  await this.runCommandString(source.buildCommand, checkoutPath);
60
61
  }
61
62
  }
63
+ const modulePath = this.resolveImportableModulePath(checkoutPath, source.templateModulePath);
62
64
  const template = await loadTemplateFromModule(modulePath);
63
65
  if (template.id !== source.templateId) {
64
66
  throw new Error(`Imported template id ${template.id} does not match source template id ${source.templateId}.`);
@@ -130,6 +132,30 @@ export class TemplateSourceService {
130
132
  await execFileAsync(args[0], args.slice(1), { cwd });
131
133
  }
132
134
  async runCommandString(command, cwd) {
133
- await execFileAsync("/bin/sh", ["-lc", command], { cwd });
135
+ await execFileAsync("/bin/sh", ["-lc", command], {
136
+ cwd,
137
+ env: {
138
+ ...process.env,
139
+ NODE_ENV: "development",
140
+ npm_config_production: "false",
141
+ NPM_CONFIG_PRODUCTION: "false",
142
+ NPM_CONFIG_INCLUDE: "dev"
143
+ }
144
+ });
145
+ }
146
+ resolveImportableModulePath(checkoutPath, declaredModulePath) {
147
+ const sourceModulePath = path.join(checkoutPath, declaredModulePath);
148
+ const extension = path.extname(sourceModulePath).toLowerCase();
149
+ if (extension === ".ts" || extension === ".tsx") {
150
+ const compiledModulePath = sourceModulePath.replace(/\.(ts|tsx)$/i, ".js");
151
+ if (existsSync(compiledModulePath)) {
152
+ return compiledModulePath;
153
+ }
154
+ throw new Error(`Template module path ${declaredModulePath} is TypeScript, but the compiled module ${path.relative(checkoutPath, compiledModulePath)} was not found after build.`);
155
+ }
156
+ if (existsSync(sourceModulePath)) {
157
+ return sourceModulePath;
158
+ }
159
+ throw new Error(`Template module path ${declaredModulePath} was not found in the imported release.`);
134
160
  }
135
161
  }
@@ -4,25 +4,60 @@ export async function normalizeToPortraitFrame(input, target = { width: 1080, he
4
4
  const oriented = sharp(buffer, { density: 144 }).rotate();
5
5
  const trimmed = await trimFlatBorders(oriented);
6
6
  const targetAspect = target.width / target.height;
7
- // If the model already returned a near-native vertical frame, avoid the
8
- // activity crop pass because it can misread low-detail edges as padding.
9
7
  const shouldPreserveFraming = await isCloseToAspect(trimmed, targetAspect, 0.025);
10
8
  const cropped = shouldPreserveFraming ? trimmed : await cropToActiveImageRegion(trimmed);
11
- const portraitCrop = await cropToPortraitAspect(cropped, targetAspect);
12
- const output = await portraitCrop
9
+ const normalized = await resizeWithBlurredContain(cropped, target, targetAspect);
10
+ return {
11
+ bytes: normalized,
12
+ contentType: "image/png",
13
+ width: target.width,
14
+ height: target.height
15
+ };
16
+ }
17
+ async function resizeWithBlurredContain(image, target, targetAspect) {
18
+ const metadata = await image.metadata();
19
+ const width = metadata.width ?? 0;
20
+ const height = metadata.height ?? 0;
21
+ if (!width || !height) {
22
+ return image
23
+ .resize(target.width, target.height, {
24
+ fit: "cover",
25
+ position: sharp.strategy.attention
26
+ })
27
+ .png()
28
+ .toBuffer();
29
+ }
30
+ const aspect = width / height;
31
+ if (Math.abs(aspect - targetAspect) <= 0.015) {
32
+ return image
33
+ .resize(target.width, target.height, {
34
+ fit: "fill"
35
+ })
36
+ .png()
37
+ .toBuffer();
38
+ }
39
+ const foreground = await image
40
+ .clone()
41
+ .resize(target.width, target.height, {
42
+ fit: "contain",
43
+ background: { r: 0, g: 0, b: 0, alpha: 0 }
44
+ })
45
+ .png()
46
+ .toBuffer();
47
+ const background = await image
48
+ .clone()
13
49
  .resize(target.width, target.height, {
14
50
  fit: "cover",
15
51
  position: sharp.strategy.attention
16
52
  })
53
+ .blur(20)
54
+ .modulate({ brightness: 0.92, saturation: 0.9 })
55
+ .png()
56
+ .toBuffer();
57
+ return sharp(background)
58
+ .composite([{ input: foreground }])
17
59
  .png()
18
60
  .toBuffer();
19
- const exact = await ensureExactPixelSize(output, target);
20
- return {
21
- bytes: exact,
22
- contentType: "image/png",
23
- width: target.width,
24
- height: target.height
25
- };
26
61
  }
27
62
  async function isCloseToAspect(image, targetAspect, tolerance) {
28
63
  const metadata = await image.metadata();
@@ -77,27 +112,6 @@ async function cropToActiveImageRegion(image) {
77
112
  const height = Math.min(sourceHeight - top, Math.max(1, Math.ceil((bounds.height / sampleHeight) * sourceHeight)));
78
113
  return image.extract({ left, top, width, height });
79
114
  }
80
- async function cropToPortraitAspect(image, targetAspect) {
81
- const metadata = await image.metadata();
82
- const sourceWidth = metadata.width ?? 0;
83
- const sourceHeight = metadata.height ?? 0;
84
- if (!sourceWidth || !sourceHeight) {
85
- return image;
86
- }
87
- const sourceAspect = sourceWidth / sourceHeight;
88
- if (Math.abs(sourceAspect - targetAspect) < 0.015) {
89
- return image;
90
- }
91
- const focus = await detectFocusPoint(image, sourceWidth, sourceHeight);
92
- if (sourceAspect > targetAspect) {
93
- const cropWidth = Math.max(1, Math.min(sourceWidth, Math.round(sourceHeight * targetAspect)));
94
- const left = clamp(Math.round(focus.x - cropWidth / 2), 0, sourceWidth - cropWidth);
95
- return image.extract({ left, top: 0, width: cropWidth, height: sourceHeight });
96
- }
97
- const cropHeight = Math.max(1, Math.min(sourceHeight, Math.round(sourceWidth / targetAspect)));
98
- const top = clamp(Math.round(focus.y - cropHeight / 2), 0, sourceHeight - cropHeight);
99
- return image.extract({ left: 0, top, width: sourceWidth, height: cropHeight });
100
- }
101
115
  function detectActiveBounds(sample, width, height) {
102
116
  const rowActivity = new Array(height).fill(0);
103
117
  const colActivity = new Array(width).fill(0);
@@ -142,46 +156,6 @@ function detectActiveBounds(sample, width, height) {
142
156
  height: croppedHeight
143
157
  };
144
158
  }
145
- async function detectFocusPoint(image, sourceWidth, sourceHeight) {
146
- const sampleWidth = 120;
147
- const sampleHeight = Math.max(1, Math.round((sourceHeight / Math.max(sourceWidth, 1)) * sampleWidth));
148
- const sample = await image
149
- .clone()
150
- .resize(sampleWidth, sampleHeight, { fit: "fill" })
151
- .grayscale()
152
- .raw()
153
- .toBuffer();
154
- let weightedX = 0;
155
- let weightedY = 0;
156
- let totalWeight = 0;
157
- for (let y = 0; y < sampleHeight; y += 1) {
158
- for (let x = 0; x < sampleWidth; x += 1) {
159
- const index = y * sampleWidth + x;
160
- const current = sample[index] ?? 0;
161
- const left = x > 0 ? sample[index - 1] ?? current : current;
162
- const up = y > 0 ? sample[index - sampleWidth] ?? current : current;
163
- const right = x < sampleWidth - 1 ? sample[index + 1] ?? current : current;
164
- const down = y < sampleHeight - 1 ? sample[index + sampleWidth] ?? current : current;
165
- const energy = Math.abs(current - left) +
166
- Math.abs(current - right) +
167
- Math.abs(current - up) +
168
- Math.abs(current - down);
169
- const centerBiasX = 1 - Math.abs((x + 0.5) / sampleWidth - 0.5) * 0.45;
170
- const centerBiasY = 1 - Math.abs((y + 0.5) / sampleHeight - 0.5) * 0.35;
171
- const weight = Math.max(energy, 1) * centerBiasX * centerBiasY;
172
- weightedX += (x + 0.5) * weight;
173
- weightedY += (y + 0.5) * weight;
174
- totalWeight += weight;
175
- }
176
- }
177
- if (totalWeight <= 0) {
178
- return { x: sourceWidth / 2, y: sourceHeight / 2 };
179
- }
180
- return {
181
- x: (weightedX / totalWeight / sampleWidth) * sourceWidth,
182
- y: (weightedY / totalWeight / sampleHeight) * sourceHeight
183
- };
184
- }
185
159
  function findActiveRange(values, minSpan) {
186
160
  const mean = values.reduce((sum, value) => sum + value, 0) / Math.max(values.length, 1);
187
161
  const max = values.reduce((best, value) => Math.max(best, value), 0);
@@ -226,17 +200,3 @@ function smoothSeries(values, radius) {
226
200
  function clamp(value, min, max) {
227
201
  return Math.min(Math.max(value, min), max);
228
202
  }
229
- async function ensureExactPixelSize(input, target) {
230
- const metadata = await sharp(input).metadata();
231
- if (metadata.width === target.width && metadata.height === target.height) {
232
- return input;
233
- }
234
- const exactCrop = await cropToPortraitAspect(sharp(input), target.width / target.height);
235
- return exactCrop
236
- .resize(target.width, target.height, {
237
- fit: "cover",
238
- position: sharp.strategy.attention
239
- })
240
- .png()
241
- .toBuffer();
242
- }
@@ -27,6 +27,31 @@ const NATIVE_UI_TEXT_ZONE = {
27
27
  maxCenterYPercent: 0.58,
28
28
  };
29
29
  const TEXT_BACKGROUND_NONE = "none";
30
+ // Documentation-only dependency map for humans and agents.
31
+ // The current template standard has no first-class dependencies field, so
32
+ // provider/model requirements must be declared in source and SKILL.md.
33
+ const TEMPLATE_PROVIDER_REQUIREMENTS = {
34
+ image: [
35
+ { provider: "openai", models: ["gpt-image-1", "gpt-image-2"], strict: false },
36
+ { provider: "gemini", models: ["gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"], strict: false },
37
+ { provider: "openrouter", models: ["bytedance/seedance-2.0", "bytedance-seed/seedream-4.5"], strict: false, planned: true },
38
+ ],
39
+ text: [
40
+ { provider: "openai", models: ["gpt-5.4"], strict: false },
41
+ { provider: "gemini", models: ["gemini-3.1-flash-lite", "gemini-2.5-flash-lite"], strict: false },
42
+ { provider: "openrouter", models: ["qwen/qwen3.6-flash"], strict: false },
43
+ ],
44
+ layout_analysis: [
45
+ { provider: "openai", models: ["gpt-5.4"], strict: false },
46
+ { provider: "gemini", models: ["gemini-3.1-flash-lite", "gemini-2.5-flash-lite"], strict: false },
47
+ { provider: "openrouter", models: ["qwen/qwen3.6-flash"], strict: false },
48
+ ],
49
+ video: [
50
+ { provider: "openai", models: ["sora-2"], strict: false, planned: true },
51
+ { provider: "gemini", models: ["veo-3.0-generate-001"], strict: false, planned: true },
52
+ ],
53
+ };
54
+ void TEMPLATE_PROVIDER_REQUIREMENTS;
30
55
  const legacySlideInputSchema = z.union([
31
56
  z.tuple([z.string().min(3), z.string().min(1)]),
32
57
  z.tuple([
@@ -132,29 +157,43 @@ export const template0000Definition = defineTemplate({
132
157
  const payload = createSlideshowInputSchema.parse(input);
133
158
  ctx.logger.progress(0.04, "Starting template_0000 slideshow image stage");
134
159
  const provider = parseImageProvider(ctx.templateConfig.defaultProvider);
135
- const textModel = String(ctx.templateConfig.textModel ?? defaultTextModelForProvider(provider));
136
- const imageModel = String(ctx.templateConfig.imageModel ?? defaultImageModelForProvider(provider));
160
+ const configuredTextModel = typeof ctx.templateConfig.textModel === "string"
161
+ ? ctx.templateConfig.textModel
162
+ : null;
163
+ const configuredImageModel = typeof ctx.templateConfig.imageModel === "string"
164
+ ? ctx.templateConfig.imageModel
165
+ : null;
166
+ const textModel = String(configuredTextModel ?? defaultTextModelForProvider(provider));
167
+ const imageModel = String(configuredImageModel ?? defaultImageModelForProvider(provider));
137
168
  const textStyle = resolveTextStyleSpec(ctx.templateConfig);
138
169
  const slides = [];
139
170
  for (const [index, rawSlide] of payload.slides.entries()) {
140
- const { imagePrompt, imagePromptAttachments, overlayText, durationMs } = normalizeSlideInput(rawSlide);
141
- ctx.logger.progress(0.08 + (index / payload.slides.length) * 0.42, `Generating slide ${index + 1} image`);
142
- const image = await generatePortraitCandidateImage(ctx, {
143
- provider,
144
- imageModel,
145
- imagePrompt,
146
- imagePromptAttachments,
147
- overlayText,
148
- });
149
- await ctx.billing.record({
150
- type: "ai_generation",
151
- costUsd: 0.04,
152
- metadata: {
153
- stage: "image_generation",
154
- slideIndex: index,
155
- model: imageModel,
156
- },
157
- });
171
+ const { sourceType, sourceValue, imagePrompt, imagePromptAttachments, overlayText, durationMs, } = normalizeSlideInput(rawSlide);
172
+ ctx.logger.progress(0.08 + (index / payload.slides.length) * 0.42, sourceType === "image_source"
173
+ ? `Loading slide ${index + 1} source image`
174
+ : `Generating slide ${index + 1} image`);
175
+ const image = sourceType === "image_source"
176
+ ? await loadExistingSlideImage(sourceValue)
177
+ : await generatePortraitCandidateImage(ctx, {
178
+ provider,
179
+ imageModel,
180
+ configuredProvider: provider,
181
+ configuredImageModel,
182
+ imagePrompt,
183
+ imagePromptAttachments,
184
+ overlayText,
185
+ });
186
+ if (sourceType === "ai_prompt") {
187
+ await ctx.billing.record({
188
+ type: "ai_generation",
189
+ costUsd: 0.04,
190
+ metadata: {
191
+ stage: "image_generation",
192
+ slideIndex: index,
193
+ model: imageModel,
194
+ },
195
+ });
196
+ }
158
197
  ctx.logger.progress(0.13 + (index / payload.slides.length) * 0.3, `Normalizing slide ${index + 1} to strict 9:16 portrait`);
159
198
  const normalizedImage = await normalizeToPortraitFrame(image.bytes, FRAME);
160
199
  const backgroundArtifact = await ctx.storage.putBuffer(`backgrounds/slide-${pad2(index + 1)}.png`, normalizedImage.bytes, {
@@ -192,6 +231,8 @@ export const template0000Definition = defineTemplate({
192
231
  });
193
232
  slides.push({
194
233
  index,
234
+ sourceType,
235
+ sourceValue,
195
236
  imagePrompt,
196
237
  imagePromptAttachments,
197
238
  overlayText,
@@ -207,6 +248,8 @@ export const template0000Definition = defineTemplate({
207
248
  const metaDetails = await generateMetaDetails(ctx, {
208
249
  provider,
209
250
  textModel,
251
+ configuredProvider: provider,
252
+ configuredTextModel,
210
253
  slides,
211
254
  metaDetailsPrompt: payload.meta_details_prompt,
212
255
  });
@@ -254,6 +297,8 @@ export const template0000Definition = defineTemplate({
254
297
  meta_details_prompt: payload.meta_details_prompt ?? null,
255
298
  slides: slides.map((slide) => ({
256
299
  index: slide.index,
300
+ sourceType: slide.sourceType,
301
+ sourceValue: slide.sourceValue,
257
302
  imagePrompt: slide.imagePrompt,
258
303
  imagePromptAttachments: slide.imagePromptAttachments,
259
304
  overlayText: slide.overlayText,
@@ -374,26 +419,43 @@ async function generatePortraitCandidateImage(ctx, input) {
374
419
  ].join("\n"),
375
420
  ];
376
421
  let best = null;
377
- for (const prompt of attempts) {
378
- const image = await ctx.providers.generateImage({
379
- provider: input.provider,
380
- model: input.imageModel,
381
- prompt,
382
- promptAttachments: input.imagePromptAttachments,
383
- size: sourceImageSizeForProvider(input.provider),
384
- aspectRatio: "9:16",
385
- imageSize: sourceImageOutputSizeForProvider(input.provider, input.imageModel),
386
- });
387
- const score = await portraitScore(image.bytes);
388
- if (!best || score < best.score) {
389
- best = { ...image, prompt, score };
422
+ let lastError = null;
423
+ for (const provider of providerFailoverOrder(input.provider)) {
424
+ const imageModel = resolveImageModelForAttempt(provider, input.configuredProvider, input.configuredImageModel);
425
+ try {
426
+ for (const prompt of attempts) {
427
+ const image = await ctx.providers.generateImage({
428
+ provider,
429
+ model: imageModel,
430
+ prompt,
431
+ promptAttachments: input.imagePromptAttachments,
432
+ size: sourceImageSizeForProvider(provider),
433
+ aspectRatio: "9:16",
434
+ imageSize: sourceImageOutputSizeForProvider(provider, imageModel),
435
+ });
436
+ const score = await portraitScore(image.bytes);
437
+ if (!best || score < best.score) {
438
+ best = { ...image, prompt, score };
439
+ }
440
+ if (score <= 0.16) {
441
+ return best;
442
+ }
443
+ }
444
+ if (best) {
445
+ return best;
446
+ }
390
447
  }
391
- if (score <= 0.16) {
392
- break;
448
+ catch (error) {
449
+ lastError = error instanceof Error ? error : new Error(String(error));
450
+ ctx.logger.warn("Image generation provider failed, trying next provider", {
451
+ provider,
452
+ model: imageModel,
453
+ message: lastError.message,
454
+ });
393
455
  }
394
456
  }
395
457
  if (!best) {
396
- throw new Error("No image candidate was generated.");
458
+ throw lastError ?? new Error("No image candidate was generated.");
397
459
  }
398
460
  return best;
399
461
  }
@@ -419,7 +481,7 @@ function isImageProvider(value) {
419
481
  }
420
482
  function sourceImageSizeForProvider(provider) {
421
483
  if (provider === "openai") {
422
- return "1024x1792";
484
+ return "1024x1536";
423
485
  }
424
486
  return "1080x1920";
425
487
  }
@@ -430,7 +492,7 @@ function defaultImageModelForProvider(provider) {
430
492
  if (provider === "openrouter") {
431
493
  return "bytedance-seed/seedream-4.5";
432
494
  }
433
- return "gemini-3.1-flash-image-preview";
495
+ return "gemini-2.5-flash-image";
434
496
  }
435
497
  function defaultTextModelForProvider(provider) {
436
498
  if (provider === "openai") {
@@ -448,12 +510,29 @@ function sourceImageOutputSizeForProvider(provider, model) {
448
510
  if (provider !== "gemini") {
449
511
  return undefined;
450
512
  }
451
- if (model === "gemini-3.1-flash-image-preview" ||
452
- model === "gemini-3-pro-image-preview") {
513
+ if (model === "gemini-3-pro-image-preview") {
453
514
  return "1K";
454
515
  }
455
516
  return undefined;
456
517
  }
518
+ function providerFailoverOrder(primary) {
519
+ return [
520
+ primary,
521
+ ...supportedImageProviders.filter((provider) => provider !== primary),
522
+ ];
523
+ }
524
+ function resolveImageModelForAttempt(provider, configuredProvider, configuredImageModel) {
525
+ if (provider === configuredProvider && configuredImageModel) {
526
+ return configuredImageModel;
527
+ }
528
+ return defaultImageModelForProvider(provider);
529
+ }
530
+ function resolveTextModelForAttempt(provider, configuredProvider, configuredTextModel) {
531
+ if (provider === configuredProvider && configuredTextModel) {
532
+ return configuredTextModel;
533
+ }
534
+ return defaultTextModelForProvider(provider);
535
+ }
457
536
  function resolveTextStyleSpec(config) {
458
537
  const fontId = isTemplateFontId(config.captionFont)
459
538
  ? config.captionFont
@@ -532,6 +611,8 @@ const slideshowManifestSchema = z.object({
532
611
  }),
533
612
  slides: z.array(z.object({
534
613
  index: z.number(),
614
+ sourceType: z.enum(["ai_prompt", "image_source"]),
615
+ sourceValue: z.string(),
535
616
  imagePrompt: z.string(),
536
617
  imagePromptAttachments: z.array(z.string().url()),
537
618
  overlayText: z.string(),
@@ -552,29 +633,154 @@ const slideshowManifestSchema = z.object({
552
633
  });
553
634
  function normalizeSlideInput(input) {
554
635
  if (Array.isArray(input)) {
555
- return {
556
- imagePrompt: input[0],
636
+ return normalizeSlideFields({
637
+ primarySource: input[0],
557
638
  imagePromptAttachments: [],
558
639
  overlayText: input[1],
559
640
  durationMs: input[2] ?? 4000,
560
- };
641
+ });
561
642
  }
562
- return {
563
- imagePrompt: input.image_prompt,
643
+ return normalizeSlideFields({
644
+ primarySource: input.image_prompt,
564
645
  imagePromptAttachments: input.image_prompt_attachments,
565
646
  overlayText: input.caption,
566
647
  durationMs: input.duration_ms,
648
+ });
649
+ }
650
+ function normalizeSlideFields(input) {
651
+ const sourceValue = input.primarySource.trim();
652
+ const existingImageSource = resolveExistingImageSource(sourceValue);
653
+ return {
654
+ sourceType: existingImageSource ? "image_source" : "ai_prompt",
655
+ sourceValue: existingImageSource ?? sourceValue,
656
+ imagePrompt: sourceValue,
657
+ imagePromptAttachments: input.imagePromptAttachments,
658
+ overlayText: input.overlayText,
659
+ durationMs: input.durationMs,
567
660
  };
568
661
  }
662
+ async function loadExistingSlideImage(sourceValue) {
663
+ const resolved = resolveExistingImageSource(sourceValue);
664
+ if (!resolved) {
665
+ throw new Error(`Slide source is not a supported image URL or file path: ${sourceValue}`);
666
+ }
667
+ const bytes = isLocalFileSource(resolved)
668
+ ? readFileSync(resolveLocalFileSourcePath(resolved))
669
+ : await fetchExternalImageBytes(resolved);
670
+ const contentType = await detectImageContentType(bytes);
671
+ return {
672
+ bytes,
673
+ contentType,
674
+ revisedPrompt: null,
675
+ prompt: sourceValue,
676
+ };
677
+ }
678
+ function resolveExistingImageSource(value) {
679
+ const trimmed = value.trim();
680
+ if (!trimmed) {
681
+ return null;
682
+ }
683
+ const parsedUrl = parseUrl(trimmed);
684
+ if (parsedUrl &&
685
+ ["http:", "https:", "file:", "data:"].includes(parsedUrl.protocol)) {
686
+ return trimmed;
687
+ }
688
+ const localPath = resolveLocalPathIfPresent(trimmed);
689
+ if (localPath) {
690
+ return localPath;
691
+ }
692
+ return null;
693
+ }
694
+ function parseUrl(value) {
695
+ try {
696
+ return new URL(value);
697
+ }
698
+ catch {
699
+ return null;
700
+ }
701
+ }
702
+ function resolveLocalPathIfPresent(value) {
703
+ const expandedPath = value.startsWith("~/")
704
+ ? path.join(process.env.HOME ?? "", value.slice(2))
705
+ : value;
706
+ const candidate = path.resolve(expandedPath);
707
+ return existsSync(candidate) ? candidate : null;
708
+ }
709
+ function isLocalFileSource(sourceValue) {
710
+ const parsedUrl = parseUrl(sourceValue);
711
+ return !parsedUrl || parsedUrl.protocol === "file:";
712
+ }
713
+ function resolveLocalFileSourcePath(sourceValue) {
714
+ const parsedUrl = parseUrl(sourceValue);
715
+ return parsedUrl?.protocol === "file:"
716
+ ? fileURLToPath(parsedUrl)
717
+ : path.resolve(sourceValue);
718
+ }
719
+ async function fetchExternalImageBytes(sourceValue) {
720
+ const response = await fetch(sourceValue);
721
+ if (!response.ok) {
722
+ throw new Error(`Could not fetch slide source image: ${response.status} ${response.statusText}`);
723
+ }
724
+ return new Uint8Array(await response.arrayBuffer());
725
+ }
726
+ async function detectImageContentType(bytes) {
727
+ try {
728
+ const metadata = await sharp(bytes).metadata();
729
+ if (metadata.format) {
730
+ return contentTypeForSharpFormat(metadata.format);
731
+ }
732
+ }
733
+ catch (error) {
734
+ throw new Error(`Slide source could not be decoded as an image: ${error instanceof Error ? error.message : String(error)}`);
735
+ }
736
+ return "image/png";
737
+ }
738
+ function contentTypeForSharpFormat(format) {
739
+ switch (format) {
740
+ case "jpeg":
741
+ return "image/jpeg";
742
+ case "png":
743
+ return "image/png";
744
+ case "webp":
745
+ return "image/webp";
746
+ case "gif":
747
+ return "image/gif";
748
+ case "avif":
749
+ return "image/avif";
750
+ case "tiff":
751
+ return "image/tiff";
752
+ case "svg":
753
+ return "image/svg+xml";
754
+ case "heif":
755
+ return "image/heif";
756
+ default:
757
+ return `image/${format}`;
758
+ }
759
+ }
569
760
  async function generateMetaDetails(ctx, input) {
570
761
  const prompt = buildMetaDetailsPrompt(input.slides, input.metaDetailsPrompt);
571
- const response = await ctx.providers.generateText({
572
- provider: input.provider,
573
- model: input.textModel,
574
- prompt,
575
- temperature: 0.7,
576
- });
577
- return parseMetaDetailsResponse(response.text, input.slides);
762
+ let lastError = null;
763
+ for (const provider of providerFailoverOrder(input.provider)) {
764
+ const textModel = resolveTextModelForAttempt(provider, input.configuredProvider, input.configuredTextModel);
765
+ try {
766
+ const response = await ctx.providers.generateText({
767
+ provider,
768
+ model: textModel,
769
+ prompt,
770
+ temperature: 0.7,
771
+ });
772
+ return parseMetaDetailsResponse(response.text, input.slides);
773
+ }
774
+ catch (error) {
775
+ lastError = error instanceof Error ? error : new Error(String(error));
776
+ ctx.logger.warn("Metadata provider failed, trying next provider", {
777
+ provider,
778
+ model: textModel,
779
+ message: lastError.message,
780
+ });
781
+ }
782
+ }
783
+ throw lastError ?? new Error("No metadata provider succeeded.");
578
784
  }
579
785
  function buildMetaDetailsPrompt(slides, metaDetailsPrompt) {
580
786
  return [
@@ -872,7 +1078,8 @@ async function portraitScore(input) {
872
1078
  .raw()
873
1079
  .toBuffer();
874
1080
  const edgeContrast = edgeBandContrast(sample, thumbnailWidth, thumbnailHeight);
875
- return Math.abs(aspect - 9 / 16) + edgeContrast;
1081
+ const edgeFlatnessPenalty = edgeBandFlatnessPenalty(sample, thumbnailWidth, thumbnailHeight);
1082
+ return Math.abs(aspect - 9 / 16) + edgeContrast + edgeFlatnessPenalty;
876
1083
  }
877
1084
  async function chooseLayoutFromImage(imageBytes, overlayText) {
878
1085
  const thumbnailWidth = 108;
@@ -1066,6 +1273,23 @@ function edgeBandContrast(sample, width, height) {
1066
1273
  const bottom = bandActivity(sample, width, height, height - bandHeight, height);
1067
1274
  return (top + bottom) / 220;
1068
1275
  }
1276
+ function edgeBandFlatnessPenalty(sample, width, height) {
1277
+ const bandHeight = Math.max(8, Math.floor(height * 0.16));
1278
+ const centerStart = Math.max(0, Math.floor(height * 0.34));
1279
+ const centerEnd = Math.min(height, Math.ceil(height * 0.66));
1280
+ const top = bandActivity(sample, width, height, 0, bandHeight);
1281
+ const bottom = bandActivity(sample, width, height, height - bandHeight, height);
1282
+ const center = bandActivity(sample, width, height, centerStart, centerEnd);
1283
+ const edgeAverage = (top + bottom) / 2;
1284
+ if (center <= 0) {
1285
+ return 0;
1286
+ }
1287
+ const ratio = edgeAverage / center;
1288
+ if (ratio >= 0.72) {
1289
+ return 0;
1290
+ }
1291
+ return (0.72 - ratio) * 3.4;
1292
+ }
1069
1293
  function bandActivity(sample, width, height, startY, endY) {
1070
1294
  let detail = 0;
1071
1295
  let count = 0;