@mevdragon/vidfarm-devcli 0.2.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GETTING_STARTED.developers.md +51 -142
- package/README.md +44 -4
- package/SKILL.developer.md +428 -101
- package/dist/src/account-pages.js +1 -1
- package/dist/src/app.js +93 -5
- package/dist/src/cli.js +456 -8
- package/dist/src/config.js +3 -2
- package/dist/src/context.js +30 -11
- package/dist/src/db.js +2 -57
- package/dist/src/dev-app.js +0 -1
- package/dist/src/index.js +4 -2
- package/dist/src/lib/template-paths.js +21 -0
- package/dist/src/runtime.js +3 -1
- package/dist/src/services/auth.js +4 -4
- package/dist/src/services/job-logs.js +186 -0
- package/dist/src/services/jobs.js +3 -2
- package/dist/src/services/providers.js +14 -6
- package/dist/src/services/storage.js +85 -2
- package/dist/src/services/template-sources.js +29 -3
- package/dist/templates/template_0000/src/lib/images.js +46 -86
- package/dist/templates/template_0000/src/template.js +277 -53
- package/package.json +2 -2
- package/templates/template_0000/README.md +2 -1
- package/templates/template_0000/SKILL.md +32 -0
- package/templates/template_0000/src/lib/images.js +46 -86
- package/templates/template_0000/src/lib/images.ts +55 -98
- package/templates/template_0000/src/template-dna.js +9 -0
- package/templates/template_0000/src/template.js +523 -199
- package/templates/template_0000/src/template.ts +356 -61
- package/templates/template_0000/template.config.json +1 -1
|
@@ -5,6 +5,7 @@ import { promisify } from "node:util";
|
|
|
5
5
|
import { config } from "../config.js";
|
|
6
6
|
import { database } from "../db.js";
|
|
7
7
|
import { createId } from "../lib/ids.js";
|
|
8
|
+
import { defaultSkillPathForTemplateModule, deriveTemplateRootDirFromModulePath } from "../lib/template-paths.js";
|
|
8
9
|
import { nowIso } from "../lib/time.js";
|
|
9
10
|
import { loadTemplateFromModule } from "./template-loader.js";
|
|
10
11
|
import { TemplateCertificationService } from "./template-certification.js";
|
|
@@ -18,6 +19,7 @@ export class TemplateSourceService {
|
|
|
18
19
|
return database.listTemplateReleases(templateId);
|
|
19
20
|
}
|
|
20
21
|
registerSource(input) {
|
|
22
|
+
deriveTemplateRootDirFromModulePath(input.templateModulePath);
|
|
21
23
|
const existingByTemplateId = database.getTemplateSourceByTemplateId(input.templateId);
|
|
22
24
|
if (existingByTemplateId) {
|
|
23
25
|
throw new Error("A template with this template_id already exists. Generate a new UUIDv4 and try again.");
|
|
@@ -33,7 +35,7 @@ export class TemplateSourceService {
|
|
|
33
35
|
repoUrl: input.repoUrl,
|
|
34
36
|
branch: input.branch ?? "production",
|
|
35
37
|
templateModulePath: input.templateModulePath,
|
|
36
|
-
skillPath: input.skillPath ??
|
|
38
|
+
skillPath: input.skillPath ?? defaultSkillPathForTemplateModule(input.templateModulePath),
|
|
37
39
|
installCommand: input.installCommand ?? "npm install",
|
|
38
40
|
buildCommand: input.buildCommand ?? "npm run build",
|
|
39
41
|
status: "active"
|
|
@@ -47,7 +49,6 @@ export class TemplateSourceService {
|
|
|
47
49
|
const commitSha = input.commitSha ?? await this.resolveBranchHead(source.repoUrl, source.branch);
|
|
48
50
|
const checkoutPath = path.join(config.TEMPLATE_SOURCE_ROOT, source.templateId, commitSha);
|
|
49
51
|
const skillPath = path.join(checkoutPath, source.skillPath);
|
|
50
|
-
const modulePath = path.join(checkoutPath, source.templateModulePath);
|
|
51
52
|
if (!existsSync(checkoutPath)) {
|
|
52
53
|
mkdirSync(path.dirname(checkoutPath), { recursive: true });
|
|
53
54
|
await this.runShell(["git", "clone", "--branch", source.branch, source.repoUrl, checkoutPath], process.cwd());
|
|
@@ -59,6 +60,7 @@ export class TemplateSourceService {
|
|
|
59
60
|
await this.runCommandString(source.buildCommand, checkoutPath);
|
|
60
61
|
}
|
|
61
62
|
}
|
|
63
|
+
const modulePath = this.resolveImportableModulePath(checkoutPath, source.templateModulePath);
|
|
62
64
|
const template = await loadTemplateFromModule(modulePath);
|
|
63
65
|
if (template.id !== source.templateId) {
|
|
64
66
|
throw new Error(`Imported template id ${template.id} does not match source template id ${source.templateId}.`);
|
|
@@ -130,6 +132,30 @@ export class TemplateSourceService {
|
|
|
130
132
|
await execFileAsync(args[0], args.slice(1), { cwd });
|
|
131
133
|
}
|
|
132
134
|
async runCommandString(command, cwd) {
|
|
133
|
-
await execFileAsync("/bin/sh", ["-lc", command], {
|
|
135
|
+
await execFileAsync("/bin/sh", ["-lc", command], {
|
|
136
|
+
cwd,
|
|
137
|
+
env: {
|
|
138
|
+
...process.env,
|
|
139
|
+
NODE_ENV: "development",
|
|
140
|
+
npm_config_production: "false",
|
|
141
|
+
NPM_CONFIG_PRODUCTION: "false",
|
|
142
|
+
NPM_CONFIG_INCLUDE: "dev"
|
|
143
|
+
}
|
|
144
|
+
});
|
|
145
|
+
}
|
|
146
|
+
resolveImportableModulePath(checkoutPath, declaredModulePath) {
|
|
147
|
+
const sourceModulePath = path.join(checkoutPath, declaredModulePath);
|
|
148
|
+
const extension = path.extname(sourceModulePath).toLowerCase();
|
|
149
|
+
if (extension === ".ts" || extension === ".tsx") {
|
|
150
|
+
const compiledModulePath = sourceModulePath.replace(/\.(ts|tsx)$/i, ".js");
|
|
151
|
+
if (existsSync(compiledModulePath)) {
|
|
152
|
+
return compiledModulePath;
|
|
153
|
+
}
|
|
154
|
+
throw new Error(`Template module path ${declaredModulePath} is TypeScript, but the compiled module ${path.relative(checkoutPath, compiledModulePath)} was not found after build.`);
|
|
155
|
+
}
|
|
156
|
+
if (existsSync(sourceModulePath)) {
|
|
157
|
+
return sourceModulePath;
|
|
158
|
+
}
|
|
159
|
+
throw new Error(`Template module path ${declaredModulePath} was not found in the imported release.`);
|
|
134
160
|
}
|
|
135
161
|
}
|
|
@@ -4,25 +4,60 @@ export async function normalizeToPortraitFrame(input, target = { width: 1080, he
|
|
|
4
4
|
const oriented = sharp(buffer, { density: 144 }).rotate();
|
|
5
5
|
const trimmed = await trimFlatBorders(oriented);
|
|
6
6
|
const targetAspect = target.width / target.height;
|
|
7
|
-
// If the model already returned a near-native vertical frame, avoid the
|
|
8
|
-
// activity crop pass because it can misread low-detail edges as padding.
|
|
9
7
|
const shouldPreserveFraming = await isCloseToAspect(trimmed, targetAspect, 0.025);
|
|
10
8
|
const cropped = shouldPreserveFraming ? trimmed : await cropToActiveImageRegion(trimmed);
|
|
11
|
-
const
|
|
12
|
-
|
|
9
|
+
const normalized = await resizeWithBlurredContain(cropped, target, targetAspect);
|
|
10
|
+
return {
|
|
11
|
+
bytes: normalized,
|
|
12
|
+
contentType: "image/png",
|
|
13
|
+
width: target.width,
|
|
14
|
+
height: target.height
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
async function resizeWithBlurredContain(image, target, targetAspect) {
|
|
18
|
+
const metadata = await image.metadata();
|
|
19
|
+
const width = metadata.width ?? 0;
|
|
20
|
+
const height = metadata.height ?? 0;
|
|
21
|
+
if (!width || !height) {
|
|
22
|
+
return image
|
|
23
|
+
.resize(target.width, target.height, {
|
|
24
|
+
fit: "cover",
|
|
25
|
+
position: sharp.strategy.attention
|
|
26
|
+
})
|
|
27
|
+
.png()
|
|
28
|
+
.toBuffer();
|
|
29
|
+
}
|
|
30
|
+
const aspect = width / height;
|
|
31
|
+
if (Math.abs(aspect - targetAspect) <= 0.015) {
|
|
32
|
+
return image
|
|
33
|
+
.resize(target.width, target.height, {
|
|
34
|
+
fit: "fill"
|
|
35
|
+
})
|
|
36
|
+
.png()
|
|
37
|
+
.toBuffer();
|
|
38
|
+
}
|
|
39
|
+
const foreground = await image
|
|
40
|
+
.clone()
|
|
41
|
+
.resize(target.width, target.height, {
|
|
42
|
+
fit: "contain",
|
|
43
|
+
background: { r: 0, g: 0, b: 0, alpha: 0 }
|
|
44
|
+
})
|
|
45
|
+
.png()
|
|
46
|
+
.toBuffer();
|
|
47
|
+
const background = await image
|
|
48
|
+
.clone()
|
|
13
49
|
.resize(target.width, target.height, {
|
|
14
50
|
fit: "cover",
|
|
15
51
|
position: sharp.strategy.attention
|
|
16
52
|
})
|
|
53
|
+
.blur(20)
|
|
54
|
+
.modulate({ brightness: 0.92, saturation: 0.9 })
|
|
55
|
+
.png()
|
|
56
|
+
.toBuffer();
|
|
57
|
+
return sharp(background)
|
|
58
|
+
.composite([{ input: foreground }])
|
|
17
59
|
.png()
|
|
18
60
|
.toBuffer();
|
|
19
|
-
const exact = await ensureExactPixelSize(output, target);
|
|
20
|
-
return {
|
|
21
|
-
bytes: exact,
|
|
22
|
-
contentType: "image/png",
|
|
23
|
-
width: target.width,
|
|
24
|
-
height: target.height
|
|
25
|
-
};
|
|
26
61
|
}
|
|
27
62
|
async function isCloseToAspect(image, targetAspect, tolerance) {
|
|
28
63
|
const metadata = await image.metadata();
|
|
@@ -77,27 +112,6 @@ async function cropToActiveImageRegion(image) {
|
|
|
77
112
|
const height = Math.min(sourceHeight - top, Math.max(1, Math.ceil((bounds.height / sampleHeight) * sourceHeight)));
|
|
78
113
|
return image.extract({ left, top, width, height });
|
|
79
114
|
}
|
|
80
|
-
async function cropToPortraitAspect(image, targetAspect) {
|
|
81
|
-
const metadata = await image.metadata();
|
|
82
|
-
const sourceWidth = metadata.width ?? 0;
|
|
83
|
-
const sourceHeight = metadata.height ?? 0;
|
|
84
|
-
if (!sourceWidth || !sourceHeight) {
|
|
85
|
-
return image;
|
|
86
|
-
}
|
|
87
|
-
const sourceAspect = sourceWidth / sourceHeight;
|
|
88
|
-
if (Math.abs(sourceAspect - targetAspect) < 0.015) {
|
|
89
|
-
return image;
|
|
90
|
-
}
|
|
91
|
-
const focus = await detectFocusPoint(image, sourceWidth, sourceHeight);
|
|
92
|
-
if (sourceAspect > targetAspect) {
|
|
93
|
-
const cropWidth = Math.max(1, Math.min(sourceWidth, Math.round(sourceHeight * targetAspect)));
|
|
94
|
-
const left = clamp(Math.round(focus.x - cropWidth / 2), 0, sourceWidth - cropWidth);
|
|
95
|
-
return image.extract({ left, top: 0, width: cropWidth, height: sourceHeight });
|
|
96
|
-
}
|
|
97
|
-
const cropHeight = Math.max(1, Math.min(sourceHeight, Math.round(sourceWidth / targetAspect)));
|
|
98
|
-
const top = clamp(Math.round(focus.y - cropHeight / 2), 0, sourceHeight - cropHeight);
|
|
99
|
-
return image.extract({ left: 0, top, width: sourceWidth, height: cropHeight });
|
|
100
|
-
}
|
|
101
115
|
function detectActiveBounds(sample, width, height) {
|
|
102
116
|
const rowActivity = new Array(height).fill(0);
|
|
103
117
|
const colActivity = new Array(width).fill(0);
|
|
@@ -142,46 +156,6 @@ function detectActiveBounds(sample, width, height) {
|
|
|
142
156
|
height: croppedHeight
|
|
143
157
|
};
|
|
144
158
|
}
|
|
145
|
-
async function detectFocusPoint(image, sourceWidth, sourceHeight) {
|
|
146
|
-
const sampleWidth = 120;
|
|
147
|
-
const sampleHeight = Math.max(1, Math.round((sourceHeight / Math.max(sourceWidth, 1)) * sampleWidth));
|
|
148
|
-
const sample = await image
|
|
149
|
-
.clone()
|
|
150
|
-
.resize(sampleWidth, sampleHeight, { fit: "fill" })
|
|
151
|
-
.grayscale()
|
|
152
|
-
.raw()
|
|
153
|
-
.toBuffer();
|
|
154
|
-
let weightedX = 0;
|
|
155
|
-
let weightedY = 0;
|
|
156
|
-
let totalWeight = 0;
|
|
157
|
-
for (let y = 0; y < sampleHeight; y += 1) {
|
|
158
|
-
for (let x = 0; x < sampleWidth; x += 1) {
|
|
159
|
-
const index = y * sampleWidth + x;
|
|
160
|
-
const current = sample[index] ?? 0;
|
|
161
|
-
const left = x > 0 ? sample[index - 1] ?? current : current;
|
|
162
|
-
const up = y > 0 ? sample[index - sampleWidth] ?? current : current;
|
|
163
|
-
const right = x < sampleWidth - 1 ? sample[index + 1] ?? current : current;
|
|
164
|
-
const down = y < sampleHeight - 1 ? sample[index + sampleWidth] ?? current : current;
|
|
165
|
-
const energy = Math.abs(current - left) +
|
|
166
|
-
Math.abs(current - right) +
|
|
167
|
-
Math.abs(current - up) +
|
|
168
|
-
Math.abs(current - down);
|
|
169
|
-
const centerBiasX = 1 - Math.abs((x + 0.5) / sampleWidth - 0.5) * 0.45;
|
|
170
|
-
const centerBiasY = 1 - Math.abs((y + 0.5) / sampleHeight - 0.5) * 0.35;
|
|
171
|
-
const weight = Math.max(energy, 1) * centerBiasX * centerBiasY;
|
|
172
|
-
weightedX += (x + 0.5) * weight;
|
|
173
|
-
weightedY += (y + 0.5) * weight;
|
|
174
|
-
totalWeight += weight;
|
|
175
|
-
}
|
|
176
|
-
}
|
|
177
|
-
if (totalWeight <= 0) {
|
|
178
|
-
return { x: sourceWidth / 2, y: sourceHeight / 2 };
|
|
179
|
-
}
|
|
180
|
-
return {
|
|
181
|
-
x: (weightedX / totalWeight / sampleWidth) * sourceWidth,
|
|
182
|
-
y: (weightedY / totalWeight / sampleHeight) * sourceHeight
|
|
183
|
-
};
|
|
184
|
-
}
|
|
185
159
|
function findActiveRange(values, minSpan) {
|
|
186
160
|
const mean = values.reduce((sum, value) => sum + value, 0) / Math.max(values.length, 1);
|
|
187
161
|
const max = values.reduce((best, value) => Math.max(best, value), 0);
|
|
@@ -226,17 +200,3 @@ function smoothSeries(values, radius) {
|
|
|
226
200
|
function clamp(value, min, max) {
|
|
227
201
|
return Math.min(Math.max(value, min), max);
|
|
228
202
|
}
|
|
229
|
-
async function ensureExactPixelSize(input, target) {
|
|
230
|
-
const metadata = await sharp(input).metadata();
|
|
231
|
-
if (metadata.width === target.width && metadata.height === target.height) {
|
|
232
|
-
return input;
|
|
233
|
-
}
|
|
234
|
-
const exactCrop = await cropToPortraitAspect(sharp(input), target.width / target.height);
|
|
235
|
-
return exactCrop
|
|
236
|
-
.resize(target.width, target.height, {
|
|
237
|
-
fit: "cover",
|
|
238
|
-
position: sharp.strategy.attention
|
|
239
|
-
})
|
|
240
|
-
.png()
|
|
241
|
-
.toBuffer();
|
|
242
|
-
}
|
|
@@ -27,6 +27,31 @@ const NATIVE_UI_TEXT_ZONE = {
|
|
|
27
27
|
maxCenterYPercent: 0.58,
|
|
28
28
|
};
|
|
29
29
|
const TEXT_BACKGROUND_NONE = "none";
|
|
30
|
+
// Documentation-only dependency map for humans and agents.
|
|
31
|
+
// The current template standard has no first-class dependencies field, so
|
|
32
|
+
// provider/model requirements must be declared in source and SKILL.md.
|
|
33
|
+
const TEMPLATE_PROVIDER_REQUIREMENTS = {
|
|
34
|
+
image: [
|
|
35
|
+
{ provider: "openai", models: ["gpt-image-1", "gpt-image-2"], strict: false },
|
|
36
|
+
{ provider: "gemini", models: ["gemini-3.1-flash-image-preview", "gemini-2.5-flash-image"], strict: false },
|
|
37
|
+
{ provider: "openrouter", models: ["bytedance/seedance-2.0", "bytedance-seed/seedream-4.5"], strict: false, planned: true },
|
|
38
|
+
],
|
|
39
|
+
text: [
|
|
40
|
+
{ provider: "openai", models: ["gpt-5.4"], strict: false },
|
|
41
|
+
{ provider: "gemini", models: ["gemini-3.1-flash-lite", "gemini-2.5-flash-lite"], strict: false },
|
|
42
|
+
{ provider: "openrouter", models: ["qwen/qwen3.6-flash"], strict: false },
|
|
43
|
+
],
|
|
44
|
+
layout_analysis: [
|
|
45
|
+
{ provider: "openai", models: ["gpt-5.4"], strict: false },
|
|
46
|
+
{ provider: "gemini", models: ["gemini-3.1-flash-lite", "gemini-2.5-flash-lite"], strict: false },
|
|
47
|
+
{ provider: "openrouter", models: ["qwen/qwen3.6-flash"], strict: false },
|
|
48
|
+
],
|
|
49
|
+
video: [
|
|
50
|
+
{ provider: "openai", models: ["sora-2"], strict: false, planned: true },
|
|
51
|
+
{ provider: "gemini", models: ["veo-3.0-generate-001"], strict: false, planned: true },
|
|
52
|
+
],
|
|
53
|
+
};
|
|
54
|
+
void TEMPLATE_PROVIDER_REQUIREMENTS;
|
|
30
55
|
const legacySlideInputSchema = z.union([
|
|
31
56
|
z.tuple([z.string().min(3), z.string().min(1)]),
|
|
32
57
|
z.tuple([
|
|
@@ -132,29 +157,43 @@ export const template0000Definition = defineTemplate({
|
|
|
132
157
|
const payload = createSlideshowInputSchema.parse(input);
|
|
133
158
|
ctx.logger.progress(0.04, "Starting template_0000 slideshow image stage");
|
|
134
159
|
const provider = parseImageProvider(ctx.templateConfig.defaultProvider);
|
|
135
|
-
const
|
|
136
|
-
|
|
160
|
+
const configuredTextModel = typeof ctx.templateConfig.textModel === "string"
|
|
161
|
+
? ctx.templateConfig.textModel
|
|
162
|
+
: null;
|
|
163
|
+
const configuredImageModel = typeof ctx.templateConfig.imageModel === "string"
|
|
164
|
+
? ctx.templateConfig.imageModel
|
|
165
|
+
: null;
|
|
166
|
+
const textModel = String(configuredTextModel ?? defaultTextModelForProvider(provider));
|
|
167
|
+
const imageModel = String(configuredImageModel ?? defaultImageModelForProvider(provider));
|
|
137
168
|
const textStyle = resolveTextStyleSpec(ctx.templateConfig);
|
|
138
169
|
const slides = [];
|
|
139
170
|
for (const [index, rawSlide] of payload.slides.entries()) {
|
|
140
|
-
const { imagePrompt, imagePromptAttachments, overlayText, durationMs } = normalizeSlideInput(rawSlide);
|
|
141
|
-
ctx.logger.progress(0.08 + (index / payload.slides.length) * 0.42,
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
171
|
+
const { sourceType, sourceValue, imagePrompt, imagePromptAttachments, overlayText, durationMs, } = normalizeSlideInput(rawSlide);
|
|
172
|
+
ctx.logger.progress(0.08 + (index / payload.slides.length) * 0.42, sourceType === "image_source"
|
|
173
|
+
? `Loading slide ${index + 1} source image`
|
|
174
|
+
: `Generating slide ${index + 1} image`);
|
|
175
|
+
const image = sourceType === "image_source"
|
|
176
|
+
? await loadExistingSlideImage(sourceValue)
|
|
177
|
+
: await generatePortraitCandidateImage(ctx, {
|
|
178
|
+
provider,
|
|
179
|
+
imageModel,
|
|
180
|
+
configuredProvider: provider,
|
|
181
|
+
configuredImageModel,
|
|
182
|
+
imagePrompt,
|
|
183
|
+
imagePromptAttachments,
|
|
184
|
+
overlayText,
|
|
185
|
+
});
|
|
186
|
+
if (sourceType === "ai_prompt") {
|
|
187
|
+
await ctx.billing.record({
|
|
188
|
+
type: "ai_generation",
|
|
189
|
+
costUsd: 0.04,
|
|
190
|
+
metadata: {
|
|
191
|
+
stage: "image_generation",
|
|
192
|
+
slideIndex: index,
|
|
193
|
+
model: imageModel,
|
|
194
|
+
},
|
|
195
|
+
});
|
|
196
|
+
}
|
|
158
197
|
ctx.logger.progress(0.13 + (index / payload.slides.length) * 0.3, `Normalizing slide ${index + 1} to strict 9:16 portrait`);
|
|
159
198
|
const normalizedImage = await normalizeToPortraitFrame(image.bytes, FRAME);
|
|
160
199
|
const backgroundArtifact = await ctx.storage.putBuffer(`backgrounds/slide-${pad2(index + 1)}.png`, normalizedImage.bytes, {
|
|
@@ -192,6 +231,8 @@ export const template0000Definition = defineTemplate({
|
|
|
192
231
|
});
|
|
193
232
|
slides.push({
|
|
194
233
|
index,
|
|
234
|
+
sourceType,
|
|
235
|
+
sourceValue,
|
|
195
236
|
imagePrompt,
|
|
196
237
|
imagePromptAttachments,
|
|
197
238
|
overlayText,
|
|
@@ -207,6 +248,8 @@ export const template0000Definition = defineTemplate({
|
|
|
207
248
|
const metaDetails = await generateMetaDetails(ctx, {
|
|
208
249
|
provider,
|
|
209
250
|
textModel,
|
|
251
|
+
configuredProvider: provider,
|
|
252
|
+
configuredTextModel,
|
|
210
253
|
slides,
|
|
211
254
|
metaDetailsPrompt: payload.meta_details_prompt,
|
|
212
255
|
});
|
|
@@ -254,6 +297,8 @@ export const template0000Definition = defineTemplate({
|
|
|
254
297
|
meta_details_prompt: payload.meta_details_prompt ?? null,
|
|
255
298
|
slides: slides.map((slide) => ({
|
|
256
299
|
index: slide.index,
|
|
300
|
+
sourceType: slide.sourceType,
|
|
301
|
+
sourceValue: slide.sourceValue,
|
|
257
302
|
imagePrompt: slide.imagePrompt,
|
|
258
303
|
imagePromptAttachments: slide.imagePromptAttachments,
|
|
259
304
|
overlayText: slide.overlayText,
|
|
@@ -374,26 +419,43 @@ async function generatePortraitCandidateImage(ctx, input) {
|
|
|
374
419
|
].join("\n"),
|
|
375
420
|
];
|
|
376
421
|
let best = null;
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
prompt
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
422
|
+
let lastError = null;
|
|
423
|
+
for (const provider of providerFailoverOrder(input.provider)) {
|
|
424
|
+
const imageModel = resolveImageModelForAttempt(provider, input.configuredProvider, input.configuredImageModel);
|
|
425
|
+
try {
|
|
426
|
+
for (const prompt of attempts) {
|
|
427
|
+
const image = await ctx.providers.generateImage({
|
|
428
|
+
provider,
|
|
429
|
+
model: imageModel,
|
|
430
|
+
prompt,
|
|
431
|
+
promptAttachments: input.imagePromptAttachments,
|
|
432
|
+
size: sourceImageSizeForProvider(provider),
|
|
433
|
+
aspectRatio: "9:16",
|
|
434
|
+
imageSize: sourceImageOutputSizeForProvider(provider, imageModel),
|
|
435
|
+
});
|
|
436
|
+
const score = await portraitScore(image.bytes);
|
|
437
|
+
if (!best || score < best.score) {
|
|
438
|
+
best = { ...image, prompt, score };
|
|
439
|
+
}
|
|
440
|
+
if (score <= 0.16) {
|
|
441
|
+
return best;
|
|
442
|
+
}
|
|
443
|
+
}
|
|
444
|
+
if (best) {
|
|
445
|
+
return best;
|
|
446
|
+
}
|
|
390
447
|
}
|
|
391
|
-
|
|
392
|
-
|
|
448
|
+
catch (error) {
|
|
449
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
450
|
+
ctx.logger.warn("Image generation provider failed, trying next provider", {
|
|
451
|
+
provider,
|
|
452
|
+
model: imageModel,
|
|
453
|
+
message: lastError.message,
|
|
454
|
+
});
|
|
393
455
|
}
|
|
394
456
|
}
|
|
395
457
|
if (!best) {
|
|
396
|
-
throw new Error("No image candidate was generated.");
|
|
458
|
+
throw lastError ?? new Error("No image candidate was generated.");
|
|
397
459
|
}
|
|
398
460
|
return best;
|
|
399
461
|
}
|
|
@@ -419,7 +481,7 @@ function isImageProvider(value) {
|
|
|
419
481
|
}
|
|
420
482
|
function sourceImageSizeForProvider(provider) {
|
|
421
483
|
if (provider === "openai") {
|
|
422
|
-
return "
|
|
484
|
+
return "1024x1536";
|
|
423
485
|
}
|
|
424
486
|
return "1080x1920";
|
|
425
487
|
}
|
|
@@ -430,7 +492,7 @@ function defaultImageModelForProvider(provider) {
|
|
|
430
492
|
if (provider === "openrouter") {
|
|
431
493
|
return "bytedance-seed/seedream-4.5";
|
|
432
494
|
}
|
|
433
|
-
return "gemini-
|
|
495
|
+
return "gemini-2.5-flash-image";
|
|
434
496
|
}
|
|
435
497
|
function defaultTextModelForProvider(provider) {
|
|
436
498
|
if (provider === "openai") {
|
|
@@ -448,12 +510,29 @@ function sourceImageOutputSizeForProvider(provider, model) {
|
|
|
448
510
|
if (provider !== "gemini") {
|
|
449
511
|
return undefined;
|
|
450
512
|
}
|
|
451
|
-
if (model === "gemini-3
|
|
452
|
-
model === "gemini-3-pro-image-preview") {
|
|
513
|
+
if (model === "gemini-3-pro-image-preview") {
|
|
453
514
|
return "1K";
|
|
454
515
|
}
|
|
455
516
|
return undefined;
|
|
456
517
|
}
|
|
518
|
+
function providerFailoverOrder(primary) {
|
|
519
|
+
return [
|
|
520
|
+
primary,
|
|
521
|
+
...supportedImageProviders.filter((provider) => provider !== primary),
|
|
522
|
+
];
|
|
523
|
+
}
|
|
524
|
+
function resolveImageModelForAttempt(provider, configuredProvider, configuredImageModel) {
|
|
525
|
+
if (provider === configuredProvider && configuredImageModel) {
|
|
526
|
+
return configuredImageModel;
|
|
527
|
+
}
|
|
528
|
+
return defaultImageModelForProvider(provider);
|
|
529
|
+
}
|
|
530
|
+
function resolveTextModelForAttempt(provider, configuredProvider, configuredTextModel) {
|
|
531
|
+
if (provider === configuredProvider && configuredTextModel) {
|
|
532
|
+
return configuredTextModel;
|
|
533
|
+
}
|
|
534
|
+
return defaultTextModelForProvider(provider);
|
|
535
|
+
}
|
|
457
536
|
function resolveTextStyleSpec(config) {
|
|
458
537
|
const fontId = isTemplateFontId(config.captionFont)
|
|
459
538
|
? config.captionFont
|
|
@@ -532,6 +611,8 @@ const slideshowManifestSchema = z.object({
|
|
|
532
611
|
}),
|
|
533
612
|
slides: z.array(z.object({
|
|
534
613
|
index: z.number(),
|
|
614
|
+
sourceType: z.enum(["ai_prompt", "image_source"]),
|
|
615
|
+
sourceValue: z.string(),
|
|
535
616
|
imagePrompt: z.string(),
|
|
536
617
|
imagePromptAttachments: z.array(z.string().url()),
|
|
537
618
|
overlayText: z.string(),
|
|
@@ -552,29 +633,154 @@ const slideshowManifestSchema = z.object({
|
|
|
552
633
|
});
|
|
553
634
|
function normalizeSlideInput(input) {
|
|
554
635
|
if (Array.isArray(input)) {
|
|
555
|
-
return {
|
|
556
|
-
|
|
636
|
+
return normalizeSlideFields({
|
|
637
|
+
primarySource: input[0],
|
|
557
638
|
imagePromptAttachments: [],
|
|
558
639
|
overlayText: input[1],
|
|
559
640
|
durationMs: input[2] ?? 4000,
|
|
560
|
-
};
|
|
641
|
+
});
|
|
561
642
|
}
|
|
562
|
-
return {
|
|
563
|
-
|
|
643
|
+
return normalizeSlideFields({
|
|
644
|
+
primarySource: input.image_prompt,
|
|
564
645
|
imagePromptAttachments: input.image_prompt_attachments,
|
|
565
646
|
overlayText: input.caption,
|
|
566
647
|
durationMs: input.duration_ms,
|
|
648
|
+
});
|
|
649
|
+
}
|
|
650
|
+
function normalizeSlideFields(input) {
|
|
651
|
+
const sourceValue = input.primarySource.trim();
|
|
652
|
+
const existingImageSource = resolveExistingImageSource(sourceValue);
|
|
653
|
+
return {
|
|
654
|
+
sourceType: existingImageSource ? "image_source" : "ai_prompt",
|
|
655
|
+
sourceValue: existingImageSource ?? sourceValue,
|
|
656
|
+
imagePrompt: sourceValue,
|
|
657
|
+
imagePromptAttachments: input.imagePromptAttachments,
|
|
658
|
+
overlayText: input.overlayText,
|
|
659
|
+
durationMs: input.durationMs,
|
|
567
660
|
};
|
|
568
661
|
}
|
|
662
|
+
async function loadExistingSlideImage(sourceValue) {
|
|
663
|
+
const resolved = resolveExistingImageSource(sourceValue);
|
|
664
|
+
if (!resolved) {
|
|
665
|
+
throw new Error(`Slide source is not a supported image URL or file path: ${sourceValue}`);
|
|
666
|
+
}
|
|
667
|
+
const bytes = isLocalFileSource(resolved)
|
|
668
|
+
? readFileSync(resolveLocalFileSourcePath(resolved))
|
|
669
|
+
: await fetchExternalImageBytes(resolved);
|
|
670
|
+
const contentType = await detectImageContentType(bytes);
|
|
671
|
+
return {
|
|
672
|
+
bytes,
|
|
673
|
+
contentType,
|
|
674
|
+
revisedPrompt: null,
|
|
675
|
+
prompt: sourceValue,
|
|
676
|
+
};
|
|
677
|
+
}
|
|
678
|
+
function resolveExistingImageSource(value) {
|
|
679
|
+
const trimmed = value.trim();
|
|
680
|
+
if (!trimmed) {
|
|
681
|
+
return null;
|
|
682
|
+
}
|
|
683
|
+
const parsedUrl = parseUrl(trimmed);
|
|
684
|
+
if (parsedUrl &&
|
|
685
|
+
["http:", "https:", "file:", "data:"].includes(parsedUrl.protocol)) {
|
|
686
|
+
return trimmed;
|
|
687
|
+
}
|
|
688
|
+
const localPath = resolveLocalPathIfPresent(trimmed);
|
|
689
|
+
if (localPath) {
|
|
690
|
+
return localPath;
|
|
691
|
+
}
|
|
692
|
+
return null;
|
|
693
|
+
}
|
|
694
|
+
function parseUrl(value) {
|
|
695
|
+
try {
|
|
696
|
+
return new URL(value);
|
|
697
|
+
}
|
|
698
|
+
catch {
|
|
699
|
+
return null;
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
function resolveLocalPathIfPresent(value) {
|
|
703
|
+
const expandedPath = value.startsWith("~/")
|
|
704
|
+
? path.join(process.env.HOME ?? "", value.slice(2))
|
|
705
|
+
: value;
|
|
706
|
+
const candidate = path.resolve(expandedPath);
|
|
707
|
+
return existsSync(candidate) ? candidate : null;
|
|
708
|
+
}
|
|
709
|
+
function isLocalFileSource(sourceValue) {
|
|
710
|
+
const parsedUrl = parseUrl(sourceValue);
|
|
711
|
+
return !parsedUrl || parsedUrl.protocol === "file:";
|
|
712
|
+
}
|
|
713
|
+
function resolveLocalFileSourcePath(sourceValue) {
|
|
714
|
+
const parsedUrl = parseUrl(sourceValue);
|
|
715
|
+
return parsedUrl?.protocol === "file:"
|
|
716
|
+
? fileURLToPath(parsedUrl)
|
|
717
|
+
: path.resolve(sourceValue);
|
|
718
|
+
}
|
|
719
|
+
async function fetchExternalImageBytes(sourceValue) {
|
|
720
|
+
const response = await fetch(sourceValue);
|
|
721
|
+
if (!response.ok) {
|
|
722
|
+
throw new Error(`Could not fetch slide source image: ${response.status} ${response.statusText}`);
|
|
723
|
+
}
|
|
724
|
+
return new Uint8Array(await response.arrayBuffer());
|
|
725
|
+
}
|
|
726
|
+
async function detectImageContentType(bytes) {
|
|
727
|
+
try {
|
|
728
|
+
const metadata = await sharp(bytes).metadata();
|
|
729
|
+
if (metadata.format) {
|
|
730
|
+
return contentTypeForSharpFormat(metadata.format);
|
|
731
|
+
}
|
|
732
|
+
}
|
|
733
|
+
catch (error) {
|
|
734
|
+
throw new Error(`Slide source could not be decoded as an image: ${error instanceof Error ? error.message : String(error)}`);
|
|
735
|
+
}
|
|
736
|
+
return "image/png";
|
|
737
|
+
}
|
|
738
|
+
function contentTypeForSharpFormat(format) {
|
|
739
|
+
switch (format) {
|
|
740
|
+
case "jpeg":
|
|
741
|
+
return "image/jpeg";
|
|
742
|
+
case "png":
|
|
743
|
+
return "image/png";
|
|
744
|
+
case "webp":
|
|
745
|
+
return "image/webp";
|
|
746
|
+
case "gif":
|
|
747
|
+
return "image/gif";
|
|
748
|
+
case "avif":
|
|
749
|
+
return "image/avif";
|
|
750
|
+
case "tiff":
|
|
751
|
+
return "image/tiff";
|
|
752
|
+
case "svg":
|
|
753
|
+
return "image/svg+xml";
|
|
754
|
+
case "heif":
|
|
755
|
+
return "image/heif";
|
|
756
|
+
default:
|
|
757
|
+
return `image/${format}`;
|
|
758
|
+
}
|
|
759
|
+
}
|
|
569
760
|
async function generateMetaDetails(ctx, input) {
|
|
570
761
|
const prompt = buildMetaDetailsPrompt(input.slides, input.metaDetailsPrompt);
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
762
|
+
let lastError = null;
|
|
763
|
+
for (const provider of providerFailoverOrder(input.provider)) {
|
|
764
|
+
const textModel = resolveTextModelForAttempt(provider, input.configuredProvider, input.configuredTextModel);
|
|
765
|
+
try {
|
|
766
|
+
const response = await ctx.providers.generateText({
|
|
767
|
+
provider,
|
|
768
|
+
model: textModel,
|
|
769
|
+
prompt,
|
|
770
|
+
temperature: 0.7,
|
|
771
|
+
});
|
|
772
|
+
return parseMetaDetailsResponse(response.text, input.slides);
|
|
773
|
+
}
|
|
774
|
+
catch (error) {
|
|
775
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
776
|
+
ctx.logger.warn("Metadata provider failed, trying next provider", {
|
|
777
|
+
provider,
|
|
778
|
+
model: textModel,
|
|
779
|
+
message: lastError.message,
|
|
780
|
+
});
|
|
781
|
+
}
|
|
782
|
+
}
|
|
783
|
+
throw lastError ?? new Error("No metadata provider succeeded.");
|
|
578
784
|
}
|
|
579
785
|
function buildMetaDetailsPrompt(slides, metaDetailsPrompt) {
|
|
580
786
|
return [
|
|
@@ -872,7 +1078,8 @@ async function portraitScore(input) {
|
|
|
872
1078
|
.raw()
|
|
873
1079
|
.toBuffer();
|
|
874
1080
|
const edgeContrast = edgeBandContrast(sample, thumbnailWidth, thumbnailHeight);
|
|
875
|
-
|
|
1081
|
+
const edgeFlatnessPenalty = edgeBandFlatnessPenalty(sample, thumbnailWidth, thumbnailHeight);
|
|
1082
|
+
return Math.abs(aspect - 9 / 16) + edgeContrast + edgeFlatnessPenalty;
|
|
876
1083
|
}
|
|
877
1084
|
async function chooseLayoutFromImage(imageBytes, overlayText) {
|
|
878
1085
|
const thumbnailWidth = 108;
|
|
@@ -1066,6 +1273,23 @@ function edgeBandContrast(sample, width, height) {
|
|
|
1066
1273
|
const bottom = bandActivity(sample, width, height, height - bandHeight, height);
|
|
1067
1274
|
return (top + bottom) / 220;
|
|
1068
1275
|
}
|
|
1276
|
+
function edgeBandFlatnessPenalty(sample, width, height) {
|
|
1277
|
+
const bandHeight = Math.max(8, Math.floor(height * 0.16));
|
|
1278
|
+
const centerStart = Math.max(0, Math.floor(height * 0.34));
|
|
1279
|
+
const centerEnd = Math.min(height, Math.ceil(height * 0.66));
|
|
1280
|
+
const top = bandActivity(sample, width, height, 0, bandHeight);
|
|
1281
|
+
const bottom = bandActivity(sample, width, height, height - bandHeight, height);
|
|
1282
|
+
const center = bandActivity(sample, width, height, centerStart, centerEnd);
|
|
1283
|
+
const edgeAverage = (top + bottom) / 2;
|
|
1284
|
+
if (center <= 0) {
|
|
1285
|
+
return 0;
|
|
1286
|
+
}
|
|
1287
|
+
const ratio = edgeAverage / center;
|
|
1288
|
+
if (ratio >= 0.72) {
|
|
1289
|
+
return 0;
|
|
1290
|
+
}
|
|
1291
|
+
return (0.72 - ratio) * 3.4;
|
|
1292
|
+
}
|
|
1069
1293
|
function bandActivity(sample, width, height, startY, endY) {
|
|
1070
1294
|
let detail = 0;
|
|
1071
1295
|
let count = 0;
|