@agentmedia/schema 0.2.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. package/.memory/cursor.json +3 -0
  2. package/.memory/memories.json +1 -0
  3. package/.memory/project.json +5 -0
  4. package/CLAUDE.md +7 -0
  5. package/LICENSE +199 -0
  6. package/README.md +2 -2
  7. package/dist/__tests__/character-pipeline.test.d.ts +2 -0
  8. package/dist/__tests__/character-pipeline.test.d.ts.map +1 -0
  9. package/dist/__tests__/character-pipeline.test.js +296 -0
  10. package/dist/__tests__/character-pipeline.test.js.map +1 -0
  11. package/dist/__tests__/parity.test.js +7 -0
  12. package/dist/__tests__/parity.test.js.map +1 -1
  13. package/dist/__tests__/text-to-video.test.d.ts +2 -0
  14. package/dist/__tests__/text-to-video.test.d.ts.map +1 -0
  15. package/dist/__tests__/text-to-video.test.js +67 -0
  16. package/dist/__tests__/text-to-video.test.js.map +1 -0
  17. package/dist/generators.d.ts +519 -7
  18. package/dist/generators.d.ts.map +1 -1
  19. package/dist/generators.js +24 -3
  20. package/dist/generators.js.map +1 -1
  21. package/dist/v2/character.d.ts +32 -0
  22. package/dist/v2/character.d.ts.map +1 -0
  23. package/dist/v2/character.js +29 -0
  24. package/dist/v2/character.js.map +1 -0
  25. package/dist/v2/generators.d.ts +69 -0
  26. package/dist/v2/generators.d.ts.map +1 -0
  27. package/dist/v2/generators.js +105 -0
  28. package/dist/v2/generators.js.map +1 -0
  29. package/dist/v2/index.d.ts +13 -0
  30. package/dist/v2/index.d.ts.map +1 -0
  31. package/dist/v2/index.js +14 -0
  32. package/dist/v2/index.js.map +1 -0
  33. package/dist/v2/selfie.d.ts +78 -0
  34. package/dist/v2/selfie.d.ts.map +1 -0
  35. package/dist/v2/selfie.js +87 -0
  36. package/dist/v2/selfie.js.map +1 -0
  37. package/dist/v2/subtitle.d.ts +31 -0
  38. package/dist/v2/subtitle.d.ts.map +1 -0
  39. package/dist/v2/subtitle.js +53 -0
  40. package/dist/v2/subtitle.js.map +1 -0
  41. package/dist/video.d.ts +628 -6
  42. package/dist/video.d.ts.map +1 -1
  43. package/dist/video.js +164 -4
  44. package/dist/video.js.map +1 -1
  45. package/package.json +36 -16
  46. package/scripts/generate-openapi.ts +87 -38
  47. package/scripts/generate-v2-docs.ts +328 -0
  48. package/src/__tests__/character-pipeline.test.ts +356 -0
  49. package/src/__tests__/parity.test.ts +8 -0
  50. package/src/__tests__/text-to-video.test.ts +79 -0
  51. package/src/generators.ts +29 -2
  52. package/src/v2/character.ts +39 -0
  53. package/src/v2/generators.ts +186 -0
  54. package/src/v2/index.ts +15 -0
  55. package/src/v2/selfie.ts +103 -0
  56. package/src/v2/subtitle.ts +62 -0
  57. package/src/video.ts +259 -5
@@ -27,6 +27,7 @@ import {
27
27
  PIP_ANIMATIONS,
28
28
  PIP_FRAME_STYLES,
29
29
  } from '../video.js';
30
+ import { GENERATOR_IDS, GENERATORS } from '../generators.js';
30
31
 
31
32
  // ── Helper: check if Zod accepts a body ─────────────────────────────────────
32
33
 
@@ -327,6 +328,13 @@ describe('Parity: pip_options', () => {
327
328
  // ── Enum completeness: verify schema arrays match production ────────────────
328
329
 
329
330
  describe('Enum completeness: values match production', () => {
331
+ it('promotes saas_review and keeps product_review as an undocumented legacy alias', () => {
332
+ expect(GENERATOR_IDS).toContain('saas_review');
333
+ expect(GENERATOR_IDS).not.toContain('product_review');
334
+ expect(GENERATORS.product_review.legacy).toBe(true);
335
+ expect(GENERATORS.product_review.inputSchema).toBe(GENERATORS.saas_review.inputSchema);
336
+ });
337
+
330
338
  it('DURATIONS matches production', () => {
331
339
  expect([...DURATIONS]).toEqual([5, 10, 15]);
332
340
  });
@@ -0,0 +1,79 @@
1
+ // Copyright 2026 agent-media contributors. Apache-2.0 license.
2
+
3
+ /**
4
+ * Validation tests for TextToVideoSchema — the pure-prompt generator
5
+ * used by the "Use prompts" batch mode and the one-shot
6
+ * POST /v1/generate/text_to_video endpoint.
7
+ *
8
+ * The schema is intentionally simple: prompt (20-1000 chars), duration
9
+ * (5/10/15), aspect_ratio (9:16/16:9/1:1), generate_audio (default true).
10
+ */
11
+
12
+ import { describe, it, expect } from 'vitest';
13
+ import { TextToVideoSchema, TEXT_TO_VIDEO_DURATIONS, TEXT_TO_VIDEO_RATIOS } from '../video.js';
14
+
15
/** A known-good prompt, well inside the documented 20–1000 character window. */
const VALID_PROMPT =
  'Handcrafted stylized stop-motion aesthetic with miniature practical sets, animation on 2s, warm magical lighting.';

/**
 * Validates a request body made of the known-good prompt plus any overrides.
 * Passing `prompt` in `overrides` replaces the default prompt.
 */
const parseWith = (overrides: Record<string, unknown> = {}) =>
  TextToVideoSchema.safeParse({ prompt: VALID_PROMPT, ...overrides });

describe('TextToVideoSchema', () => {
  it('accepts minimum valid input (prompt only — defaults fill in)', () => {
    const result = parseWith();
    expect(result.success).toBe(true);
    // Narrow to the success branch before reading the parsed defaults.
    if (!result.success) return;
    expect(result.data.duration).toBe(10);
    expect(result.data.aspect_ratio).toBe('9:16');
    expect(result.data.generate_audio).toBe(true);
  });

  it('rejects prompt shorter than 20 characters', () => {
    expect(parseWith({ prompt: 'too short' }).success).toBe(false);
  });

  it('rejects prompt longer than 1000 characters', () => {
    expect(parseWith({ prompt: 'A'.repeat(1001) }).success).toBe(false);
  });

  it('accepts each supported duration', () => {
    for (const duration of TEXT_TO_VIDEO_DURATIONS) {
      expect(parseWith({ duration }).success).toBe(true);
    }
  });

  it('rejects duration outside the allowed set', () => {
    expect(parseWith({ duration: 7 }).success).toBe(false);
  });

  it('accepts each supported aspect_ratio', () => {
    for (const aspectRatio of TEXT_TO_VIDEO_RATIOS) {
      expect(parseWith({ aspect_ratio: aspectRatio }).success).toBe(true);
    }
  });

  it('rejects unsupported aspect_ratio', () => {
    expect(parseWith({ aspect_ratio: '21:9' }).success).toBe(false);
  });

  it('accepts generate_audio=false', () => {
    expect(parseWith({ generate_audio: false }).success).toBe(true);
  });

  it('accepts a webhook_url that is HTTPS', () => {
    expect(parseWith({ webhook_url: 'https://example.com/hook' }).success).toBe(true);
  });

  it('rejects an invalid webhook_url', () => {
    expect(parseWith({ webhook_url: 'not-a-url' }).success).toBe(false);
  });
});
package/src/generators.ts CHANGED
@@ -9,11 +9,15 @@
9
9
  */
10
10
 
11
11
  import {
12
+ CharacterVideoSchema,
12
13
  CreateVideoSchema,
14
+ LaptopUgcSchema,
13
15
  ProductActingSchema,
14
16
  ProductReviewSchema,
17
+ SaasReviewSchema,
15
18
  ShowYourAppSchema,
16
19
  SubtitleSchema,
20
+ TextToVideoSchema,
17
21
  } from './video.js';
18
22
 
19
23
  export const GENERATORS = {
@@ -22,10 +26,16 @@ export const GENERATORS = {
22
26
  inputSchema: CreateVideoSchema,
23
27
  output: 'video_url' as const,
24
28
  },
29
+ saas_review: {
30
+ description: 'Generate a SaaS review video from a SaaS product URL',
31
+ inputSchema: SaasReviewSchema,
32
+ output: 'video_url' as const,
33
+ },
25
34
  product_review: {
26
- description: 'Generate a product review video from a product URL',
35
+ description: 'Legacy alias for SaaS Review video generation',
27
36
  inputSchema: ProductReviewSchema,
28
37
  output: 'video_url' as const,
38
+ legacy: true,
29
39
  },
30
40
  subtitle: {
31
41
  description: 'Add styled subtitles to an existing video',
@@ -42,8 +52,25 @@ export const GENERATORS = {
42
52
  inputSchema: ProductActingSchema,
43
53
  output: 'video_url' as const,
44
54
  },
55
+ laptop_ugc: {
56
+ description: 'Generate a 3-scene laptop-UGC ad: actor holds laptop showing your app, scrolling B-roll, then a face-only selfie close',
57
+ inputSchema: LaptopUgcSchema,
58
+ output: 'video_url' as const,
59
+ },
60
+ character_video: {
61
+ description: 'Generate a video of a character. Pick an actor by slug, OR pass a short description and we generate a character sheet via gpt-image-2 behind the scenes. Then Seedance 2.0 animates the reference image with your storyboard text.',
62
+ inputSchema: CharacterVideoSchema,
63
+ output: 'video_url' as const,
64
+ },
65
+ text_to_video: {
66
+ description: 'Pure text-to-video via Seedance 2.0 — no character, no storyboard, no actor. The prompt IS the whole creative (style, subject, mood, composition). Best for stylistic / scene-driven content where you want the model to invent everything from the prompt text alone.',
67
+ inputSchema: TextToVideoSchema,
68
+ output: 'video_url' as const,
69
+ },
45
70
  } as const;
46
71
 
47
72
  export type GeneratorId = keyof typeof GENERATORS;
48
73
 
49
- export const GENERATOR_IDS = Object.keys(GENERATORS) as GeneratorId[];
74
/**
 * Ids of the generators that are listed publicly.
 *
 * Entries flagged `legacy: true` in `GENERATORS` are filtered out, so a legacy
 * alias remains reachable through `GENERATORS[id]` but does not appear here.
 */
export const GENERATOR_IDS = (Object.keys(GENERATORS) as GeneratorId[]).filter((id) => {
  // Only some entries declare `legacy`, so read it through an optional-field view.
  const entry = GENERATORS[id] as { legacy?: boolean };
  return !entry.legacy;
});
@@ -0,0 +1,39 @@
1
+ // Copyright 2026 agent-media contributors. Apache-2.0 license.
2
+
3
+ /**
4
+ * v2 · Character create input schema.
5
+ *
6
+ * Persists a reusable AI character. The user uploads a single photo;
7
+ * we generate a portrait + multi-pose character sheet (gpt-image-2),
8
+ * pin a Seedance seed, store everything in `user_characters`.
9
+ *
10
+ * Returns: { character_id: "char_xxxxxxxxxx" } that the user can pass
11
+ * to any v2 video generator (Selfie, then Product-in-hands, etc).
12
+ */
13
+
14
+ import { z } from 'zod';
15
+ import { V2_SHOT_PRESETS } from './selfie.js';
16
+
17
/** Characters allowed in a display name: ASCII letters, digits, space, `_` and `-`. */
const DISPLAY_NAME_PATTERN = /^[A-Za-z0-9 _-]+$/;

/**
 * Display-name field: surrounding whitespace is trimmed, then the value must be
 * 2–40 characters long and match `DISPLAY_NAME_PATTERN`.
 */
const displayNameField = z
  .string()
  .trim()
  .min(2)
  .max(40)
  .regex(
    DISPLAY_NAME_PATTERN,
    'display_name may only contain letters, digits, spaces, underscores, or hyphens',
  );

/**
 * Request body for creating a reusable v2 character.
 *
 * Required: `photo_url`, `display_name`, `description`.
 * Optional: `voice_brief`, `preset_default`.
 */
export const CharacterCreateSchema = z.object({
  /** URL of the single source photo (frontal-ish, ideally daylight). */
  photo_url: z.string().url(),

  /** Human-readable identity of the character. */
  display_name: displayNameField,

  /** Free-text description of the character, 8–400 characters. */
  description: z.string().min(8).max(400),

  /** Optional voice direction, 4–240 characters. */
  voice_brief: z.string().min(4).max(240).optional(),

  /** Optional default shot preset; must be one of `V2_SHOT_PRESETS`. */
  preset_default: z.enum(V2_SHOT_PRESETS).optional(),
});

/** Parsed (output) type of `CharacterCreateSchema`. */
export type CharacterCreateInput = z.infer<typeof CharacterCreateSchema>;
@@ -0,0 +1,186 @@
1
+ // Copyright 2026 agent-media contributors. Apache-2.0 license.
2
+
3
+ /**
4
+ * v2 · Generator registry.
5
+ *
6
+ * Greenfield registry for the v2 product line. Lives alongside the
7
+ * legacy `GENERATORS` export in ../generators.ts but in its own file
8
+ * with a richer record shape — CLI / MCP / REST / pricing metadata
9
+ * baked in so the SDK, CLI, MCP server, docs and SKILL.md can all
10
+ * derive themselves from this one source.
11
+ *
12
+ * Old code keeps reading `GENERATORS` from ../generators.ts. New code
13
+ * imports `V2_GENERATORS` from here. Two surfaces, zero shared state.
14
+ *
15
+ * The new website + docs + SKILL.md only see v2.
16
+ */
17
+
18
+ import type { z } from 'zod';
19
+ import { SelfieSchema } from './selfie.js';
20
+ import { CharacterCreateSchema } from './character.js';
21
+ import { SubtitleSchema } from './subtitle.js';
22
+
23
+ // ── Record shape ──────────────────────────────────────────────────────────
24
+
25
/**
 * Lifecycle stage of a v2 generator. Deliberately limited to two states:
 *
 * - `stable` — fully exposed, no warnings.
 * - `beta` — exposed; SDK/CLI print a one-time warning and the MCP tool
 *   description starts with "[beta]".
 *
 * There is no `deprecated` tier: retired operations are deleted from the
 * registry rather than marked, because old code stays in its own place.
 */
export type V2Status = 'stable' | 'beta';

/**
 * One entry of the v2 generator registry.
 *
 * `cli`, `mcp`, `rest` and `pricing` are optional; omitting a surface hides
 * the generator from that surface.
 */
export interface V2GeneratorRecord {
  /** Registry id of the generator. */
  id: string;

  /** Lifecycle stage. */
  status: V2Status;

  /** Single line, used in CLI help and tool listings. */
  summary: string;

  /** Multi-line text, used in docs and MCP tool registration. */
  description: string;

  /** Zod schema. The contract. */
  inputSchema: z.ZodTypeAny;

  /** What the generator returns. Affects how the SDK types the response. */
  output: 'video_url' | 'character_id' | 'subtitled_video_url';

  /** CLI surface. Omit to hide from the CLI. */
  cli?: {
    /** Command name, e.g. "selfie" or "character create". */
    command: string;
    /** Input fields that take `--foo file.png` and need upload coercion. */
    fileFields?: string[];
    /** Example invocations. */
    examples?: string[];
  };

  /** MCP surface. Omit to hide from the MCP server. */
  mcp?: {
    /** Tool name, e.g. "create_selfie". */
    toolName: string;
  };

  /** REST surface. Omit to hide from the public API. */
  rest?: {
    method: 'POST' | 'GET';
    /** Route path, e.g. "/v2/selfie". */
    path: string;
  };

  /**
   * Pricing. Single source so dashboard + CLI quoting + webhook agree.
   *
   * - `per_clip`: a base amount plus a per-second amount.
   * - `one_shot`: a flat base amount.
   */
  pricing?:
    | { basis: 'per_clip'; baseCredits: number; perSecondCredits: number }
    | { basis: 'one_shot'; baseCredits: number };
}
72
+
73
+ // ── The registry ──────────────────────────────────────────────────────────
74
+
75
/**
 * The v2 generator registry, keyed by generator id.
 *
 * Deliberately NOT asserted `as const`: the explicit
 * `Record<string, V2GeneratorRecord>` annotation already fixes the exported
 * type, and a const assertion would turn `fileFields` / `examples` into
 * readonly tuples that are not assignable to the record's mutable `string[]`
 * fields.
 */
export const V2_GENERATORS: Record<string, V2GeneratorRecord> = {
  selfie: {
    id: 'selfie',
    status: 'stable',
    summary: 'AI person talking to camera, handheld iPhone-style.',
    description:
      'Generate a 9:16 vertical TikTok-style selfie clip. Pick a saved character (--character) ' +
      'OR pass a photo + description inline. The pipeline composes a portrait → multi-pose ' +
      'character sheet → per-scene wireframe, then Seedance 2.0 animates the scene with native ' +
      'audio. Output: an mp4 hosted on R2.',
    inputSchema: SelfieSchema,
    output: 'video_url',

    cli: {
      command: 'selfie',
      fileFields: ['photo_url'],
      examples: [
        'agent-media selfie --character char_8x2vqp --script "..."',
        'agent-media selfie --photo me.png --description "25, ..." --script "..."',
      ],
    },
    mcp: { toolName: 'create_selfie' },
    rest: { method: 'POST', path: '/v2/selfie' },

    // Cost math (list price, 70% margin floor):
    //    5s = $0.63 cost → 210 credits ($2.10)
    //    8s = $0.93 cost → 310 credits ($3.10)
    //   12s = $1.33 cost → 445 credits ($4.45)
    //   15s = $1.63 cost → 545 credits ($5.45)
    // Linear in duration after a fixed prelude (~$0.13 of gpt-image-2 + R2)
    // so we express as base + per-second.
    // NOTE(review): 75 + 30 * seconds yields 225 / 315 / 435 / 525 credits for
    // 5 / 8 / 12 / 15 s, which does not match the table above. Confirm whether
    // the table or the coefficients are authoritative.
    pricing: { basis: 'per_clip', baseCredits: 75, perSecondCredits: 30 },
  },

  character_create: {
    id: 'character_create',
    status: 'stable',
    summary: 'Create a reusable AI character from a single photo.',
    description:
      'Persists a character so subsequent video calls can reference it by id. Two gpt-image-2 ' +
      'calls (portrait + multi-pose character sheet) are made at create time and cached in R2. ' +
      'A pinned Seedance seed is stored on the row. Returns: { character_id }.',
    inputSchema: CharacterCreateSchema,
    output: 'character_id',

    cli: {
      command: 'character create',
      fileFields: ['photo_url'],
      examples: [
        'agent-media character create --photo X.png --name "sofia" --description "..."',
      ],
    },
    mcp: { toolName: 'create_character' },
    rest: { method: 'POST', path: '/v2/characters' },

    // ~$0.08 our cost → 27 credits at 70% margin.
    pricing: { basis: 'one_shot', baseCredits: 27 },
  },

  subtitle: {
    id: 'subtitle',
    status: 'stable',
    summary: 'Burn styled subtitles onto an existing video.',
    description:
      'Downloads the source video, transcribes it with Whisper (or accepts a caller-supplied ' +
      'transcript), generates an ASS subtitle file in the chosen style (Hormozi by default; ' +
      '17 styles available), and burns the subs into a new mp4 via ffmpeg. Output: a new ' +
      'mp4 URL on R2. Source video is fetched once and discarded.',
    inputSchema: SubtitleSchema,
    output: 'video_url',

    cli: {
      // Command is `subs` (not `subtitle`) because `agent-media subtitle`
      // is already registered by the legacy CLI surface. Both routes
      // live in parallel: the legacy `subtitle` command calls the v1
      // worker via /v1/generate/subtitle, the new `subs` command calls
      // the v2 worker via /v2/subtitle.
      command: 'subs',
      examples: [
        'agent-media subs --video https://r2/clip.mp4 --style hormozi',
        'agent-media subs --video https://r2/clip.mp4 --transcript "exact script text" --style neon',
      ],
    },
    mcp: { toolName: 'create_subtitle' },
    rest: { method: 'POST', path: '/v2/subtitle' },

    // Cost basis:
    //   Whisper: $0.006 / minute of audio → trivial at < 60s
    //   ffmpeg compute: a few cents at most for a short clip
    //   R2 download + re-upload: rounding error
    // Realistic our-cost for an 8s clip: < $0.01. For a 60s clip: ~$0.02.
    // 70% margin floor → priced generously at 3 credits/sec so a casual
    // 8s subtitle is 24 credits ($0.24) — small enough that users don't
    // think twice.
    pricing: { basis: 'per_clip', baseCredits: 0, perSecondCredits: 3 },
  },
};

/**
 * Key type of `V2_GENERATORS`.
 *
 * Because the registry is annotated as `Record<string, V2GeneratorRecord>`,
 * this is a general string key type, not the literal union of the ids above.
 */
export type V2GeneratorId = keyof typeof V2_GENERATORS;

/** Ids currently present in the registry, in declaration order. */
export const V2_GENERATOR_IDS = Object.keys(V2_GENERATORS) as V2GeneratorId[];
176
+
177
/**
 * Pricing helper. Single source for CLI quoting, webhook, dashboard.
 *
 * @param id - Registry key of the generator to quote.
 * @param opts - `durationSeconds` is the clip length used for `per_clip`
 *   pricing; it defaults to 8 when omitted.
 * @returns `0` when the id is not in the registry or the entry has no pricing;
 *   `baseCredits` for `one_shot` entries; otherwise
 *   `baseCredits + perSecondCredits * seconds`.
 */
export function quoteV2Credits(id: V2GeneratorId, opts: { durationSeconds?: number } = {}): number {
  const pricing = V2_GENERATORS[id]?.pricing;
  if (!pricing) return 0;

  return pricing.basis === 'one_shot'
    ? pricing.baseCredits
    : pricing.baseCredits + pricing.perSecondCredits * (opts.durationSeconds ?? 8);
}
@@ -0,0 +1,15 @@
1
+ // Copyright 2026 agent-media contributors. Apache-2.0 license.
2
+
3
+ /**
4
+ * @agent-media/schema/v2 — v2 product line surface.
5
+ *
6
+ * Old code keeps importing from `@agent-media/schema`. New code
7
+ * (sdk-ts/v2, sdk-python.v2, MCP loop, CLI v2 commands, api-v2
8
+ * /v2/* routes, new dashboard, new docs, new SKILL.md) imports
9
+ * exclusively from `@agent-media/schema/v2`.
10
+ */
11
+
12
+ export * from './selfie.js';
13
+ export * from './character.js';
14
+ export * from './subtitle.js';
15
+ export * from './generators.js';
@@ -0,0 +1,103 @@
1
+ // Copyright 2026 agent-media contributors. Apache-2.0 license.
2
+
3
+ /**
4
+ * v2 · Selfie input schema.
5
+ *
6
+ * The v1 Selfie product. Generates a 9:16 vertical TikTok-style clip
7
+ * of an AI person talking to camera in a chosen "shot grammar" preset.
8
+ *
9
+ * Validated end-to-end by the 4-stage pipeline (gpt-image-2 portrait
10
+ * → sheet → wireframe → Seedance 2.0 ref-to-video), see
11
+ * services/media-worker-v2/src/v2/selfie-pipeline.js.
12
+ *
13
+ * Two character paths:
14
+ * - Bring-your-own: pass `photo_url` + `description` (we synthesize
15
+ * a portrait + sheet on the fly, throwaway).
16
+ * - Saved character: pass `character_id` (we load the persisted
17
+ * portrait + sheet + pinned seed + voice brief from the DB).
18
+ *
19
+ * One of `photo_url + description` OR `character_id` is required.
20
+ */
21
+
22
+ import { z } from 'zod';
23
+
24
+ // ── Shot-grammar presets (locked v1 list) ────────────────────────────────
25
/**
 * Shot-grammar preset ids accepted by the v2 schemas.
 * Order is significant to anything that indexes or lists the tuple.
 */
export const V2_SHOT_PRESETS = [
  'bedroom-morning-ritual',
  'getting-ready-mirror-edge',
  'bathroom-skincare-routine',
  'bedside-lamp-evening',
  'kitchen-glow-up',

  'backyard-morning-coffee',
  'picnic-blanket-outdoor',
  'car-quick-honest-review',
  'car-passenger-honest',
  'outdoor-walking-talking',

  'couch-haul-show-off',
  'closet-fit-check',
  'studio-apartment-tour',
  'balcony-evening-vibes',
  'desk-wfh-quick-pitch',

  'cafe-window-seat',
  'office-bathroom-discreet',
  'gym-post-workout',
  'salon-mirror-result',
  'travel-hotel-room-review',
] as const;

/** Union of every preset id in `V2_SHOT_PRESETS`. */
export type V2ShotPreset = (typeof V2_SHOT_PRESETS)[number];

/** Delivery moods accepted by the v2 schemas. */
export const V2_VIBES = [
  'excited',
  'calm',
  'sassy',
  'serious',
  'curious',
] as const;

/** Union of every mood in `V2_VIBES`. */
export type V2Vibe = (typeof V2_VIBES)[number];

/** Clip lengths listed for v2. */
export const V2_DURATIONS = [
  5,
  8,
  12,
  15,
] as const;

/** Union of every length in `V2_DURATIONS`. */
export type V2Duration = (typeof V2_DURATIONS)[number];
54
+
55
+ // ── Input schema ──────────────────────────────────────────────────────────
56
/**
 * Request body for the v2 Selfie generator.
 *
 * The character is supplied in exactly one of two ways, enforced by the
 * refinement at the end:
 *   - saved: `character_id` alone, or
 *   - inline: `photo_url` together with `description`.
 */
export const SelfieSchema = z
  .object({
    /** Id of a saved character: `char_` followed by 10+ alphanumerics. */
    character_id: z
      .string()
      .regex(/^char_[A-Za-z0-9]{10,}$/, 'character_id must look like char_XXXXXXXXXX')
      .optional(),
    /** Inline character: source photo URL. */
    photo_url: z.string().url().optional(),
    /** Inline character: free-text description, 8–400 characters. */
    description: z.string().min(8).max(400).optional(),

    /** The line being said, 4–600 characters. */
    script: z.string().min(4).max(600),

    /** Shot-grammar preset; defaults to 'bedroom-morning-ritual'. */
    preset: z.enum(V2_SHOT_PRESETS).default('bedroom-morning-ritual'),
    /** Delivery mood; defaults to 'excited'. */
    vibe: z.enum(V2_VIBES).default('excited'),
    /** Clip length; one of 5, 8, 12 or 15, defaulting to 8. */
    duration: z
      .union([z.literal(5), z.literal(8), z.literal(12), z.literal(15)])
      .default(8),

    /**
     * Voice direction (one line, natural language). Pulled from the character
     * record when character_id is used; the caller can still override per job.
     */
    voice_brief: z.string().min(4).max(240).optional(),

    /** Whether subtitles are wanted; defaults to true. */
    subtitles: z.boolean().default(true),
  })
  .superRefine((input, ctx) => {
    const photoGiven = Boolean(input.photo_url);
    const descriptionGiven = Boolean(input.description);

    if (input.character_id) {
      // Saved-character path: any inline field alongside it is a conflict.
      if (photoGiven || descriptionGiven) {
        ctx.addIssue({
          code: z.ZodIssueCode.custom,
          message:
            'Use character_id OR (photo_url + description) — not both.',
        });
      }
      return;
    }

    // Inline path: both halves of the inline character are required.
    if (!(photoGiven && descriptionGiven)) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        message:
          'Provide either character_id, OR both photo_url and description.',
      });
    }
  });

/** Parsed (output) type of `SelfieSchema`, with defaults applied. */
export type SelfieInput = z.infer<typeof SelfieSchema>;
@@ -0,0 +1,62 @@
1
+ // Copyright 2026 agent-media contributors. Apache-2.0 license.
2
+
3
+ /**
4
+ * v2 · Subtitle input schema.
5
+ *
6
+ * Burns styled subtitles onto an existing video. Takes a public video
7
+ * URL, transcribes via Whisper (or accepts a caller-supplied transcript
8
+ * to skip transcription), generates an ASS subtitle file in the chosen
9
+ * style, and burns it into a new mp4 via ffmpeg.
10
+ *
11
+ * Output: a new mp4 URL on R2.
12
+ */
13
+
14
+ import { z } from 'zod';
15
+
16
+ // The 17 styles the ASS generator already supports (mirrors the legacy
17
+ // SUBTITLE_STYLES list in packages/schema/src/video.ts).
18
/**
 * Subtitle style ids accepted by the v2 subtitle schema (17 entries).
 * Order is significant to anything that indexes or lists the tuple.
 */
export const V2_SUBTITLE_STYLES = [
  'hormozi', 'minimal', 'bold', 'karaoke',
  'clean', 'tiktok', 'neon', 'fire',
  'glow', 'pop', 'aesthetic', 'impact',
  'pastel', 'electric', 'boxed', 'gradient',
  'spotlight',
] as const;

/** Union of every style id in `V2_SUBTITLE_STYLES`. */
export type V2SubtitleStyle = (typeof V2_SUBTITLE_STYLES)[number];
38
+
39
/**
 * Request body for the v2 subtitle generator.
 *
 * Only `video_url` is required. `style` defaults to 'hormozi'; `transcript`
 * and `language` are optional.
 */
export const SubtitleSchema = z.object({
  // The video to subtitle. Must be a publicly-fetchable URL — R2, S3,
  // any CDN. We download, transcribe, burn, re-host.
  video_url: z.string().url(),

  // Visual style. Defaults to hormozi because that's what most users
  // want for short-form vertical content.
  style: z.enum(V2_SUBTITLE_STYLES).default('hormozi'),

  // Optional override. When set, we skip Whisper and use this text as
  // the transcript. Useful when the caller already has the script
  // (e.g. they just generated the video from a known script).
  transcript: z.string().min(1).max(5000).optional(),

  // Spoken language hint for Whisper. ISO 639-1 (`en`, `es`, `pt`, …),
  // or null / omitted to let Whisper detect.
  // `.nullish()` (rather than `.optional()`) is required for the documented
  // "pass null" usage: with `.optional()` an explicit null fails validation.
  // Consumers must therefore treat null and undefined alike.
  language: z
    .string()
    .length(2)
    .regex(/^[a-z]{2}$/, 'language must be a lowercase ISO 639-1 code')
    .nullish(),
});

/** Parsed (output) type of `SubtitleSchema`; `language` may be a code, null, or undefined. */
export type SubtitleInput = z.infer<typeof SubtitleSchema>;