npm - @agentmedia/schema - Versions diffs - 0.3.0 → 0.5.0 - Mend

@agentmedia/schema 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (54) hide show

package/.memory/cursor.json +3 -0
package/.memory/memories.json +1 -0
package/.memory/project.json +5 -0
package/CLAUDE.md +7 -0
package/dist/__tests__/character-pipeline.test.d.ts +2 -0
package/dist/__tests__/character-pipeline.test.d.ts.map +1 -0
package/dist/__tests__/character-pipeline.test.js +296 -0
package/dist/__tests__/character-pipeline.test.js.map +1 -0
package/dist/__tests__/text-to-video.test.d.ts +2 -0
package/dist/__tests__/text-to-video.test.d.ts.map +1 -0
package/dist/__tests__/text-to-video.test.js +67 -0
package/dist/__tests__/text-to-video.test.js.map +1 -0
package/dist/generators.d.ts +67 -0
package/dist/generators.d.ts.map +1 -1
package/dist/generators.js +11 -1
package/dist/generators.js.map +1 -1
package/dist/v2/character.d.ts +32 -0
package/dist/v2/character.d.ts.map +1 -0
package/dist/v2/character.js +31 -0
package/dist/v2/character.js.map +1 -0
package/dist/v2/generators.d.ts +69 -0
package/dist/v2/generators.d.ts.map +1 -0
package/dist/v2/generators.js +105 -0
package/dist/v2/generators.js.map +1 -0
package/dist/v2/index.d.ts +13 -0
package/dist/v2/index.d.ts.map +1 -0
package/dist/v2/index.js +14 -0
package/dist/v2/index.js.map +1 -0
package/dist/v2/selfie.d.ts +78 -0
package/dist/v2/selfie.d.ts.map +1 -0
package/dist/v2/selfie.js +98 -0
package/dist/v2/selfie.js.map +1 -0
package/dist/v2/subtitle.d.ts +31 -0
package/dist/v2/subtitle.d.ts.map +1 -0
package/dist/v2/subtitle.js +53 -0
package/dist/v2/subtitle.js.map +1 -0
package/dist/video.d.ts +171 -0
package/dist/video.d.ts.map +1 -1
package/dist/video.js +89 -0
package/dist/video.js.map +1 -1
package/package.json +6 -1
package/scripts/generate-v2-docs.ts +548 -0
package/src/__tests__/character-pipeline.test.ts +356 -0
package/src/__tests__/text-to-video.test.ts +79 -0
package/src/generators.ts +12 -0
package/src/v2/character.ts +41 -0
package/src/v2/generators.ts +186 -0
package/src/v2/index.ts +15 -0
package/src/v2/selfie.ts +115 -0
package/src/v2/subtitle.ts +62 -0
package/src/video.ts +164 -0
package/.turbo/turbo-build.log +0 -4
package/.turbo/turbo-test.log +0 -14
package/.turbo/turbo-typecheck.log +0 -4

package/src/v2/index.ts ADDED Viewed

@@ -0,0 +1,15 @@
+// Copyright 2026 agent-media contributors. Apache-2.0 license.
+/**
+ * @agent-media/schema/v2 — v2 product line surface.
+ *
+ * Old code keeps importing from `@agent-media/schema`. New code
+ * (sdk-ts/v2, sdk-python.v2, MCP loop, CLI v2 commands, api-v2
+ * /v2/* routes, new dashboard, new docs, new SKILL.md) imports
+ * exclusively from `@agent-media/schema/v2`.
+ */
+export * from './selfie.js';
+export * from './character.js';
+export * from './subtitle.js';
+export * from './generators.js';

package/src/v2/selfie.ts ADDED Viewed

@@ -0,0 +1,115 @@
+// Copyright 2026 agent-media contributors. Apache-2.0 license.
+/**
+ * v2 · Selfie input schema.
+ *
+ * The v1 Selfie product. Generates a 9:16 vertical TikTok-style clip
+ * of an AI person talking to camera in a chosen "shot grammar" preset.
+ *
+ * Validated end-to-end by the 4-stage pipeline (gpt-image-2 portrait
+ * → sheet → wireframe → Seedance 2.0 ref-to-video), see
+ * services/media-worker-v2/src/v2/selfie-pipeline.js.
+ *
+ * Two character paths:
+ *   - Bring-your-own: pass `description`, optionally with `photo_url`
+ *     as a reference (we synthesize a portrait + sheet on the fly, throwaway).
+ *   - Saved character: pass `character_id` (we load the persisted
+ *     portrait + sheet + pinned seed + voice brief from the DB).
+ *
+ * Exactly one path is required: `description` (+ optional `photo_url`) OR `character_id`, never both.
+ */
+import { z } from 'zod';
+// ── Shot-grammar presets (locked v1 list) — the allowed `preset` values for SelfieSchema ──
+export const V2_SHOT_PRESETS = [
+  'bedroom-morning-ritual',
+  'getting-ready-mirror-edge',
+  'bathroom-skincare-routine',
+  'bedside-lamp-evening',
+  'kitchen-glow-up',
+  'backyard-morning-coffee',
+  'picnic-blanket-outdoor',
+  'car-quick-honest-review',
+  'car-passenger-honest',
+  'outdoor-walking-talking',
+  'couch-haul-show-off',
+  'closet-fit-check',
+  'studio-apartment-tour',
+  'balcony-evening-vibes',
+  'desk-wfh-quick-pitch',
+  'cafe-window-seat',
+  'office-bathroom-discreet',
+  'gym-post-workout',
+  'salon-mirror-result',
+  'travel-hotel-room-review',
+] as const; // `as const` keeps every entry a string literal so z.enum() and the union type below see the exact names
+export type V2ShotPreset = (typeof V2_SHOT_PRESETS)[number]; // union of the preset names listed above
+export const V2_VIBES = ['excited', 'calm', 'sassy', 'serious', 'curious'] as const; // allowed `vibe` values for SelfieSchema
+export type V2Vibe = (typeof V2_VIBES)[number]; // union of the vibe names
+export const V2_DURATIONS = [5, 10, 15] as const; // allowed clip durations; NOTE(review): SelfieSchema.duration repeats 5/10/15 as literals instead of referencing this tuple — keep the two in sync
+export type V2Duration = (typeof V2_DURATIONS)[number]; // 5 | 10 | 15
+// ── Input schema: per-field checks in z.object(), cross-field character-path rules in superRefine ──
+export const SelfieSchema = z
+  .object({
+    // Character — either a saved `character_id`, or `description` with an optional `photo_url` (enforced in superRefine below)
+    character_id: z
+      .string()
+      .regex(/^char_[A-Za-z0-9]{10,}$/, 'character_id must look like char_XXXXXXXXXX') // `char_` followed by 10 or more ASCII alphanumerics; the message shows the minimum length
+      .optional(),
+    photo_url: z.string().url().optional(),
+    description: z.string().min(8).max(400).optional(),
+    // The line being said (the only field with neither `.optional()` nor a default)
+    script: z.string().min(4).max(600),
+    // Composition — each of these falls back to its default when omitted
+    preset: z.enum(V2_SHOT_PRESETS).default('bedroom-morning-ritual'),
+    vibe: z.enum(V2_VIBES).default('excited'),
+    duration: z
+      .union([z.literal(5), z.literal(10), z.literal(15)]) // same values as V2_DURATIONS, spelled out as literals
+      .default(10),
+    // Voice direction (one line, natural language). Pulled from the
+    // character record when character_id is used; user can still
+    // override per-job.
+    voice_brief: z.string().min(4).max(240).optional(),
+    // Subtitles — enabled unless the caller passes false
+    subtitles: z.boolean().default(true),
+  })
+  .superRefine((val, ctx) => {
+    const hasSavedCharacter = !!val.character_id;
+    const hasDescription = !!val.description;
+    // Three valid input paths:
+    //   1. character_id alone (reuse saved character)
+    //   2. description alone (agent-media generates the portrait from text)
+    //   3. photo_url + description (use the user's photo as reference)
+    // Anything else is rejected. The checks below are independent, so one input can raise several issues (e.g. photo_url alone trips the first and the third).
+    if (!hasSavedCharacter && !hasDescription) { // neither path was chosen
+      ctx.addIssue({
+        code: z.ZodIssueCode.custom, // no `path` is given, so the issue is reported on the object rather than on one field
+        message:
+          'Provide either character_id, OR description (with optional photo_url for a real reference person).',
+      });
+    }
+    if (hasSavedCharacter && (val.photo_url || val.description)) { // saved character mixed with bring-your-own fields
+      ctx.addIssue({
+        code: z.ZodIssueCode.custom,
+        message:
+          'Use character_id OR description (+ optional photo_url) — not both.',
+      });
+    }
+    if (val.photo_url && !val.description) { // a photo is never accepted without a description
+      ctx.addIssue({
+        code: z.ZodIssueCode.custom,
+        message:
+          'photo_url requires a description so we know what to emphasize.',
+      });
+    }
+  });
+export type SelfieInput = z.infer<typeof SelfieSchema>; // z.infer yields the parsed (output) shape, so fields with `.default()` are always present here

package/src/v2/subtitle.ts ADDED Viewed

@@ -0,0 +1,62 @@
+// Copyright 2026 agent-media contributors. Apache-2.0 license.
+/**
+ * v2 · Subtitle input schema.
+ *
+ * Burns styled subtitles onto an existing video. Takes a public video
+ * URL, transcribes via Whisper (or accepts a caller-supplied transcript
+ * to skip transcription), generates an ASS subtitle file in the chosen
+ * style, and burns it into a new mp4 via ffmpeg.
+ *
+ * Output: a new mp4 URL on R2.
+ */
+import { z } from 'zod';
+// The 17 styles the ASS generator already supports (mirrors the legacy
+// SUBTITLE_STYLES list in packages/schema/src/video.ts). These are the allowed `style` values for SubtitleSchema.
+export const V2_SUBTITLE_STYLES = [
+  'hormozi',
+  'minimal',
+  'bold',
+  'karaoke',
+  'clean',
+  'tiktok',
+  'neon',
+  'fire',
+  'glow',
+  'pop',
+  'aesthetic',
+  'impact',
+  'pastel',
+  'electric',
+  'boxed',
+  'gradient',
+  'spotlight',
+] as const; // `as const` keeps each entry a string literal for z.enum() and the union type below
+export type V2SubtitleStyle = (typeof V2_SUBTITLE_STYLES)[number]; // union of the style names listed above
+export const SubtitleSchema = z.object({ // input for the subtitle burn-in job: only `video_url` is required
+  // The video to subtitle. Must be a publicly-fetchable URL — R2, S3,
+  // any CDN. We download, transcribe, burn, re-host.
+  video_url: z.string().url(),
+  // Visual style. Defaults to hormozi because that's what most users
+  // want for short-form vertical content.
+  style: z.enum(V2_SUBTITLE_STYLES).default('hormozi'),
+  // Optional override. When set, we skip Whisper and use this text as
+  // the transcript. Useful when the caller already has the script
+  // (e.g. they just generated the video from a known script).
+  transcript: z.string().min(1).max(5000).optional(),
+  // Spoken language hint for Whisper. ISO 639-1 (`en`, `es`, `pt`,
+  // …); omit the field to let Whisper detect. NOTE(review): the field is `.optional()` but not `.nullable()`, so an explicit `null` fails validation — callers must leave it out rather than pass null.
+  language: z
+    .string()
+    .length(2) // overlaps with the regex below, which also pins the value to exactly two characters
+    .regex(/^[a-z]{2}$/, 'language must be a lowercase ISO 639-1 code') // checks shape only (two lowercase a-z letters), not membership in the ISO 639-1 list
+    .optional(),
+});
+export type SubtitleInput = z.infer<typeof SubtitleSchema>; // parsed (output) shape: `style` is always present because of its default

package/src/video.ts CHANGED Viewed

@@ -417,3 +417,167 @@ export const ProductActingSchema = z.object({
 });
 export type ProductActingInput = z.infer<typeof ProductActingSchema>;
+// ── Character Video ────────────────────────────────────────────────────────
+//
+// 3-step pipeline:
+//   Step 1: Character reference sheet (PNG)
+//             — POST /v1/character/sheet-generate
+//             — actor_slug returns portrait_url (free), description calls
+//               gpt-image-2 (~$0.04)
+//   Step 2: Storyboard sheet (PNG)
+//             — POST /v1/character/storyboard-generate
+//             — gpt-image-2 paints numbered panels using the character
+//               sheet as a reference so panels stay on-character (~$0.04)
+//   Step 3: Final video (this generator)
+//             — Seedance 2.0 multimodal: image_urls = [character, storyboard]
+//               + short action_prompt + duration/ratio settings
+//
+// All three URLs must be public HTTPS so Seedance can fetch them. NOTE(review): this schema carries two image URLs (character_sheet_url, storyboard_url) plus webhook_url, and validates them with `.url()` only — the https scheme is not enforced here; confirm which three URLs are meant.
+export const CHARACTER_VIDEO_DURATIONS = [5, 10] as const; // allowed `duration` values, in seconds per the field description below
+export const CHARACTER_VIDEO_RATIOS = ['9:16', '16:9', '1:1'] as const; // allowed `aspect_ratio` values
+export const CharacterVideoSchema = z.object({ // step 3 input: the two sheet URLs are required, everything else is optional or defaulted
+  character_sheet_url: z.string().url().describe(
+    'Public HTTPS URL of the character reference sheet PNG. Generate one via POST /v1/character/sheet-generate or upload your own.',
+  ),
+  storyboard_url: z.string().url().describe(
+    'Public HTTPS URL of the storyboard panels PNG. Generate one via POST /v1/character/storyboard-generate or upload your own.',
+  ),
+  action_prompt: z.string().min(1).max(2000).optional().describe(
+    'Optional scene/action description sent to Seedance as the primary scene driver (e.g. "Marco the chef in his Brooklyn kitchen at golden hour, takes a bite of fresh bread"). When omitted, the api-v2 server backstops it from the same session\'s storyboard job (script or beats) — without a real scene description Seedance picks the location arbitrarily.',
+  ),
+  duration: z.number().refine(
+    (v) => (CHARACTER_VIDEO_DURATIONS as readonly number[]).includes(v), // widened to number[] because includes() on the literal tuple would only accept 5 | 10 as its argument type
+    { message: 'duration must be 5 or 10' },
+  ).optional().default(10).describe('Video duration in seconds. Valid values: 5 or 10. Max 10s — Seedance i2v quality degrades past 10s with multimodal inputs.'),
+  aspect_ratio: z.enum(CHARACTER_VIDEO_RATIOS).optional().default('9:16').describe('Output aspect ratio. Default 9:16.'),
+  generate_audio: z.boolean().optional().default(true).describe('Whether Seedance synthesizes synchronized audio (ambient sounds, breath, etc.).'),
+  session_id: z.string().uuid().optional().describe('Optional UUID linking the three pipeline steps (sheet → storyboard → video) into one wizard session. Same value across calls makes inserts idempotent and lets api-v2 backstop action_prompt from the storyboard step.'),
+  webhook_url: z.string().url().optional().describe('HTTPS URL to receive a callback when the job completes or fails.'),
+});
+export type CharacterVideoInput = z.infer<typeof CharacterVideoSchema>; // parsed (output) shape: duration, aspect_ratio and generate_audio are always present because of their defaults
+// ── Text-to-Video (pure prompt, no character / no storyboard) ──────────────
+//
+// Drives Seedance 2.0 text-to-video. The prompt IS the whole creative —
+// style, subject, mood, composition all baked into one string. Used by
+// the "Use prompts" batch-schedule mode and as a one-shot generator
+// for Claude Code / MCP / CLI workflows where the agent already has a
+// fully-formed video prompt.
+export const TEXT_TO_VIDEO_DURATIONS = [5, 10, 15] as const; // allowed `duration` values, in seconds per the field description below
+export const TEXT_TO_VIDEO_RATIOS = ['9:16', '16:9', '1:1'] as const; // allowed `aspect_ratio` values
+export const TextToVideoSchema = z.object({ // only `prompt` is required; every other field is optional or defaulted
+  prompt: z.string().min(20).max(1000).describe(
+    'The full video prompt. Style, subject, mood, composition, lighting, lens, motion — all in one string. Seedance reads this verbatim. Min 20 / max 1000 chars.',
+  ),
+  duration: z.number().refine(
+    (v) => (TEXT_TO_VIDEO_DURATIONS as readonly number[]).includes(v), // widened to number[] so includes() accepts an arbitrary number rather than only 5 | 10 | 15
+    { message: 'duration must be 5, 10, or 15' },
+  ).optional().default(10).describe('Video duration in seconds. Valid: 5, 10, or 15.'),
+  aspect_ratio: z.enum(TEXT_TO_VIDEO_RATIOS).optional().default('9:16').describe(
+    'Output aspect ratio. 9:16 portrait (TikTok / Reels / Shorts), 1:1 square (feed posts), 16:9 landscape (YouTube / X / LinkedIn). Default 9:16.',
+  ),
+  generate_audio: z.boolean().optional().default(true).describe(
+    'Whether Seedance synthesizes synchronized audio. No extra charge.',
+  ),
+  webhook_url: z.string().url().optional().describe(
+    'HTTPS URL to receive a callback when the job completes or fails.',
+  ),
+  /** When set, on completion the webhook-provider fans the rendered MP4
+   *  out to these Postiz integrations (X, LinkedIn, etc.). Use the IDs
+   *  returned from GET /v1/integrations/postiz/accounts. */
+  postiz_integration_ids: z.array(z.string()).optional().describe(
+    'Postiz integration IDs to auto-publish to once the video is rendered. Get them from GET /v1/integrations/postiz/accounts.',
+  ),
+  /** How to compose the social caption. 'static' = use `caption` verbatim. 'ai' = Claude writes one using `caption_guidance` as bias. */
+  caption_mode: z.enum(['ai', 'static']).optional().default('static').describe(
+    "Caption mode for auto-publish. 'static' (default) uses the literal `caption` string. 'ai' has Claude Opus generate one each time, biased by `caption_guidance`.",
+  ),
+  caption: z.string().min(1).max(2000).optional().describe(
+    'Literal caption text for the social post. Only used when caption_mode is omitted or "static".',
+  ),
+  caption_guidance: z.string().min(1).max(1000).optional().describe(
+    'Tone / hashtag / length hints for the AI caption writer. Only used when caption_mode is "ai".',
+  ),
+}); // no cross-field refinement here: `caption` and `caption_guidance` are accepted whatever `caption_mode` is, and the ID array may be empty
+export type TextToVideoInput = z.infer<typeof TextToVideoSchema>; // parsed (output) shape: duration, aspect_ratio, generate_audio and caption_mode are always present because of their defaults
+// ── Step 1 of 3: Character Sheet generation ─────────────────────────────────
+//
+// POST /v1/character/sheet-generate. Async — returns 202 + job_id.
+// Pricing: 12 credits ($0.12) for actor/reference (1 gpt-image-2 call),
+//          20 credits ($0.20) for description-only (paint portrait + sheet). NOTE(review): the Character Video pipeline comment in this file quotes "free" / ~$0.04 for this step — presumably cost vs. price, confirm.
+export const CharacterSheetSchema = z.object({ // every field is optional on its own; the two refinements below enforce the valid combinations
+  description: z.string().min(3).max(400).optional().describe(
+    'Free-text description of the character (e.g. "Marco, a 35yo Italian chef with curly black hair, white uniform"). When provided alongside a reference image, it adds context. When provided alone (no actor_slug, no reference_image_url), the worker first paints a portrait via gpt-image-2 text-to-image, then turns it into the sheet.',
+  ),
+  actor_slug: z.string().min(1).optional().describe(
+    'Slug of an actor from the agent-media library. The actor\'s portrait is used as the reference image. List actors via GET /v1/actors. Mutually exclusive with reference_image_url.',
+  ),
+  reference_image_url: z.string().url().optional().describe(
+    'Public HTTPS URL of a portrait/reference image (PNG/JPEG/WebP, <5MB recommended). Mutually exclusive with actor_slug.',
+  ),
+  session_id: z.string().uuid().optional().describe('Optional UUID linking the three pipeline steps. Same value across calls makes inserts idempotent.'),
+}).refine(
+  (v) => Boolean(v.description) || Boolean(v.actor_slug) || Boolean(v.reference_image_url), // at least one character source must be present
+  { message: 'Provide at least one of: description, actor_slug, or reference_image_url' },
+).refine(
+  (v) => !(v.actor_slug && v.reference_image_url), // only one reference image source; `description` may accompany either
+  { message: 'actor_slug and reference_image_url are mutually exclusive' },
+);
+export type CharacterSheetInput = z.infer<typeof CharacterSheetSchema>; // parsed shape; the refinements add runtime checks only and do not change the type
+// ── Step 2 of 3: Storyboard generation ──────────────────────────────────────
+//
+// POST /v1/character/storyboard-generate. Async — returns 202 + job_id.
+// Pricing: 12 credits ($0.12).
+// Provide EITHER `beats` (3-10 short strings, gpt-image-2 paints them as
+// numbered panels) OR `script` (free-text up to 1500 chars; gpt-image-2
+// splits it into 4-6 panels itself). Supplying both, or neither, is rejected by the refinement below.
+export const STORYBOARD_RATIOS = ['9:16', '16:9', '1:1'] as const; // allowed `ratio` values; same three entries as CHARACTER_VIDEO_RATIOS, declared separately
+export const CharacterStoryboardSchema = z.object({ // `character_sheet_url` is required; `beats` and `script` are individually optional
+  character_sheet_url: z.string().url().describe(
+    'Public HTTPS URL of the character sheet PNG (from /v1/character/sheet-generate). Used as the visual reference so the character stays on-model across panels.',
+  ),
+  beats: z.array(z.string().min(3).max(200)).min(3).max(10).optional().describe(
+    'Ordered list of 3-10 short beat descriptions, one per panel (e.g. ["Marco walks into kitchen", "pulls bread from oven", "takes a bite", "thumbs up"]). Mutually exclusive with `script`.',
+  ),
+  script: z.string().min(10).max(1500).optional().describe(
+    'Free-text script (10-1500 chars). gpt-image-2 splits it into 4-6 sequential panels itself. Mutually exclusive with `beats`.',
+  ),
+  ratio: z.enum(STORYBOARD_RATIOS).optional().default('9:16').describe('Storyboard sheet aspect ratio. Default 9:16.'),
+  session_id: z.string().uuid().optional().describe('Optional UUID linking the three pipeline steps.'),
+}).refine(
+  (v) => Boolean(v.beats) !== Boolean(v.script), // XOR on presence: passes only when exactly one of the two is supplied
+  { message: 'Provide exactly one of: beats or script' },
+);
+export type CharacterStoryboardInput = z.infer<typeof CharacterStoryboardSchema>; // parsed (output) shape: `ratio` is always present because of its default
+// ── Helper: AI-suggested beats ─────────────────────────────────────────────
+//
+// POST /v1/character/storyboard-suggest. Sync — returns immediately.
+// No credits. Returns 3 distinct beat-sequence options for the wizard. The character comes from exactly one of `actor_slug` or `character_description`.
+export const StoryboardSuggestSchema = z.object({
+  actor_slug: z.string().min(1).optional().describe('Library actor whose persona drives the suggestions. Mutually exclusive with character_description.'),
+  character_description: z.string().min(1).max(200).optional().describe('1-200 char description of the character. Mutually exclusive with actor_slug.'),
+  vibe: z.string().min(1).max(200).optional().describe('Optional vibe note that biases all 3 options (e.g. "wholesome", "chaotic", "cinematic").'),
+  duration: z.number().refine((v) => v === 5 || v === 10, { message: 'duration must be 5 or 10' }).optional().default(10), // same allowed values as CHARACTER_VIDEO_DURATIONS, checked inline; this field has no .describe() text
+  n_panels: z.number().int().min(4).max(10).optional().default(6).describe('Panels per option (the storyboard sheet will be an n_panels-panel grid).'),
+}).refine(
+  (v) => Boolean(v.actor_slug) !== Boolean(v.character_description), // XOR on presence: exactly one character source, never both or neither
+  { message: 'Provide exactly one of: actor_slug or character_description' },
+);
+export type StoryboardSuggestInput = z.infer<typeof StoryboardSuggestSchema>; // parsed (output) shape: `duration` and `n_panels` are always present because of their defaults

package/.turbo/turbo-build.log DELETED Viewed

@@ -1,4 +0,0 @@
-> @agentmedia/schema@0.2.2 build /Users/suede/.codex/worktrees/b777/videoagent/packages/schema
-> tsc

package/.turbo/turbo-test.log DELETED Viewed

@@ -1,14 +0,0 @@
-> @agentmedia/schema@0.2.2 test /Users/suede/.codex/worktrees/b777/videoagent/packages/schema
-> vitest run
- RUN  v3.2.4 /Users/suede/.codex/worktrees/b777/videoagent/packages/schema
- ✓ src/__tests__/parity.test.ts (87 tests) 32ms
- Test Files  1 passed (1)
-      Tests  87 passed (87)
-   Start at  20:52:45
-   Duration  1.53s (transform 212ms, setup 0ms, collect 284ms, tests 32ms, environment 0ms, prepare 297ms)

package/.turbo/turbo-typecheck.log DELETED Viewed

@@ -1,4 +0,0 @@
-> @agentmedia/schema@0.2.2 typecheck /Users/suede/.codex/worktrees/b777/videoagent/packages/schema
-> tsc --noEmit