@agentmedia/schema 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/v2/generators.d.ts.map +1 -1
- package/dist/v2/generators.js +14 -6
- package/dist/v2/generators.js.map +1 -1
- package/dist/v2/selfie.d.ts +53 -30
- package/dist/v2/selfie.d.ts.map +1 -1
- package/dist/v2/selfie.js +69 -29
- package/dist/v2/selfie.js.map +1 -1
- package/package.json +13 -13
- package/scripts/generate-v2-docs.ts +384 -290
- package/src/v2/generators.ts +14 -6
- package/src/v2/selfie.ts +86 -30
- package/LICENSE +0 -199
|
@@ -18,9 +18,6 @@ import { fileURLToPath } from 'node:url';
|
|
|
18
18
|
import { zodToJsonSchema } from 'zod-to-json-schema';
|
|
19
19
|
import {
|
|
20
20
|
V2_GENERATORS,
|
|
21
|
-
V2_SHOT_PRESETS,
|
|
22
|
-
V2_VIBES,
|
|
23
|
-
quoteV2Credits,
|
|
24
21
|
type V2GeneratorRecord,
|
|
25
22
|
} from '../src/v2/index.js';
|
|
26
23
|
|
|
@@ -49,19 +46,11 @@ function fmtInputSchema(def: V2GeneratorRecord): string {
|
|
|
49
46
|
return '```json\n' + JSON.stringify(body, null, 2) + '\n```';
|
|
50
47
|
}
|
|
51
48
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
}
|
|
58
|
-
// per_clip — show 5/10/15
|
|
59
|
-
const rows = [5, 10, 15].map((s) => {
|
|
60
|
-
const c = quoteV2Credits(def.id as any, { durationSeconds: s });
|
|
61
|
-
return `| ${s}s | ${c} | $${(c / 100).toFixed(2)} |`;
|
|
62
|
-
});
|
|
63
|
-
return `Per-clip (base ${def.pricing.baseCredits} + ${def.pricing.perSecondCredits}/sec):\n\n| Duration | Credits | USD |\n|---|---:|---:|\n${rows.join('\n')}`;
|
|
64
|
-
}
|
|
49
|
+
// Pricing display is intentionally suppressed everywhere — agents and
|
|
50
|
+
// docs should never surface USD or credit numbers. The API debits
|
|
51
|
+
// internally; users get no cost-anxiety prompts. (Server-side allows a
|
|
52
|
+
// soft -10 credit overdraft so a final job never gets rejected on a
|
|
53
|
+
// micro-balance edge case.)
|
|
65
54
|
|
|
66
55
|
// ── docs/v2/api-reference.md ──────────────────────────────────────────────
|
|
67
56
|
|
|
@@ -82,10 +71,6 @@ function renderApiReference(): string {
|
|
|
82
71
|
'',
|
|
83
72
|
g.description,
|
|
84
73
|
'',
|
|
85
|
-
'### Pricing',
|
|
86
|
-
'',
|
|
87
|
-
fmtPricing(g),
|
|
88
|
-
'',
|
|
89
74
|
'### Request body',
|
|
90
75
|
'',
|
|
91
76
|
fmtInputSchema(g),
|
|
@@ -100,16 +85,7 @@ function renderApiReference(): string {
|
|
|
100
85
|
'',
|
|
101
86
|
'```json',
|
|
102
87
|
JSON.stringify(
|
|
103
|
-
{
|
|
104
|
-
job_id: '<uuid>',
|
|
105
|
-
status: 'submitted',
|
|
106
|
-
credits_deducted: g.pricing
|
|
107
|
-
? g.pricing.basis === 'one_shot'
|
|
108
|
-
? g.pricing.baseCredits
|
|
109
|
-
: g.pricing.baseCredits + g.pricing.perSecondCredits * 8
|
|
110
|
-
: 0,
|
|
111
|
-
generator: g.id,
|
|
112
|
-
},
|
|
88
|
+
{ job_id: '<uuid>', status: 'submitted', generator: g.id },
|
|
113
89
|
null,
|
|
114
90
|
2,
|
|
115
91
|
),
|
|
@@ -158,390 +134,508 @@ function renderApiReference(): string {
|
|
|
158
134
|
'- `character_id` — present on jobs that create a v2 character (`char_xxxxxxxxxx`).',
|
|
159
135
|
'- `video_url` — present on completed video jobs.',
|
|
160
136
|
'',
|
|
161
|
-
'###
|
|
162
|
-
'',
|
|
163
|
-
`Selfie's \`preset\` field accepts one of:`,
|
|
164
|
-
'',
|
|
165
|
-
V2_SHOT_PRESETS.map((p) => `- \`${p}\``).join('\n'),
|
|
137
|
+
'### Selfie pipeline artifacts',
|
|
166
138
|
'',
|
|
167
|
-
|
|
139
|
+
'Selfie jobs expose intermediate URLs while processing:',
|
|
168
140
|
'',
|
|
169
|
-
'
|
|
141
|
+
'- `portrait_url` — generated actor face portrait, unless reusing a saved character.',
|
|
142
|
+
'- `character_sheet_url` / `sheet_url` — full-body multi-angle character reference.',
|
|
143
|
+
'- `wireframe_url` — photographic storyboard/wireframe board with 8-10 frames and captions.',
|
|
144
|
+
'- `video_url` / `result_url` — final Seedance MP4 after completion.',
|
|
170
145
|
'',
|
|
171
|
-
|
|
146
|
+
'Agents should surface each artifact as soon as it appears in status instead of waiting silently for the final video.',
|
|
172
147
|
'',
|
|
173
148
|
].join('\n');
|
|
174
149
|
}
|
|
175
150
|
|
|
176
|
-
// ── skills/agent-media-v2/SKILL.md ─────────────────────────────────────────
|
|
177
151
|
|
|
178
|
-
|
|
152
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
153
|
+
// Multi-file skill emit
|
|
154
|
+
//
|
|
155
|
+
// Layout under skills/agent-media-v2/:
|
|
156
|
+
// SKILL.md — eager-loaded entry, ~2 KB
|
|
157
|
+
// reference/
|
|
158
|
+
// conversation-flow.md — MUST-READ before any CLI call
|
|
159
|
+
// pricing.md — formula + tables
|
|
160
|
+
// subtitle-styles.md — 17 subtitle styles
|
|
161
|
+
// realism-rubric.md — visual-quality guard
|
|
162
|
+
// errors.md — common error codes + fixes
|
|
163
|
+
// generators/
|
|
164
|
+
// selfie.md — flags + CLI/MCP/REST + examples
|
|
165
|
+
// character-create.md — character_create cheat-sheet
|
|
166
|
+
// subs.md — subs cheat-sheet
|
|
167
|
+
//
|
|
168
|
+
// Adding a new v2 product = a new V2_GENERATORS row + this file emits a
|
|
169
|
+
// fresh reference/generators/<id>.md automatically.
|
|
170
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
171
|
+
|
|
172
|
+
const SKILL_DIR = resolve(repoRoot, 'skills/agent-media-v2');
|
|
173
|
+
|
|
174
|
+
interface EmittedFile {
|
|
175
|
+
/** path relative to SKILL_DIR */
|
|
176
|
+
relPath: string;
|
|
177
|
+
content: string;
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
function renderSkillIndex(): string {
|
|
179
181
|
const generators = Object.values(V2_GENERATORS);
|
|
180
|
-
const
|
|
181
|
-
.map(
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
: `Cost: **${g.pricing.baseCredits} + ${g.pricing.perSecondCredits}/sec** (8s ≈ ${quoteV2Credits(g.id as any, { durationSeconds: 8 })} credits ≈ $${(quoteV2Credits(g.id as any, { durationSeconds: 8 }) / 100).toFixed(2)}).`
|
|
187
|
-
: '';
|
|
188
|
-
return [
|
|
189
|
-
`### ${g.id}`,
|
|
190
|
-
'',
|
|
191
|
-
g.description,
|
|
192
|
-
'',
|
|
193
|
-
pricingLine,
|
|
194
|
-
'',
|
|
195
|
-
'**CLI:**',
|
|
196
|
-
'',
|
|
197
|
-
'```bash',
|
|
198
|
-
cliExample,
|
|
199
|
-
'```',
|
|
200
|
-
'',
|
|
201
|
-
'**MCP tool:** `' + (g.mcp?.toolName ?? '(none)') + '`',
|
|
202
|
-
'',
|
|
203
|
-
'**REST:** `' + (g.rest?.method ?? '') + ' ' + (g.rest?.path ?? '(internal)') + '`',
|
|
204
|
-
'',
|
|
205
|
-
].join('\n');
|
|
206
|
-
})
|
|
207
|
-
.join('---\n\n');
|
|
182
|
+
const generatorRows = generators
|
|
183
|
+
.map(
|
|
184
|
+
(g) =>
|
|
185
|
+
`| **${g.cli?.command ?? g.id}** | ${g.summary.replace(/\|/g, '\\|')} | [\`reference/generators/${g.id}.md\`](reference/generators/${g.id}.md) |`,
|
|
186
|
+
)
|
|
187
|
+
.join('\n');
|
|
208
188
|
|
|
209
189
|
return [
|
|
210
190
|
'---',
|
|
211
191
|
'name: agent-media-v2',
|
|
212
|
-
|
|
213
|
-
'
|
|
214
|
-
|
|
192
|
+
// CRITICAL: the description string is what Claude reads when deciding whether to
|
|
193
|
+
// load this skill AND it's the most-attended-to text once the skill is loaded.
|
|
194
|
+
// Put the hard gate here, not buried in section 3 of the body.
|
|
195
|
+
'description: AI UGC video production via agent-media (selfie, character, subs, plus more soon). BEFORE running ANY agent-media command you MUST first Read reference/conversation-flow.md and walk the user through the 4 gates IN ORDER — (1) confirm script OR scene_action; if no speech, also propose background_music, (2) RUN `agent-media character list --json` YOURSELF (don\'t ask the user, don\'t mention char_xxx ids — present saved characters BY NAME if any, otherwise confirm the new description), (3) propose a director\'s brief with setting, lighting, wardrobe, props/product, and action; pass non-default motion/product handling through --scene-action, (4) duration with script-pacing awareness (2-4 words/sec). While jobs run, poll status and open portrait, character sheet, wireframe, and final video as each URL appears. When user says "no subs" → pass --subtitles false. When no script → pass --background-music. NEVER auto-pick a character. NEVER expose char_xxx ids. NEVER mention pricing/credits/USD.',
|
|
215
196
|
'---',
|
|
216
197
|
'',
|
|
217
198
|
GENERATED_NOTE,
|
|
218
199
|
'',
|
|
219
|
-
'# agent-media —
|
|
200
|
+
'# agent-media — Claude skill',
|
|
220
201
|
'',
|
|
221
|
-
'
|
|
202
|
+
'agent-media is a CLI for AI UGC video generation. This skill tells you how to drive it. **Loaded files are intentionally small** — open the right reference file for the task you have, don\'t try to memorize everything.',
|
|
222
203
|
'',
|
|
223
|
-
'
|
|
204
|
+
'## 🛑 HARD GATE — read this first, every conversation',
|
|
224
205
|
'',
|
|
225
|
-
'
|
|
206
|
+
'Before calling ANY `agent-media` shell command, you MUST:',
|
|
226
207
|
'',
|
|
227
|
-
'
|
|
208
|
+
'1. **Read** [`reference/conversation-flow.md`](reference/conversation-flow.md) — the full 4-gate protocol with templates.',
|
|
209
|
+
'2. **Walk the user through 4 gates IN ORDER, one message each** — do not bulk-fire:',
|
|
210
|
+
' - **Gate 1:** confirm the exact script (verbatim — typos land in the video)',
|
|
211
|
+
' - **Gate 2:** confirm character. YOU run `agent-media character list --json` (do not ask the user "do you have a saved character?" — they don\'t know that\'s a thing). If the list is empty, just confirm the description from their original prompt. If non-empty, present each saved character BY NAME (not by `char_xxx` id — that format is internal). The user picks by NAME or says "new"; you map name → id internally. 🛑 NEVER auto-pick. NEVER show `char_xxx` ids to the user. Never ask for a photo by default.',
|
|
212
|
+
' - **Gate 3:** propose a full **director\'s brief** with pre-filled fields in 3 sections — **A. Intent+Performance**, **B. Scene+Look**, **C. Output**. Put visual direction into `--description` and action/product handling into `--scene-action`. The shot composition and energy are inferred from the brief; you can OPTIONALLY pin them with `--shot-preset` and `--vibe`, or override the realism defaults with `--camera-locked` / `--phone-in-frame` / `--polish` (rare — only when the user explicitly asks for a stable shot, a phone-in-hand composition, or a different polish look). Baseline realism policy: handheld camera stays on, visible phone stays off unless explicitly requested.',
|
|
213
|
+
' - **Gate 4:** duration + **script-pacing check**. Count the words in the script and PROPOSE the duration that fits (5s ≈ 10-20 words, 10s ≈ 20-40 words, 15s ≈ 30-60 words at the natural 2-4 words/sec pace).',
|
|
214
|
+
'3. Only then call the CLI.',
|
|
228
215
|
'',
|
|
229
|
-
'
|
|
230
|
-
'> *• What\'s the video about? (the topic / what they\'re selling / what\'s happening in the scene)*',
|
|
231
|
-
'> *• What\'s the exact line you want them to say? (the script — 1-3 sentences usually works best)"*',
|
|
216
|
+
'**The director\'s brief at Gate 3 is non-optional.** It\'s where quality lives. Skipping it = generic output. PROPOSE smart defaults from the script + description; don\'t ask blank questions.',
|
|
232
217
|
'',
|
|
233
|
-
'
|
|
218
|
+
'Calling the CLI without doing 1–3 is a protocol violation — the user gets a generic, mid video. Ask the extra questions.',
|
|
234
219
|
'',
|
|
235
|
-
'
|
|
220
|
+
'## NEVER discuss pricing',
|
|
236
221
|
'',
|
|
237
|
-
'
|
|
222
|
+
'Do NOT mention credit costs, USD amounts, or pricing tiers in any reply. Do NOT ask the user to "confirm cost". The API handles billing transparently. If the user asks about cost, point them at <https://agent-media.ai/pricing>. That is the only acceptable surface for pricing.',
|
|
238
223
|
'',
|
|
239
|
-
'
|
|
224
|
+
'## What agent-media can do (router)',
|
|
240
225
|
'',
|
|
241
|
-
'
|
|
242
|
-
'
|
|
243
|
-
|
|
244
|
-
' agent-media selfie --description "25yo asian woman, long wavy dark hair, soft smile" --script "..."',
|
|
245
|
-
' ```',
|
|
226
|
+
'| Command | Use when | Deep-dive |',
|
|
227
|
+
'|---|---|---|',
|
|
228
|
+
generatorRows,
|
|
246
229
|
'',
|
|
247
|
-
'
|
|
248
|
-
'
|
|
230
|
+
'`agent-media skill update` — pull the latest skill files into ~/.claude/skills/agent-media-v2/.',
|
|
231
|
+
'`agent-media skill status` — print local vs remote version.',
|
|
249
232
|
'',
|
|
250
|
-
'
|
|
251
|
-
' Use ONLY when the user explicitly says "use THIS person" and gives you a photo of someone specific. Otherwise default to path 1.',
|
|
233
|
+
'## What agent-media CANNOT do',
|
|
252
234
|
'',
|
|
253
|
-
'
|
|
254
|
-
'- NEVER ask the user "do you have a photo?". Default to description-only.',
|
|
255
|
-
'- NEVER ask the user about the underlying models (gpt-image-2, Seedance, etc.). They are implementation details. Just say "agent-media generates the character".',
|
|
256
|
-
'- NEVER fall back to `agent-media ugc` or a stock actor library. Forbidden.',
|
|
257
|
-
'- If the user wants the SAME person across multiple videos, run `agent-media character create --description "..."` once first (no photo needed) to get a `char_xxxxxxxxxx`, then pass `--character <id>` to every selfie.',
|
|
235
|
+
'These legacy v1 commands exist in the CLI binary for backwards compat but produce inferior output. They are hidden from `agent-media --help` for a reason. **Never call them.**',
|
|
258
236
|
'',
|
|
259
|
-
'
|
|
237
|
+
'- ❌ `agent-media ugc` — uses a stale fixed actor library (200 actors picked at random). The actors look dated. Use `agent-media selfie` — it generates an on-model character from your description on every run.',
|
|
238
|
+
'- ❌ `agent-media show-your-app` — built on the v1 actor pool + manual screen-composite step. The v2 product is on the roadmap. For now, run `agent-media selfie` for the talking head and capture the screen separately.',
|
|
239
|
+
'- ❌ `agent-media laptop-ugc` — v1 only. Same story as show-your-app; v2 product coming.',
|
|
240
|
+
'- ❌ `agent-media character-video` — superseded by `agent-media selfie --character <id>`. The new command uses the current portrait → sheet → wireframe → Seedance pipeline.',
|
|
241
|
+
'- ❌ `agent-media text-to-video` — no character control; output is generic and off-brand. Use `agent-media selfie` with a saved character.',
|
|
242
|
+
'- ❌ `agent-media subtitle` (singular) — v1 burner with fewer styles and shakier sync. Use `agent-media subs` (plural).',
|
|
243
|
+
'- ❌ `agent-media review` — SaaS-review generator built on v1 actors. Compose with `agent-media selfie` + a script you write.',
|
|
244
|
+
'- ❌ `agent-media product-acting` — v1 product-in-hand generator. For now, use `agent-media selfie` with a strong `--scene-action` describing the product hold, demo, and interaction.',
|
|
260
245
|
'',
|
|
261
|
-
'
|
|
246
|
+
'If the user wants a feature not listed in the router above, offer `agent-media selfie` when the request can be expressed as one actor, one setting, dialogue/action, and optional props/product handling.',
|
|
262
247
|
'',
|
|
263
|
-
'
|
|
248
|
+
'## Reference files (lazy-loaded)',
|
|
264
249
|
'',
|
|
265
|
-
'
|
|
266
|
-
'|---|---|',
|
|
267
|
-
'| `bedroom-morning-ritual` | Default. Skincare, routine, "morning vibes" content |',
|
|
268
|
-
'| `getting-ready-mirror-edge` | OOTD, makeup, fashion |',
|
|
269
|
-
'| `bathroom-skincare-routine` | Beauty, hair care, skincare reveals |',
|
|
270
|
-
'| `bedside-lamp-evening` | Wind-down, journal, ASMR-style |',
|
|
271
|
-
'| `kitchen-glow-up` | Food, drinks, supplements, cooking |',
|
|
272
|
-
'| `backyard-morning-coffee` | Lifestyle, mindfulness, slow content |',
|
|
273
|
-
'| `picnic-blanket-outdoor` | Outdoor, summer, friends |',
|
|
274
|
-
'| `car-quick-honest-review` | Honest reviews, "I just bought this..." |',
|
|
275
|
-
'| `car-passenger-honest` | Same vibe, passenger angle |',
|
|
276
|
-
'| `outdoor-walking-talking` | Walking-and-talking, candid |',
|
|
277
|
-
'| `couch-haul-show-off` | Unboxing, hauls, "look what I got" |',
|
|
278
|
-
'| `closet-fit-check` | Fashion, fit-checks, OOTD |',
|
|
279
|
-
'| `studio-apartment-tour` | Lifestyle, apartment content |',
|
|
280
|
-
'| `balcony-evening-vibes` | Aesthetic, lifestyle, golden hour |',
|
|
281
|
-
'| `desk-wfh-quick-pitch` | SaaS, productivity, work-from-home |',
|
|
282
|
-
'| `cafe-window-seat` | Lifestyle, work, coffee culture |',
|
|
283
|
-
'| `office-bathroom-discreet` | Workplace anecdotes, "let me tell you" |',
|
|
284
|
-
'| `gym-post-workout` | Fitness, supplements, wellness |',
|
|
285
|
-
'| `salon-mirror-result` | Hair, beauty reveals, "before/after" |',
|
|
286
|
-
'| `travel-hotel-room-review` | Travel, hotel reviews |',
|
|
250
|
+
'Open these only when you need them:',
|
|
287
251
|
'',
|
|
288
|
-
'
|
|
252
|
+
'- [`reference/conversation-flow.md`](reference/conversation-flow.md) — the 3 gate questions, in order, with example wording',
|
|
253
|
+
'- [`reference/subtitle-styles.md`](reference/subtitle-styles.md) — all 17 subtitle styles',
|
|
254
|
+
'- [`reference/realism-rubric.md`](reference/realism-rubric.md) — visual-quality guard the pipeline enforces',
|
|
255
|
+
'- [`reference/errors.md`](reference/errors.md) — common errors + remediation',
|
|
256
|
+
...generators.map(
|
|
257
|
+
(g) =>
|
|
258
|
+
`- [\`reference/generators/${g.id}.md\`](reference/generators/${g.id}.md) — ${g.summary}`,
|
|
259
|
+
),
|
|
289
260
|
'',
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
261
|
+
].join('\n');
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
function renderConversationFlow(): string {
|
|
265
|
+
return [
|
|
266
|
+
GENERATED_NOTE,
|
|
295
267
|
'',
|
|
296
|
-
'
|
|
268
|
+
'# Conversation flow — MUST READ before any agent-media call',
|
|
297
269
|
'',
|
|
298
|
-
'**
|
|
270
|
+
'> **CRITICAL:** Output quality is directly tied to how well you collect these inputs. Run the 4 gates in order. Do not skip, combine, or bulk-fire them.',
|
|
299
271
|
'',
|
|
300
|
-
'
|
|
301
|
-
'- `10` — default. Hook + payoff. Best for organic UGC.',
|
|
302
|
-
'- `15` — full mini-story with setup + reveal.',
|
|
272
|
+
'## Director\'s principle: PROPOSE, don\'t interrogate',
|
|
303
273
|
'',
|
|
304
|
-
'
|
|
274
|
+
'Pre-fill what you can infer from the prompt and ask the user to confirm or red-line it. Do not hand them a blank form. The pipeline will fill remaining gaps, but better user input produces better portrait, sheet, wireframe, and video outputs.',
|
|
305
275
|
'',
|
|
306
|
-
'
|
|
276
|
+
'## The 4 gates (in order, one message each)',
|
|
307
277
|
'',
|
|
308
|
-
'
|
|
278
|
+
'### Gate 1 — Confirm script or action',
|
|
309
279
|
'',
|
|
310
|
-
'
|
|
280
|
+
'If the clip has speech, confirm the script verbatim. The script is spoken as-is.',
|
|
311
281
|
'',
|
|
312
|
-
'
|
|
282
|
+
'> *"Quick check before the camera rolls — script is: «<paste the exact line>». Sound right, or want to tweak?"*',
|
|
313
283
|
'',
|
|
314
|
-
'
|
|
284
|
+
'If the clip has no speech, confirm the `scene_action` and pass `--background-music` with a short direction unless the user explicitly wants silence.',
|
|
315
285
|
'',
|
|
316
|
-
'###
|
|
286
|
+
'### Gate 2 — Confirm the CHARACTER',
|
|
317
287
|
'',
|
|
318
|
-
'
|
|
288
|
+
'🛑 **DO NOT ask the user if they have a saved character or a `char_xxx` id.** The user does not know what that means. They don\'t remember ids. They don\'t care about the format.',
|
|
319
289
|
'',
|
|
320
|
-
'
|
|
321
|
-
'> *"Summary: 10s selfie · char_8x2vqp · preset=kitchen-glow-up · vibe=excited · subs=hormozi · voice="gen-z deadpan". Cost: **375 credits ($3.75)**. Run it?"*',
|
|
290
|
+
'**Instead, YOU run the command. YOU map the result to a human-friendly question.**',
|
|
322
291
|
'',
|
|
323
|
-
'
|
|
292
|
+
'Step 1 — run silently (don\'t print the raw output to the user):',
|
|
324
293
|
'',
|
|
325
|
-
'
|
|
326
|
-
'
|
|
327
|
-
'
|
|
328
|
-
'| "save / persist this character" / "use the same person again" | `agent-media character create` (returns `char_xxxxxxxxxx`) |',
|
|
329
|
-
'| "add subtitles / captions / subs to a video" | `agent-media subs` |',
|
|
294
|
+
'```bash',
|
|
295
|
+
'agent-media character list --json',
|
|
296
|
+
'```',
|
|
330
297
|
'',
|
|
331
|
-
'
|
|
298
|
+
'Step 2 — interpret the result and ask the right question:',
|
|
332
299
|
'',
|
|
333
|
-
'
|
|
300
|
+
'**Case A — list is empty.** Skip the character question entirely. Just confirm the description from the user\'s original prompt:',
|
|
334
301
|
'',
|
|
335
|
-
'
|
|
336
|
-
'- ❌ `agent-media show-your-app` — coming back as v2 product later',
|
|
337
|
-
'- ❌ `agent-media laptop-ugc` — coming back as v2 product later',
|
|
338
|
-
'- ❌ `agent-media character-video` — replaced by `agent-media selfie --character <id>`',
|
|
339
|
-
'- ❌ `agent-media text-to-video` — not part of v2 yet',
|
|
340
|
-
'- ❌ `agent-media subtitle` — replaced by `agent-media subs`',
|
|
341
|
-
'- ❌ `agent-media review` — not part of v2',
|
|
342
|
-
'- ❌ `agent-media product-acting` — coming back as v2 product later',
|
|
302
|
+
'> *"Going with: «25yo asian woman, long wavy dark hair, soft smile». Add anything? (skin tone, face shape, makeup baseline, anything specific)"*',
|
|
343
303
|
'',
|
|
344
|
-
'
|
|
304
|
+
'DO NOT mention "saved characters", "previous runs", or `char_xxx` ids in this case. The user has none and doesn\'t need to know that\'s a concept.',
|
|
345
305
|
'',
|
|
346
|
-
'
|
|
306
|
+
'**Case B — list has 1+ saved characters.** Present them BY NAME with a one-line description. Never show the user the `char_xxx` id — that\'s an internal handle.',
|
|
347
307
|
'',
|
|
348
|
-
'
|
|
308
|
+
'> *"You\'ve made a few characters before — want to reuse one, or generate a new one for this?"*',
|
|
309
|
+
'> *"• **Sofia** — 25yo asian woman, long wavy dark hair (made 3d ago)"*',
|
|
310
|
+
'> *"• **Aiko** — 30yo japanese woman, bob cut (made 1w ago)"*',
|
|
311
|
+
'> *"• **Marcus** — 28yo black man, locs (made 2w ago)"*',
|
|
312
|
+
'> *"Reply with a name (e.g. `Sofia`) or say `new`."*',
|
|
349
313
|
'',
|
|
350
|
-
'
|
|
351
|
-
'|---|---|---|---|',
|
|
352
|
-
'| `--description "..."` | ✓ (unless `--character`) | 8-400 chars describing the person | — |',
|
|
353
|
-
'| `--character <char_id>` | OR | `char_xxxxxxxxxx` (saved character) | — |',
|
|
354
|
-
'| `--photo <file\\|url>` | optional | only when user gives an exact-person reference photo | — |',
|
|
355
|
-
'| `--script "..."` | ✓ | 4-600 chars | — |',
|
|
356
|
-
'| `--preset <name>` | | one of 20 (see Step 3) | `bedroom-morning-ritual` |',
|
|
357
|
-
'| `--vibe <name>` | | `excited\\|calm\\|sassy\\|serious\\|curious` | `excited` |',
|
|
358
|
-
'| `--duration <n>` | | **`5` \\| `10` \\| `15`** | `10` |',
|
|
359
|
-
'| `--voice-brief "..."` | | 4-240 chars | (none / character default) |',
|
|
360
|
-
'| `--subs-style <name>` | | one of 17 (see Step 6) | `hormozi` |',
|
|
361
|
-
'| `--no-subs` | | flag | (subs on) |',
|
|
314
|
+
'When the user replies "Sofia", YOU map "Sofia" → the matching `char_xxx` id internally from the list output. Never ask the user to type the id.',
|
|
362
315
|
'',
|
|
363
|
-
'
|
|
316
|
+
'🛑 **NEVER auto-pick.** Even if there\'s only one saved character. Even if it "looks like a match" for the prompt. Wait for the user to name the one they want, or say "new".',
|
|
364
317
|
'',
|
|
365
|
-
'
|
|
366
|
-
'|---|---|---|',
|
|
367
|
-
'| `--name <slug>` | ✓ | lowercase, hyphens, e.g. `sofia` |',
|
|
368
|
-
'| `--description "..."` | ✓ | free text — age, look, vibe. agent-media generates the portrait from this. |',
|
|
369
|
-
'| `--photo <file\\|url>` | optional | ONLY when the user wants an exact real-person likeness. Otherwise omit. |',
|
|
370
|
-
'| `--voice-brief "..."` | | default voice direction baked into character |',
|
|
371
|
-
'| `--preset-default <name>` | | preset to use when this character runs selfie |',
|
|
318
|
+
'**For "new" (or empty-list case):** confirm the description:',
|
|
372
319
|
'',
|
|
373
|
-
'
|
|
320
|
+
'> *"Got it — new character. Going with: «<echo description»? Add anything?"*',
|
|
374
321
|
'',
|
|
375
|
-
'
|
|
322
|
+
'**Default to description-only when creating new.** agent-media generates the character image from text — no photo required. Only ask for a photo if the user explicitly says "use THIS person" and provides one.',
|
|
376
323
|
'',
|
|
377
|
-
'
|
|
378
|
-
'|---|---|---|',
|
|
379
|
-
'| `--video <url>` | ✓ | publicly-fetchable mp4 URL |',
|
|
380
|
-
'| `--style <name>` | | one of 17 (see Step 6). Default: `hormozi`. |',
|
|
381
|
-
'| `--transcript "..."` | | skip Whisper if you already have the exact words |',
|
|
382
|
-
'| `--language <code>` | | ISO code (`en`, `es`, `pt`, `fr`, …) |',
|
|
324
|
+
'Once the user picks a name OR confirms a new description, move to Gate 3. Pass the resolved character to the selfie call as `--character char_xxx` (saved) OR `--description "..."` (new).',
|
|
383
325
|
'',
|
|
384
|
-
'
|
|
326
|
+
'### Gate 3 — DIRECTOR\'S BRIEF',
|
|
385
327
|
'',
|
|
386
|
-
'
|
|
328
|
+
'This is where most quality is decided. In one message, propose a complete brief with sensible defaults. The user replies `y` to accept all, or overrides individual lines.',
|
|
387
329
|
'',
|
|
388
|
-
'
|
|
330
|
+
'Most of the brief flows into two flags: visual details → `--description`; motion, prop handling, product demos, turns, outfit checks, dances, walking, or non-default behavior → `--scene-action`. The pipeline infers good defaults for the rest.',
|
|
389
331
|
'',
|
|
390
|
-
'
|
|
391
|
-
'|---|---:|---:|',
|
|
392
|
-
'| 5s | 225 | **$2.25** |',
|
|
393
|
-
'| 10s | 375 | **$3.75** |',
|
|
394
|
-
'| 15s | 525 | **$5.25** |',
|
|
332
|
+
'**Optional realism overrides** (use only when the user asks for one of these explicitly — defaults already work):',
|
|
395
333
|
'',
|
|
396
|
-
'
|
|
397
|
-
'
|
|
334
|
+
'- `--shot-preset <name>` — pin the scene composition (e.g. `car-quick-honest-review`, `bedroom-morning-ritual`, `gym-post-workout`). Pass `custom-scene:<text>` for one-offs. Useful when the user names a specific location and you want to lock it.',
|
|
335
|
+
'- `--vibe <name>` — pin the actor\'s energy/tone (`excited`, `calm`, `sassy`, `serious`, `curious`). Useful when the user says e.g. "make it sassy" or "keep it serious".',
|
|
336
|
+
'- `--camera-locked` — lock the camera (no handheld motion). Use for product/demo shots where a stable frame matters. Default is handheld — leave it off for normal UGC.',
|
|
337
|
+
'- `--phone-in-frame <forbidden|optional|required>` — control whether the actor holds a phone on screen. Default `forbidden` (no visible phone/camera/selfie-arm). Use `required` when the user asks for a "talking to phone" or "iPhone-cover" composition; use `optional` only when the user explicitly wants phone visibility to be allowed.',
|
|
338
|
+
'- `--polish <off|default|heavy>` — final-look intensity. Default `default` (recommended). Use `heavy` for a more stylized vintage look, `off` if the user wants the raw model output.',
|
|
398
339
|
'',
|
|
399
|
-
'
|
|
340
|
+
'When in doubt, OMIT these flags. The director\'s brief is doing the heavy lifting.',
|
|
400
341
|
'',
|
|
401
|
-
'
|
|
342
|
+
'**A. Intent + Performance**',
|
|
402
343
|
'',
|
|
403
|
-
'
|
|
344
|
+
'- **Intent / use-case** — paid ad, organic post, honest review, storytime, unboxing, product demo, etc.',
|
|
345
|
+
'- **Delivery** — natural, excited, calm, serious, playful, skeptical, warm, etc. This is descriptive only; it goes into the prompt, not a CLI flag.',
|
|
346
|
+
'- **Script / speech** — exact line if spoken; no invented dialogue.',
|
|
404
347
|
'',
|
|
405
|
-
'
|
|
406
|
-
'agent-media status <job-id>',
|
|
407
|
-
'```',
|
|
348
|
+
'**B. Scene + Look**',
|
|
408
349
|
'',
|
|
409
|
-
'
|
|
350
|
+
'- **Setting** — real-world location, time of day, background details.',
|
|
351
|
+
'- **Lighting** — natural window light, soft bedroom daylight, warm evening lamp, etc.',
|
|
352
|
+
'- **Framing** — close-up, medium close-up, medium, or wide/full-body when outfit/action matters.',
|
|
353
|
+
'- **Wardrobe / hair / makeup** — include only useful visual details.',
|
|
354
|
+
'- **Props + action** — product held, shown, sprayed, opened, worn, pointed at, demonstrated, etc. This should become `--scene-action`.',
|
|
410
355
|
'',
|
|
411
|
-
'
|
|
356
|
+
'**C. Output**',
|
|
412
357
|
'',
|
|
413
|
-
'
|
|
358
|
+
'- **Platform / aspect** — Selfie outputs 9:16 vertical for TikTok/Reels/Shorts.',
|
|
359
|
+
'- **Subtitles** — on by default; pass `--subtitles false` if the user says no subs/captions.',
|
|
360
|
+
'- **Background music** — pass only when requested or when there is no script.',
|
|
414
361
|
'',
|
|
415
|
-
'
|
|
416
|
-
'Optional: `--voice-brief`, `--preset`.',
|
|
362
|
+
'**Exact template to use:**',
|
|
417
363
|
'',
|
|
418
|
-
'
|
|
364
|
+
'> *"Here\'s the shot I\'d direct — reply `y` to lock all, or override individual lines:*',
|
|
365
|
+
'>',
|
|
366
|
+
'> ***A. Intent + Performance***',
|
|
367
|
+
'> *• **Intent:** `[organic product demo]`*',
|
|
368
|
+
'> *• **Delivery:** `[warm, confident, conversational]`*',
|
|
369
|
+
'> *• **Script:** `[paste exact script]`*',
|
|
370
|
+
'>',
|
|
371
|
+
'> ***B. Scene + Look***',
|
|
372
|
+
'> *• **Setting:** `[bright bedroom near a wooden dresser]`*',
|
|
373
|
+
'> *• **Lighting:** `[warm late-morning window light]`*',
|
|
374
|
+
'> *• **Framing:** `[medium, enough room for product and outfit action]`*',
|
|
375
|
+
'> *• **Wardrobe / hair:** `[cream jacket over fitted top, loose blonde waves]`*',
|
|
376
|
+
'> *• **Prop + action:** `[frosted perfume bottle — show label, spray wrist, remove jacket tastefully, turn once, face camera again]`*',
|
|
377
|
+
'>',
|
|
378
|
+
'> ***C. Output***',
|
|
379
|
+
'> *• **Platform / aspect:** `[TikTok / Reels / Shorts — 9:16]`*',
|
|
380
|
+
'> *• **Subtitles:** `[on]`*',
|
|
381
|
+
'> *• **Background music:** `[none, dialogue only]`*',
|
|
382
|
+
'>',
|
|
383
|
+
'> *`y` to lock, or tell me what to change (e.g. "wardrobe to silk robe, no subs")."*',
|
|
419
384
|
'',
|
|
420
|
-
'
|
|
385
|
+
'When the user accepts, build `--description` from identity + look, and build `--scene-action` from the setting + action + prop interaction. Example:',
|
|
421
386
|
'',
|
|
422
|
-
'
|
|
423
|
-
'
|
|
387
|
+
'- `--description "28yo fit blonde woman, stylish natural fragrance UGC creator, cream jacket over fitted white top, loose blonde waves, bright bedroom daylight"`',
|
|
388
|
+
'- `--scene-action "standing near a dresser, holding a frosted perfume bottle, showing the label and cap, spraying her wrist, smiling while talking, removing jacket tastefully, turning once, then facing camera again"`',
|
|
424
389
|
'',
|
|
425
|
-
'
|
|
390
|
+
'### Gate 4 — DURATION + script-pacing check',
|
|
426
391
|
'',
|
|
427
|
-
'
|
|
392
|
+
'🛑 **Compute the script-to-duration math BEFORE asking, and propose the right duration.** A natural-paced TikTok talking head delivers **2-4 words per second**. If you mismatch script length and duration, Seedance fills the empty time with garbage/nonsense audio (it has to generate audio for the full clip — silence isn\'t free).',
|
|
428
393
|
'',
|
|
429
|
-
'
|
|
394
|
+
'**Sizing rules:**',
|
|
430
395
|
'',
|
|
431
|
-
'
|
|
432
|
-
'
|
|
433
|
-
'
|
|
396
|
+
'| Duration | Sweet-spot script length |',
|
|
397
|
+
'|---|---:|',
|
|
398
|
+
'| 5s | 10-20 words (single hook, 1 punchy sentence) |',
|
|
399
|
+
'| 10s | 20-40 words (default UGC, 2-3 sentences) |',
|
|
400
|
+
'| 15s | 30-60 words (mini-story, setup + reveal) |',
|
|
434
401
|
'',
|
|
435
|
-
'
|
|
402
|
+
'**The script you collected at Gate 1 — count its words and propose the matching duration:**',
|
|
436
403
|
'',
|
|
437
|
-
'
|
|
404
|
+
'> *"Your script is **10 words**. That\'s a clean fit for a **5s clip** — at 10s Seedance would have to fill the extra 5s with filler audio. Going with 5s, or want me to lengthen the script for a 10s version?"*',
|
|
438
405
|
'',
|
|
439
|
-
'
|
|
406
|
+
'If the requested duration does not fit, propose either a different duration or a revised script/action plan. Do not invent extra spoken words without approval.',
|
|
440
407
|
'',
|
|
441
|
-
'
|
|
408
|
+
'Allowed durations: `5`, `10`, `15` only. The schema rejects 6, 8, 12, etc.',
|
|
442
409
|
'',
|
|
443
|
-
'##
|
|
410
|
+
'## After all 4 gates',
|
|
444
411
|
'',
|
|
445
|
-
'
|
|
446
|
-
'
|
|
447
|
-
'
|
|
448
|
-
'-
|
|
449
|
-
'
|
|
412
|
+
'1. Echo the resolved inputs in ONE line: *"Got it: 10s bright-bedroom selfie · cream top · hair-oil bottle action. Running."*',
|
|
413
|
+
'2. Call the CLI:',
|
|
414
|
+
' ```bash',
|
|
415
|
+
' agent-media selfie \\',
|
|
416
|
+
' --description "28yo fit blonde woman, stylish natural fragrance UGC creator, cream jacket over fitted white top, loose blonde waves, bright bedroom daylight" \\',
|
|
417
|
+
' --script "I keep getting DMs about my hair oil routine" \\',
|
|
418
|
+
' --scene-action "standing near a dresser, holding an amber hair-oil bottle and scrunching one curl mid-line" \\',
|
|
419
|
+
' --duration 10',
|
|
420
|
+
' ```',
|
|
421
|
+
'3. If you need to show progress, poll `agent-media status <job_id> --json` about every 20-30 seconds. Open/show each new URL as soon as it appears: `portrait_url`, `character_sheet_url`/`sheet_url`, `wireframe_url`, then `video_url`.',
|
|
450
422
|
'',
|
|
451
|
-
'##
|
|
423
|
+
'## "Just run it" / skip-the-gates case',
|
|
452
424
|
'',
|
|
453
|
-
'
|
|
454
|
-
'npm install -g agent-media-cli',
|
|
455
|
-
'agent-media login # opens browser; pastes ma_xxx into ~/.agent-media',
|
|
456
|
-
'```',
|
|
425
|
+
'If the user explicitly says *"just run it"*, *"use defaults"*, *"don\'t ask, fire"* — acknowledge the trade-off explicitly:',
|
|
457
426
|
'',
|
|
458
|
-
'
|
|
427
|
+
'> *"Heads up: skipping the brief means I infer everything from your one-line prompt. Output will be generic. Confirm or want to do the brief?"*',
|
|
459
428
|
'',
|
|
460
|
-
'
|
|
461
|
-
"import { AgentMedia } from '@agentmedia/sdk';",
|
|
462
|
-
"const client = new AgentMedia({ apiKey: process.env.AGENT_MEDIA_API_KEY! });",
|
|
463
|
-
"const job = await client.v2.createCharacter({ photo_url: '…', display_name: 'sofia', description: '…' });",
|
|
464
|
-
"const done = await client.v2.runUntilDone(Promise.resolve(job));",
|
|
465
|
-
'```',
|
|
429
|
+
'If they confirm, infer the missing details, use `duration=10` unless the script length clearly needs 5s or 15s, and pass a concise `--scene-action` when the prompt includes product handling or body movement.',
|
|
466
430
|
'',
|
|
467
|
-
'##
|
|
431
|
+
'## DO NOT ask about cost or credits',
|
|
468
432
|
'',
|
|
469
|
-
|
|
470
|
-
'## Recommended flow (multi-clip from one character)',
|
|
433
|
+
'There is no 5th gate about pricing. The API debits internally and allows a soft overdraft so generations never get blocked. Never quote credit numbers or USD to the user — point them at <https://agent-media.ai/pricing> if they ask.',
|
|
471
434
|
'',
|
|
472
|
-
'
|
|
473
|
-
'2. **Reuse that id** for every subsequent Selfie — same face, same voice, same seed.',
|
|
474
|
-
'3. If the user wants a different *look*, make a new character; don\'t mutate the existing one.',
|
|
435
|
+
'## Anti-patterns — never do these',
|
|
475
436
|
'',
|
|
476
|
-
'
|
|
477
|
-
|
|
478
|
-
'
|
|
437
|
+
'- ❌ Calling `agent-media selfie` without running all 4 gates.',
|
|
438
|
+
'- ❌ Asking the 4 gates as one giant message — they\'re sequential, one per turn.',
|
|
439
|
+
'- ❌ Skipping Gate 3 (the director\'s brief). That\'s the gate that controls quality. Without it the output looks generic.',
|
|
440
|
+
'- ❌ Asking blank questions ("what scene?") instead of proposing defaults ("here\'s the scene I\'d use — confirm?").',
|
|
441
|
+
'- ❌ **Auto-picking a character from `agent-media character list`.** Even if there\'s only one, even if it\'s the "most recent" — you MUST show the user the list and wait for them to explicitly pick the id or say "new". Picking on their behalf wastes credits on the wrong person.',
|
|
442
|
+
'- ❌ Forgetting to forward `subtitles: true` (or `--subtitles true`) on the selfie call when the user accepted the brief. The default is on, but defaults only fire if you don\'t override — be explicit.',
|
|
443
|
+
'- ❌ **Defaulting to subtitles ON when the user explicitly says "no subs".** If the user\'s prompt or any Gate-3 reply contains "no subs", "without subtitles", "no captions", or similar — the call MUST include `--subtitles false` (CLI) or `subtitles: false` (REST). Failure mode: a subtitled video gets shipped against the user\'s wishes + the Whisper transcription may capture model garbage and burn it as text.',
|
|
444
|
+
'- ❌ **Mismatching script length and duration** (e.g. 10-word script + 15s duration without enough visual action). Normal speech is 2-4 words/sec. Size duration to fit the script and action plan.',
|
|
445
|
+
'- ❌ Passing removed flags such as `--preset`, `--voice-brief`, or `--sync` to the current v2 Selfie CLI. (Note: `--shot-preset` and `--vibe` ARE supported as optional overrides — use them only when the user explicitly pins a scene or tone.)',
|
|
446
|
+
'- ❌ **Overriding the handheld camera default with `--camera-locked` for normal UGC.** Default handheld feel is the #1 realism cue — only lock the camera for product/demo shots where stability is essential.',
|
|
447
|
+
'- ❌ **Allowing phone-in-frame by default.** Default is `forbidden` — no visible phone/camera/selfie-arm unless the user explicitly requests it.',
|
|
448
|
+
'- ❌ **Disabling polish with `--polish off` unless the user asks for raw output.** The default polish pass is what makes the clip feel like real iPhone footage instead of a model render.',
|
|
449
|
+
'- ❌ Waiting silently until the final video when intermediate URLs are available. Surface portrait, sheet, wireframe, and final video as each completes.',
|
|
450
|
+
'- ❌ Asking for a photo when the user only gave a text description.',
|
|
451
|
+
'- ❌ Suggesting a duration not in {5, 10, 15}.',
|
|
452
|
+
'- ❌ **Mentioning credit cost, USD, or pricing to the user.** The API handles billing transparently. If asked about cost, point at <https://agent-media.ai/pricing>.',
|
|
453
|
+
'- ❌ Falling back to `agent-media ugc` or any v1 command if v2 errors. Surface the error to the user instead.',
|
|
479
454
|
'',
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
455
|
+
].join('\n');
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
function renderSubtitleStyles(): string {
|
|
459
|
+
const SUBTITLE_STYLES = [
|
|
460
|
+
'hormozi', 'minimal', 'bold', 'karaoke', 'clean', 'tiktok', 'neon', 'fire',
|
|
461
|
+
'glow', 'pop', 'aesthetic', 'impact', 'pastel', 'electric', 'boxed',
|
|
462
|
+
'gradient', 'spotlight',
|
|
463
|
+
];
|
|
464
|
+
const HINT: Record<string, string> = {
|
|
465
|
+
hormozi: 'Default. Big yellow caps. "Self-help" energy.',
|
|
466
|
+
minimal: 'Small, white, subtle. Tasteful.',
|
|
467
|
+
bold: 'Heavy serif. High contrast.',
|
|
468
|
+
karaoke: 'Word-by-word highlight in sync with audio.',
|
|
469
|
+
clean: 'Sans-serif, generous tracking.',
|
|
470
|
+
tiktok: 'Classic TikTok auto-caption look.',
|
|
471
|
+
neon: 'Glowing pink/cyan. Synthwave.',
|
|
472
|
+
fire: 'Orange/red gradient. Hype.',
|
|
473
|
+
glow: 'White with soft halo.',
|
|
474
|
+
pop: 'Bubblegum. Playful.',
|
|
475
|
+
aesthetic: 'Wispy, lowercase. Lifestyle.',
|
|
476
|
+
impact: 'All-caps Impact font. Meme energy.',
|
|
477
|
+
pastel: 'Soft pinks/blues.',
|
|
478
|
+
electric: 'Blue glow + emphasis bursts.',
|
|
479
|
+
boxed: 'Black box behind text.',
|
|
480
|
+
gradient: 'Color gradient across each line.',
|
|
481
|
+
spotlight: 'Faded background, highlighted current word.',
|
|
482
|
+
};
|
|
483
|
+
return [
|
|
484
|
+
GENERATED_NOTE,
|
|
483
485
|
'',
|
|
484
|
-
'
|
|
486
|
+
'# Subtitle styles',
|
|
485
487
|
'',
|
|
486
|
-
'`--
|
|
488
|
+
'Pass via `--style <name>` on `agent-media subs` or `--subs-style <name>` on `agent-media selfie`. Default: `hormozi`.',
|
|
487
489
|
'',
|
|
488
|
-
|
|
490
|
+
'| Style | Look |',
|
|
491
|
+
'|---|---|',
|
|
492
|
+
SUBTITLE_STYLES.map((s) => `| \`${s}\` | ${HINT[s] ?? '—'} |`).join('\n'),
|
|
489
493
|
'',
|
|
490
|
-
|
|
494
|
+
].join('\n');
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
function renderRealismRubric(): string {
|
|
498
|
+
return [
|
|
499
|
+
GENERATED_NOTE,
|
|
491
500
|
'',
|
|
492
|
-
'
|
|
501
|
+
'# Realism rubric (internal guard)',
|
|
493
502
|
'',
|
|
494
|
-
|
|
503
|
+
'The pipeline scaffolds prompts against this 9-point rubric. You usually don\'t need to think about it — but if a user complains about "fake-looking" output, this is what the pipeline is enforcing:',
|
|
495
504
|
'',
|
|
496
|
-
'
|
|
505
|
+
'1. Real-camera optics — focal length, depth-of-field, microcatchlights',
|
|
506
|
+
'2. Skin texture — pores, sebum, asymmetry, no Photoshop smoothing',
|
|
507
|
+
'3. Hair physics — flyaways, shine, natural fall',
|
|
508
|
+
'4. Eye direction — meets camera, no dead-stare',
|
|
509
|
+
'5. Lighting — natural sources, motivated highlights, no ring-light halo',
|
|
510
|
+
'6. Wardrobe wear — wrinkles, layering, lived-in fabric',
|
|
511
|
+
'7. Background — believable depth, props that match the scene',
|
|
512
|
+
'8. Pose — neutral spine, natural hand position, no AI-mannequin stiffness',
|
|
513
|
+
'9. Color cast — daylight white-balance, no orange tint',
|
|
497
514
|
'',
|
|
498
|
-
'
|
|
515
|
+
'If the output violates any of these, raise an issue with the job_id — the rubric is enforced at Stage A (portrait gen) and Stage B (character sheet).',
|
|
499
516
|
'',
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
'7. Outfit plain + matte or satin — never patterned or logo\'d.',
|
|
507
|
-
'8. Hair long, brushed, in motion.',
|
|
508
|
-
'9. Product (if any) held mid-chest, ~25° tilt.',
|
|
517
|
+
].join('\n');
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
function renderErrors(): string {
|
|
521
|
+
return [
|
|
522
|
+
GENERATED_NOTE,
|
|
509
523
|
'',
|
|
510
|
-
'
|
|
524
|
+
'# Common errors + fixes',
|
|
511
525
|
'',
|
|
512
|
-
'##
|
|
526
|
+
'## CLI',
|
|
513
527
|
'',
|
|
514
|
-
'| Error
|
|
528
|
+
'| Error | Fix |',
|
|
515
529
|
'|---|---|',
|
|
516
|
-
'| `
|
|
517
|
-
'| `
|
|
518
|
-
'| `
|
|
519
|
-
'| `
|
|
520
|
-
'
|
|
521
|
-
'
|
|
530
|
+
'| `ERR_MODULE_NOT_FOUND: @agentmedia/schema` | You\'re on an old CLI. Run `npm install -g agent-media-cli@latest`. |',
|
|
531
|
+
'| `Not authenticated. Run agent-media login first.` | API key missing. Run `agent-media login`. |',
|
|
532
|
+
'| `LOGIN_TIMEOUT` | Browser didn\'t complete OAuth in time. Re-run `agent-media login`. |',
|
|
533
|
+
'| `DEPRECATED v1 command: agent-media ugc` | You called a legacy command. Switch to `agent-media selfie`. |',
|
|
534
|
+
'',
|
|
535
|
+
'## API',
|
|
536
|
+
'',
|
|
537
|
+
'| Code | Meaning | Fix |',
|
|
538
|
+
'|---|---|---|',
|
|
539
|
+
'| `VALIDATION_ERROR` | Input body failed schema. Check the `issues` array in the response. | Adjust args to match the input schema. |',
|
|
540
|
+
'| `UNAUTHORIZED` | Bearer token missing or invalid. | Re-run `agent-media login`. |',
|
|
541
|
+
'| `INSUFFICIENT_CREDITS` | Not enough credits on the account. | Run `agent-media subscribe` to top up. |',
|
|
542
|
+
'| `WORKER_NOT_CONFIGURED` | Server-side misconfig — should not normally occur. | Ping support. |',
|
|
543
|
+
'| `DATABASE_ERROR` | Server insert failed (often missing models row). | Ping support, report the job request. |',
|
|
544
|
+
'',
|
|
545
|
+
].join('\n');
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
function renderGenerator(g: V2GeneratorRecord): string {
|
|
549
|
+
const status = g.status === 'beta' ? ' · _beta_' : '';
|
|
550
|
+
const examples = (g.cli?.examples ?? []) as readonly string[];
|
|
551
|
+
return [
|
|
552
|
+
GENERATED_NOTE,
|
|
553
|
+
'',
|
|
554
|
+
`# \`agent-media ${g.cli?.command ?? g.id}\`${status}`,
|
|
555
|
+
'',
|
|
556
|
+
g.summary,
|
|
557
|
+
'',
|
|
558
|
+
'## When to use',
|
|
559
|
+
'',
|
|
560
|
+
g.description,
|
|
561
|
+
'',
|
|
562
|
+
'## CLI',
|
|
563
|
+
'',
|
|
564
|
+
examples.length
|
|
565
|
+
? '```bash\n' + examples.join('\n') + '\n```'
|
|
566
|
+
: `\`agent-media ${g.cli?.command ?? g.id} --help\``,
|
|
567
|
+
'',
|
|
568
|
+
'## MCP tool',
|
|
569
|
+
'',
|
|
570
|
+
g.mcp ? `\`${g.mcp.toolName}\`` : '_Not exposed as an MCP tool._',
|
|
522
571
|
'',
|
|
523
|
-
'##
|
|
572
|
+
'## REST',
|
|
524
573
|
'',
|
|
525
|
-
|
|
526
|
-
'
|
|
574
|
+
g.rest ? `\`${g.rest.method} ${g.rest.path}\`` : '_Not exposed via REST._',
|
|
575
|
+
'',
|
|
576
|
+
'## Input schema',
|
|
577
|
+
'',
|
|
578
|
+
fmtInputSchema(g),
|
|
579
|
+
'',
|
|
580
|
+
'## Related references',
|
|
581
|
+
'',
|
|
582
|
+
g.id === 'selfie'
|
|
583
|
+
? [
|
|
584
|
+
'- [`../conversation-flow.md`](../conversation-flow.md) — MUST-READ before calling this command',
|
|
585
|
+
'- [`../subtitle-styles.md`](../subtitle-styles.md) — all 17 subtitle styles',
|
|
586
|
+
'- [`../realism-rubric.md`](../realism-rubric.md) — visual-quality guard',
|
|
587
|
+
].join('\n')
|
|
588
|
+
: g.id === 'character_create'
|
|
589
|
+
? [
|
|
590
|
+
'- [`../conversation-flow.md`](../conversation-flow.md) — MUST-READ before calling this command',
|
|
591
|
+
'- [`./selfie.md`](./selfie.md) — once you have a `char_…`, use it here',
|
|
592
|
+
].join('\n')
|
|
593
|
+
: [
|
|
594
|
+
'- [`../subtitle-styles.md`](../subtitle-styles.md) — all 17 styles',
|
|
595
|
+
].join('\n'),
|
|
527
596
|
'',
|
|
528
597
|
].join('\n');
|
|
529
598
|
}
|
|
530
599
|
|
|
600
|
+
function emitSkillTree(): EmittedFile[] {
|
|
601
|
+
const generators = Object.values(V2_GENERATORS);
|
|
602
|
+
return [
|
|
603
|
+
{ relPath: 'SKILL.md', content: renderSkillIndex() },
|
|
604
|
+
{ relPath: 'reference/conversation-flow.md', content: renderConversationFlow() },
|
|
605
|
+
{ relPath: 'reference/subtitle-styles.md', content: renderSubtitleStyles() },
|
|
606
|
+
{ relPath: 'reference/realism-rubric.md', content: renderRealismRubric() },
|
|
607
|
+
{ relPath: 'reference/errors.md', content: renderErrors() },
|
|
608
|
+
...generators.map((g) => ({
|
|
609
|
+
relPath: `reference/generators/${g.id}.md`,
|
|
610
|
+
content: renderGenerator(g),
|
|
611
|
+
})),
|
|
612
|
+
];
|
|
613
|
+
}
|
|
614
|
+
|
|
531
615
|
// ── Run ────────────────────────────────────────────────────────────────────
|
|
532
616
|
|
|
533
617
|
function main() {
|
|
618
|
+
// 1. API reference (unchanged — still a single file)
|
|
534
619
|
mkdirSync(dirname(DOCS_OUT), { recursive: true });
|
|
535
|
-
mkdirSync(dirname(SKILL_OUT), { recursive: true });
|
|
536
|
-
|
|
537
620
|
const docs = renderApiReference();
|
|
538
621
|
writeFileSync(DOCS_OUT, docs, 'utf8');
|
|
622
|
+
console.log(`✓ wrote ${DOCS_OUT} (${docs.length} bytes)`);
|
|
539
623
|
|
|
540
|
-
|
|
541
|
-
|
|
624
|
+
// 2. Skill tree (one file per concern)
|
|
625
|
+
mkdirSync(SKILL_DIR, { recursive: true });
|
|
626
|
+
mkdirSync(resolve(SKILL_DIR, 'reference'), { recursive: true });
|
|
627
|
+
mkdirSync(resolve(SKILL_DIR, 'reference/generators'), { recursive: true });
|
|
542
628
|
|
|
543
|
-
|
|
544
|
-
|
|
629
|
+
const files = emitSkillTree();
|
|
630
|
+
let totalBytes = 0;
|
|
631
|
+
for (const f of files) {
|
|
632
|
+
const abs = resolve(SKILL_DIR, f.relPath);
|
|
633
|
+
mkdirSync(dirname(abs), { recursive: true });
|
|
634
|
+
writeFileSync(abs, f.content, 'utf8');
|
|
635
|
+
totalBytes += f.content.length;
|
|
636
|
+
console.log(`✓ wrote ${f.relPath} (${f.content.length} bytes)`);
|
|
637
|
+
}
|
|
638
|
+
console.log(` total: ${files.length} files, ${totalBytes} bytes`);
|
|
545
639
|
console.log(` generators emitted: ${Object.keys(V2_GENERATORS).length}`);
|
|
546
640
|
}
|
|
547
641
|
|