@studiomeyer/mcp-video 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. package/.github/ISSUE_TEMPLATE/bug_report.md +31 -0
  2. package/.github/ISSUE_TEMPLATE/feature_request.md +19 -0
  3. package/.github/workflows/ci.yml +34 -0
  4. package/CHANGELOG.md +24 -0
  5. package/CONTRIBUTING.md +75 -0
  6. package/LICENSE +21 -0
  7. package/README.md +198 -0
  8. package/USAGE.md +144 -0
  9. package/dist/handlers/capcut.d.ts +6 -0
  10. package/dist/handlers/capcut.js +229 -0
  11. package/dist/handlers/capcut.js.map +1 -0
  12. package/dist/handlers/editing.d.ts +6 -0
  13. package/dist/handlers/editing.js +242 -0
  14. package/dist/handlers/editing.js.map +1 -0
  15. package/dist/handlers/index.d.ts +2 -0
  16. package/dist/handlers/index.js +33 -0
  17. package/dist/handlers/index.js.map +1 -0
  18. package/dist/handlers/post-production.d.ts +5 -0
  19. package/dist/handlers/post-production.js +109 -0
  20. package/dist/handlers/post-production.js.map +1 -0
  21. package/dist/handlers/smart-screenshot.d.ts +5 -0
  22. package/dist/handlers/smart-screenshot.js +83 -0
  23. package/dist/handlers/smart-screenshot.js.map +1 -0
  24. package/dist/handlers/tts.d.ts +5 -0
  25. package/dist/handlers/tts.js +83 -0
  26. package/dist/handlers/tts.js.map +1 -0
  27. package/dist/handlers/video.d.ts +5 -0
  28. package/dist/handlers/video.js +127 -0
  29. package/dist/handlers/video.js.map +1 -0
  30. package/dist/lib/dual-transport.d.ts +42 -0
  31. package/dist/lib/dual-transport.js +208 -0
  32. package/dist/lib/dual-transport.js.map +1 -0
  33. package/dist/lib/logger.d.ts +12 -0
  34. package/dist/lib/logger.js +42 -0
  35. package/dist/lib/logger.js.map +1 -0
  36. package/dist/lib/types.d.ts +16 -0
  37. package/dist/lib/types.js +15 -0
  38. package/dist/lib/types.js.map +1 -0
  39. package/dist/schemas/capcut.d.ts +608 -0
  40. package/dist/schemas/capcut.js +411 -0
  41. package/dist/schemas/capcut.js.map +1 -0
  42. package/dist/schemas/editing.d.ts +822 -0
  43. package/dist/schemas/editing.js +466 -0
  44. package/dist/schemas/editing.js.map +1 -0
  45. package/dist/schemas/index.d.ts +2366 -0
  46. package/dist/schemas/index.js +15 -0
  47. package/dist/schemas/index.js.map +1 -0
  48. package/dist/schemas/post-production.d.ts +379 -0
  49. package/dist/schemas/post-production.js +268 -0
  50. package/dist/schemas/post-production.js.map +1 -0
  51. package/dist/schemas/smart-screenshot.d.ts +127 -0
  52. package/dist/schemas/smart-screenshot.js +122 -0
  53. package/dist/schemas/smart-screenshot.js.map +1 -0
  54. package/dist/schemas/tts.d.ts +220 -0
  55. package/dist/schemas/tts.js +194 -0
  56. package/dist/schemas/tts.js.map +1 -0
  57. package/dist/schemas/video.d.ts +236 -0
  58. package/dist/schemas/video.js +210 -0
  59. package/dist/schemas/video.js.map +1 -0
  60. package/dist/server.d.ts +11 -0
  61. package/dist/server.js +239 -0
  62. package/dist/server.js.map +1 -0
  63. package/dist/server.test.d.ts +1 -0
  64. package/dist/server.test.js +87 -0
  65. package/dist/server.test.js.map +1 -0
  66. package/dist/tools/engine/audio-mixer.d.ts +40 -0
  67. package/dist/tools/engine/audio-mixer.js +169 -0
  68. package/dist/tools/engine/audio-mixer.js.map +1 -0
  69. package/dist/tools/engine/audio.d.ts +22 -0
  70. package/dist/tools/engine/audio.js +73 -0
  71. package/dist/tools/engine/audio.js.map +1 -0
  72. package/dist/tools/engine/beat-sync.d.ts +31 -0
  73. package/dist/tools/engine/beat-sync.js +270 -0
  74. package/dist/tools/engine/beat-sync.js.map +1 -0
  75. package/dist/tools/engine/capture.d.ts +12 -0
  76. package/dist/tools/engine/capture.js +290 -0
  77. package/dist/tools/engine/capture.js.map +1 -0
  78. package/dist/tools/engine/chroma-key.d.ts +27 -0
  79. package/dist/tools/engine/chroma-key.js +154 -0
  80. package/dist/tools/engine/chroma-key.js.map +1 -0
  81. package/dist/tools/engine/concat.d.ts +49 -0
  82. package/dist/tools/engine/concat.js +149 -0
  83. package/dist/tools/engine/concat.js.map +1 -0
  84. package/dist/tools/engine/cursor.d.ts +26 -0
  85. package/dist/tools/engine/cursor.js +185 -0
  86. package/dist/tools/engine/cursor.js.map +1 -0
  87. package/dist/tools/engine/easing.d.ts +15 -0
  88. package/dist/tools/engine/easing.js +100 -0
  89. package/dist/tools/engine/easing.js.map +1 -0
  90. package/dist/tools/engine/editing.d.ts +158 -0
  91. package/dist/tools/engine/editing.js +541 -0
  92. package/dist/tools/engine/editing.js.map +1 -0
  93. package/dist/tools/engine/encoder.d.ts +31 -0
  94. package/dist/tools/engine/encoder.js +154 -0
  95. package/dist/tools/engine/encoder.js.map +1 -0
  96. package/dist/tools/engine/index.d.ts +30 -0
  97. package/dist/tools/engine/index.js +23 -0
  98. package/dist/tools/engine/index.js.map +1 -0
  99. package/dist/tools/engine/lut-presets.d.ts +25 -0
  100. package/dist/tools/engine/lut-presets.js +141 -0
  101. package/dist/tools/engine/lut-presets.js.map +1 -0
  102. package/dist/tools/engine/narrated-video.d.ts +63 -0
  103. package/dist/tools/engine/narrated-video.js +163 -0
  104. package/dist/tools/engine/narrated-video.js.map +1 -0
  105. package/dist/tools/engine/scenes.d.ts +17 -0
  106. package/dist/tools/engine/scenes.js +223 -0
  107. package/dist/tools/engine/scenes.js.map +1 -0
  108. package/dist/tools/engine/smart-screenshot.d.ts +80 -0
  109. package/dist/tools/engine/smart-screenshot.js +744 -0
  110. package/dist/tools/engine/smart-screenshot.js.map +1 -0
  111. package/dist/tools/engine/social-format.d.ts +66 -0
  112. package/dist/tools/engine/social-format.js +107 -0
  113. package/dist/tools/engine/social-format.js.map +1 -0
  114. package/dist/tools/engine/template-renderer.d.ts +45 -0
  115. package/dist/tools/engine/template-renderer.js +233 -0
  116. package/dist/tools/engine/template-renderer.js.map +1 -0
  117. package/dist/tools/engine/templates.d.ts +87 -0
  118. package/dist/tools/engine/templates.js +272 -0
  119. package/dist/tools/engine/templates.js.map +1 -0
  120. package/dist/tools/engine/text-animations.d.ts +33 -0
  121. package/dist/tools/engine/text-animations.js +192 -0
  122. package/dist/tools/engine/text-animations.js.map +1 -0
  123. package/dist/tools/engine/text-overlay.d.ts +27 -0
  124. package/dist/tools/engine/text-overlay.js +84 -0
  125. package/dist/tools/engine/text-overlay.js.map +1 -0
  126. package/dist/tools/engine/tts.d.ts +54 -0
  127. package/dist/tools/engine/tts.js +186 -0
  128. package/dist/tools/engine/tts.js.map +1 -0
  129. package/dist/tools/engine/types.d.ts +166 -0
  130. package/dist/tools/engine/types.js +13 -0
  131. package/dist/tools/engine/types.js.map +1 -0
  132. package/dist/tools/engine/voice-effects.d.ts +18 -0
  133. package/dist/tools/engine/voice-effects.js +215 -0
  134. package/dist/tools/engine/voice-effects.js.map +1 -0
  135. package/dist/tools/index.d.ts +32 -0
  136. package/dist/tools/index.js +23 -0
  137. package/dist/tools/index.js.map +1 -0
  138. package/package.json +56 -0
  139. package/scripts/check-deps.js +39 -0
  140. package/src/handlers/capcut.ts +245 -0
  141. package/src/handlers/editing.ts +260 -0
  142. package/src/handlers/index.ts +34 -0
  143. package/src/handlers/post-production.ts +136 -0
  144. package/src/handlers/smart-screenshot.ts +86 -0
  145. package/src/handlers/tts.ts +103 -0
  146. package/src/handlers/video.ts +137 -0
  147. package/src/lib/dual-transport.ts +272 -0
  148. package/src/lib/logger.ts +59 -0
  149. package/src/lib/types.ts +25 -0
  150. package/src/schemas/capcut.ts +418 -0
  151. package/src/schemas/editing.ts +476 -0
  152. package/src/schemas/index.ts +15 -0
  153. package/src/schemas/post-production.ts +273 -0
  154. package/src/schemas/smart-screenshot.ts +122 -0
  155. package/src/schemas/tts.ts +197 -0
  156. package/src/schemas/video.ts +211 -0
  157. package/src/server.test.ts +99 -0
  158. package/src/server.ts +289 -0
  159. package/src/tools/engine/audio-mixer.ts +244 -0
  160. package/src/tools/engine/audio.ts +115 -0
  161. package/src/tools/engine/beat-sync.ts +356 -0
  162. package/src/tools/engine/capture.ts +360 -0
  163. package/src/tools/engine/chroma-key.ts +202 -0
  164. package/src/tools/engine/concat.ts +242 -0
  165. package/src/tools/engine/cursor.ts +222 -0
  166. package/src/tools/engine/easing.ts +120 -0
  167. package/src/tools/engine/editing.ts +809 -0
  168. package/src/tools/engine/encoder.ts +208 -0
  169. package/src/tools/engine/index.ts +33 -0
  170. package/src/tools/engine/lut-presets.ts +235 -0
  171. package/src/tools/engine/narrated-video.ts +267 -0
  172. package/src/tools/engine/scenes.ts +309 -0
  173. package/src/tools/engine/smart-screenshot.ts +923 -0
  174. package/src/tools/engine/social-format.ts +146 -0
  175. package/src/tools/engine/template-renderer.ts +294 -0
  176. package/src/tools/engine/templates.ts +370 -0
  177. package/src/tools/engine/text-animations.ts +282 -0
  178. package/src/tools/engine/text-overlay.ts +143 -0
  179. package/src/tools/engine/tts.ts +284 -0
  180. package/src/tools/engine/types.ts +191 -0
  181. package/src/tools/engine/voice-effects.ts +258 -0
  182. package/src/tools/index.ts +67 -0
  183. package/tsconfig.json +19 -0
  184. package/vitest.config.ts +7 -0
@@ -0,0 +1,143 @@
1
+ /**
2
+ * Text overlay engine — animated titles, subtitles, watermarks
3
+ */
4
+
5
+ import { execFile } from 'child_process';
6
+ import * as fs from 'fs';
7
+ import { logger } from '../../lib/logger.js';
8
+
9
+ // ─── Types ──────────────────────────────────────────────────────────
10
+
11
/** Named anchor points at which an overlay can be placed inside the frame. */
export type TextPosition =
  | 'center'
  | 'top'
  | 'bottom'
  | 'top-left'
  | 'top-right'
  | 'bottom-left'
  | 'bottom-right';

/** A single timed piece of text to draw on top of a video. */
export interface TextOverlay {
  /** String rendered on screen. */
  text: string;
  /** Anchor inside the frame; `center` when omitted. */
  position?: TextPosition;
  /** Font size passed to drawtext's `fontsize` option; 48 when omitted. */
  fontSize?: number;
  /** Value passed to drawtext's `fontcolor` option; `white` when omitted. */
  fontColor?: string;
  /** Time (seconds) at which the overlay starts to appear. */
  startTime: number;
  /** Time (seconds) at which the overlay has fully disappeared. */
  endTime: number;
  /** Length of the fade-in ramp in seconds; 0.5 when omitted. */
  fadeIn?: number;
  /** Length of the fade-out ramp in seconds; 0.5 when omitted. */
  fadeOut?: number;
  /** Draw a box behind the text; off when omitted. */
  showBackground?: boolean;
  /** Value passed to drawtext's `boxcolor` option; `black@0.6` when omitted. */
  backgroundColor?: string;
}
35
+
36
+ // ─── Position Resolver ──────────────────────────────────────────────
37
+
38
/**
 * Translates a named anchor into ffmpeg `drawtext` x/y expressions.
 *
 * The expressions are built from drawtext's `w`/`h` and `text_w`/`text_h`
 * variables. Any value without a dedicated case is centred.
 *
 * @param pos - Named anchor to resolve.
 * @returns The `x` and `y` expression strings for the filter.
 */
function resolvePosition(pos: TextPosition): { x: string; y: string } {
  switch (pos) {
    case 'top': return { x: '(w-text_w)/2', y: 'h*0.08' };
    case 'bottom': return { x: '(w-text_w)/2', y: 'h*0.88' };
    case 'top-left': return { x: 'w*0.05', y: 'h*0.05' };
    case 'top-right': return { x: 'w-text_w-w*0.05', y: 'h*0.05' };
    case 'bottom-left': return { x: 'w*0.05', y: 'h-text_h-h*0.05' };
    case 'bottom-right': return { x: 'w-text_w-w*0.05', y: 'h-text_h-h*0.05' };
    default: return { x: '(w-text_w)/2', y: '(h-text_h)/2' };
  }
}

// ─── Filter Builder ─────────────────────────────────────────────────

/**
 * Escapes arbitrary user text so it can be placed inside `text='…'` of a
 * drawtext filter that is handed to ffmpeg without a shell.
 *
 * Three layers consume escapes before the text is rendered:
 *  1. drawtext text expansion: `\` and `%` are special,
 *  2. filter option parsing: `\`, `'` and `:` are special,
 *  3. the surrounding single quotes of the filtergraph: only `'` is special
 *     and has to be written as `'\''` (close quote, escaped quote, reopen).
 *
 * All characters are rewritten in a single pass so that backslashes added
 * for one character are never escaped again by a later replacement.
 *
 * @param text - Raw text as supplied by the caller.
 * @returns Text that renders literally once all three layers are unwound.
 */
function escapeDrawtextText(text: string): string {
  return text.replace(/[\\%:']/g, (ch) => {
    switch (ch) {
      case '\\': return '\\\\\\\\'; // expansion `\\` → option level `\\\\`
      case '%': return '\\\\%'; // expansion `\%` → option level `\\%`
      case ':': return '\\:'; // option separator
      default: return "\\'\\''"; // option level `\'`, quote re-opened for the filtergraph
    }
  });
}

/**
 * Builds the `drawtext=…` filter string for one overlay.
 *
 * The filter is enabled only between `startTime` and `endTime`; inside that
 * window the alpha expression ramps 0→1 over `fadeIn`, holds at 1, and ramps
 * 1→0 over `fadeOut`. A background box is appended when requested.
 *
 * @param overlay - Overlay definition; omitted optional fields fall back to the
 *   defaults in the destructuring below.
 * @param fontPath - Font file passed to drawtext's `fontfile` option.
 * @returns A single drawtext filter, ready to be joined with `,` into a chain.
 */
function buildDrawtextFilter(overlay: TextOverlay, fontPath: string): string {
  const {
    text,
    position = 'center',
    fontSize = 48,
    fontColor = 'white',
    startTime,
    endTime,
    fadeIn = 0.5,
    fadeOut = 0.5,
    showBackground = false,
    backgroundColor = 'black@0.6',
  } = overlay;

  const { x, y } = resolvePosition(position);
  const fadeInEnd = startTime + fadeIn;
  const fadeOutStart = endTime - fadeOut;

  // Previously only `:` was handled correctly; apostrophes, `%` and `\` broke the filter.
  const escaped = escapeDrawtextText(text);

  // Alpha expression: fade in → hold → fade out. Commas are backslash-escaped
  // because the option parser would otherwise treat them as separators.
  const alpha = `if(lt(t\\,${startTime})\\,0\\,if(lt(t\\,${fadeInEnd})\\,(t-${startTime})/${fadeIn}\\,if(lt(t\\,${fadeOutStart})\\,1\\,if(lt(t\\,${endTime})\\,(${endTime}-t)/${fadeOut}\\,0))))`;

  let filter = `drawtext=text='${escaped}':fontfile='${fontPath}':fontsize=${fontSize}:fontcolor=${fontColor}:x=${x}:y=${y}:alpha='${alpha}':enable='between(t,${startTime},${endTime})'`;

  if (showBackground) {
    filter += `:box=1:boxcolor=${backgroundColor}:boxborderw=12`;
  }

  return filter;
}
84
+
85
+ // ─── Main Function ──────────────────────────────────────────────────
86
+
87
/**
 * Burns the given text overlays into a video by running ffmpeg with a chain
 * of `drawtext` filters.
 *
 * The video stream is re-encoded with libx264 (CRF 18, yuv420p, faststart);
 * the audio stream is copied. An existing file at `outputPath` is overwritten.
 *
 * @param inputPath - Existing source video.
 * @param outputPath - Destination file handed to ffmpeg.
 * @param overlays - Overlays to draw; must contain at least one entry.
 * @returns `outputPath` once ffmpeg has finished.
 * @throws Error when the input is missing, no overlays are given, or ffmpeg fails.
 */
export async function addTextOverlays(
  inputPath: string,
  outputPath: string,
  overlays: TextOverlay[]
): Promise<string> {
  if (!fs.existsSync(inputPath)) {
    throw new Error(`Input not found: ${inputPath}`);
  }
  if (overlays.length === 0) {
    throw new Error('No overlays provided');
  }

  // One font is resolved up front and shared by every overlay.
  const font = findFont();
  const filterChain = overlays
    .map((overlay) => buildDrawtextFilter(overlay, font))
    .join(',');

  logger.info(`Adding ${overlays.length} text overlay(s)`);

  await runFfmpeg([
    '-y',
    '-i', inputPath,
    '-vf', filterChain,
    '-c:a', 'copy',
    '-c:v', 'libx264',
    '-crf', '18',
    '-pix_fmt', 'yuv420p',
    '-movflags', '+faststart',
    outputPath,
  ]);

  logger.info(`Text overlays added: ${outputPath}`);
  return outputPath;
}
117
+
118
+ // ─── Helpers ────────────────────────────────────────────────────────
119
+
120
/**
 * Picks the font file used for text overlays.
 *
 * Checks a short list of bold fonts in order and returns the first one that
 * exists on disk. When none exists, the regular DejaVu Sans path is returned
 * without checking that it is present.
 *
 * @returns Absolute path of the chosen font file.
 */
function findFont(): string {
  const fallback = '/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf';
  const preferred = [
    '/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf',
    '/usr/share/fonts/truetype/liberation/LiberationSans-Bold.ttf',
    '/usr/share/fonts/truetype/ubuntu/Ubuntu-Bold.ttf',
  ];
  return preferred.find((candidate) => fs.existsSync(candidate)) ?? fallback;
}
131
+
132
/**
 * Runs the `ffmpeg` binary with the given arguments (no shell involved).
 *
 * Up to 50 MiB of output is buffered per stream.
 *
 * @param args - Command-line arguments passed to ffmpeg verbatim.
 * @returns Resolves with ffmpeg's stdout on success.
 * @throws Error (via rejection) containing stderr, or the process error message
 *   when stderr is empty.
 */
function runFfmpeg(args: string[]): Promise<string> {
  const options = { maxBuffer: 50 * 1024 * 1024 };

  return new Promise((resolve, reject) => {
    execFile('ffmpeg', args, options, (error, stdout, stderr) => {
      if (!error) {
        resolve(stdout);
        return;
      }
      logger.error(`ffmpeg failed: ${stderr}`);
      reject(new Error(`ffmpeg failed: ${stderr || error.message}`));
    });
  });
}
@@ -0,0 +1,284 @@
1
+ /**
2
+ * Text-to-Speech Engine
3
+ * Primary: ElevenLabs (best quality, multilingual)
4
+ * Fallback: OpenAI TTS (reliable, already integrated)
5
+ *
6
+ * No extra npm dependencies — ElevenLabs and OpenAI are both called
7
+ * through native fetch (the openai package is not used here)
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import { logger } from '../../lib/logger.js';
13
+ import { getMediaDuration } from './audio.js';
14
+
15
+ // ─── Types ──────────────────────────────────────────────────────────
16
+
17
/** Speech back ends this module can call. */
export type TTSProvider = 'elevenlabs' | 'openai';

/** ElevenLabs model identifiers accepted in `TTSConfig.elevenLabsModel`. */
export type ElevenLabsModel =
  | 'eleven_multilingual_v2'
  | 'eleven_turbo_v2_5'
  | 'eleven_flash_v2_5';

/** Voice names that have an entry in the built-in ElevenLabs voice-ID table. */
export type ElevenLabsVoice =
  | 'rachel'
  | 'sarah'
  | 'emily'
  | 'charlotte'
  | 'alice'
  | 'matilda'
  | 'lily'
  | 'brian'
  | 'adam'
  | 'daniel'
  | 'josh'
  | 'james'
  | 'liam'
  | 'chris'
  | 'george';

/** Voice names accepted in `TTSConfig.openaiVoice`. */
export type OpenAIVoice =
  | 'alloy'
  | 'ash'
  | 'coral'
  | 'echo'
  | 'fable'
  | 'nova'
  | 'onyx'
  | 'sage'
  | 'shimmer';

/** OpenAI speech model identifiers accepted in `TTSConfig.openaiModel`. */
export type OpenAIModel = 'tts-1' | 'tts-1-hd';
33
+
34
/** Options for a single speech-generation request. */
export interface TTSConfig {
  /** Text to be spoken. */
  text: string;
  /** Where the audio is written; `.mp3` is appended when the path lacks it. */
  outputPath: string;
  /**
   * Provider to try first. When omitted it is chosen from the environment:
   * ElevenLabs if `ELEVENLABS_API_KEY` is set, otherwise OpenAI if
   * `OPENAI_API_KEY` is set. The other provider is tried if the first fails.
   */
  provider?: TTSProvider;
  /** Language code (default: `en`). Used by the ElevenLabs request; the OpenAI request does not send it. */
  language?: string;
  /** Speaking speed, forwarded to whichever provider runs (default: 1.0). */
  speed?: number;

  // ── ElevenLabs only ──
  /** Built-in voice name or a raw ElevenLabs voice ID (default: `adam`). */
  elevenLabsVoice?: ElevenLabsVoice | string;
  /** ElevenLabs model (default: `eleven_multilingual_v2`). */
  elevenLabsModel?: ElevenLabsModel;
  /** Voice stability, 0–1 (default: 0.5). */
  stability?: number;
  /** Voice similarity boost, 0–1 (default: 0.75). */
  similarityBoost?: number;

  // ── OpenAI only ──
  /** OpenAI voice (default: `onyx`). */
  openaiVoice?: OpenAIVoice;
  /** OpenAI model (default: `tts-1-hd`). */
  openaiModel?: OpenAIModel;
}
62
+
63
/** Outcome of a successful speech-generation request. */
export interface TTSResult {
  /** Always `true` on the returned object; failures are thrown instead. */
  success: boolean;
  /** Path of the MP3 file that was written. */
  audioPath: string;
  /** Provider recorded for this result. */
  provider: TTSProvider;
  /** Probed media duration (logged with an `s` suffix); 0 when probing failed. */
  duration: number;
  /** File size in bytes. */
  sizeBytes: number;
  /** File size divided by 1024² and formatted with two decimals. */
  sizeMB: string;
  /** Text that was spoken. */
  text: string;
  /** Language code the request was made with. */
  language: string;
}
73
+
74
+ // ─── ElevenLabs Voice IDs ───────────────────────────────────────────
75
+
76
/**
 * Lookup table from the friendly voice names in `ElevenLabsVoice` to the
 * voice IDs used in ElevenLabs API URLs. Names that are not listed here are
 * treated by the caller as raw voice IDs.
 */
const ELEVENLABS_VOICE_IDS: Record<string, string> = {
  'rachel': '21m00Tcm4TlvDq8ikWAM',
  'sarah': 'EXAVITQu4vr4xnSDxMaL',
  'emily': 'LcfcDJNUP1GQjkzn1xUU',
  'charlotte': 'XB0fDUnXU5powFXDhCwa',
  'alice': 'Xb7hH8MSUJpSbSDYk0k2',
  'matilda': 'XrExE9yKIg1WjnnlVkGX',
  'lily': 'pFZP5JQG7iQjIQuC4Bku',
  'brian': 'nPczCjzI2devNBz1zQrb',
  'adam': 'pNInz6obpgDQGcFmaJgB',
  'daniel': 'onwK4e9ZLuTAKqWW03F9',
  'josh': 'TxGEqnHWrfWFTfGW9XjX',
  'james': 'ZQe5CZNOzWyzPSCn5a3c',
  'liam': 'TX3LPaxmHKxFdv7VOQHJ',
  'chris': 'iP95p4xoKVk53GoZ742B',
  'george': 'JBFqnCBsd6RMkjVDRZzb',
};
93
+
94
+ // ─── Main TTS Function ─────────────────────────────────────────────
95
+
96
/**
 * Synthesizes `config.text` into an MP3 file.
 *
 * The requested provider (or the environment-derived default) is tried first;
 * if it throws, the other provider is tried once. The returned `provider`
 * names the back end that actually produced the audio, which differs from the
 * requested one when the fallback was used.
 *
 * @param config - Request options; see `TTSConfig`.
 * @returns Path, size and probed duration of the generated file.
 * @throws Error when no API key is configured (default-provider lookup) or
 *   when both providers fail; the latter message contains both causes.
 */
export async function generateSpeech(config: TTSConfig): Promise<TTSResult> {
  const {
    text,
    outputPath,
    provider = getDefaultProvider(),
    language = 'en',
  } = config;

  // Ensure output dir exists
  const outDir = path.dirname(outputPath);
  if (!fs.existsSync(outDir)) fs.mkdirSync(outDir, { recursive: true });

  // Ensure .mp3 extension
  const finalPath = outputPath.endsWith('.mp3') ? outputPath : `${outputPath}.mp3`;

  logger.info(`Generating speech (${provider}, ${language}, ${text.length} chars)`);

  // Single dispatch point shared by the primary attempt and the fallback.
  const synthesize = (target: TTSProvider): Promise<string> =>
    target === 'elevenlabs' ? elevenLabsTTS(config, finalPath) : openaiTTS(config, finalPath);

  let usedProvider: TTSProvider = provider;
  let audioPath: string;

  try {
    audioPath = await synthesize(provider);
  } catch (error) {
    // Fallback: try the other provider
    const fallback: TTSProvider = provider === 'elevenlabs' ? 'openai' : 'elevenlabs';
    const msg = error instanceof Error ? error.message : String(error);
    logger.warn(`${provider} TTS failed (${msg}), falling back to ${fallback}`);

    try {
      audioPath = await synthesize(fallback);
      // Record who really produced the file so the result does not misreport it.
      usedProvider = fallback;
    } catch (fallbackError) {
      const fbMsg = fallbackError instanceof Error ? fallbackError.message : String(fallbackError);
      throw new Error(`Both TTS providers failed. ${provider}: ${msg}, ${fallback}: ${fbMsg}`);
    }
  }

  // Get audio stats
  const stats = fs.statSync(audioPath);
  let duration = 0;
  try {
    duration = await getMediaDuration(audioPath);
  } catch (probeError) {
    // Best effort: the audio exists even if it cannot be probed, so keep 0 and carry on.
    const probeMsg = probeError instanceof Error ? probeError.message : String(probeError);
    logger.warn(`Could not determine audio duration for ${audioPath}: ${probeMsg}`);
  }

  logger.info(`Speech generated: ${audioPath} (${duration.toFixed(1)}s, ${(stats.size / 1024).toFixed(0)} KB)`);

  return {
    success: true,
    audioPath,
    provider: usedProvider,
    duration,
    sizeBytes: stats.size,
    sizeMB: (stats.size / 1024 / 1024).toFixed(2),
    text,
    language,
  };
}
161
+
162
+ // ─── ElevenLabs Implementation ──────────────────────────────────────
163
+
164
/**
 * Models for which `language_code` is sent.
 * NOTE(review): ElevenLabs documents language enforcement for the Turbo/Flash
 * v2.5 models and an error response for other models — confirm against the
 * current API reference before extending this list.
 */
const ELEVENLABS_LANGUAGE_CODE_MODELS: ReadonlySet<string> = new Set([
  'eleven_turbo_v2_5',
  'eleven_flash_v2_5',
]);

/**
 * Generates speech through the ElevenLabs text-to-speech endpoint and writes
 * the MP3 response to `outputPath`.
 *
 * @param config - Request options; voice, model, language, speed, stability
 *   and similarity boost are read, with the defaults shown below.
 * @param outputPath - File the audio bytes are written to.
 * @returns `outputPath` after the file has been written.
 * @throws Error when `ELEVENLABS_API_KEY` is unset or the API answers with a
 *   non-2xx status (the message carries status and response body).
 */
async function elevenLabsTTS(config: TTSConfig, outputPath: string): Promise<string> {
  const apiKey = process.env.ELEVENLABS_API_KEY;
  if (!apiKey) throw new Error('ELEVENLABS_API_KEY environment variable not set');

  const voiceName = config.elevenLabsVoice ?? 'adam';
  // Own-property check so names like "constructor" are not resolved through
  // Object.prototype; anything not in the table is treated as a raw voice ID.
  const mappedId = Object.prototype.hasOwnProperty.call(ELEVENLABS_VOICE_IDS, voiceName)
    ? ELEVENLABS_VOICE_IDS[voiceName]
    : undefined;
  const voiceId = mappedId ?? voiceName;
  const model = config.elevenLabsModel ?? 'eleven_multilingual_v2';
  const language = config.language ?? 'en';

  // The voice ID may be caller-supplied, so it is encoded before entering the URL path.
  const url = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}?output_format=mp3_44100_128`;

  const body: Record<string, unknown> = {
    text: config.text,
    model_id: model,
    voice_settings: {
      stability: config.stability ?? 0.5,
      similarity_boost: config.similarityBoost ?? 0.75,
      style: 0.0,
      speed: config.speed ?? 1.0,
      use_speaker_boost: true,
    },
  };
  // Only send language_code where enforcement is documented; other models
  // are left to detect the language from the text.
  if (ELEVENLABS_LANGUAGE_CODE_MODELS.has(model)) {
    body.language_code = language;
  }

  logger.info(`ElevenLabs TTS: voice=${voiceName}, model=${model}, lang=${language}`);

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'xi-api-key': apiKey,
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`ElevenLabs API ${response.status}: ${errorText}`);
  }

  const arrayBuffer = await response.arrayBuffer();
  fs.writeFileSync(outputPath, Buffer.from(arrayBuffer));

  return outputPath;
}
209
+
210
+ // ─── OpenAI Implementation ──────────────────────────────────────────
211
+
212
/**
 * Generates speech through OpenAI's `/v1/audio/speech` endpoint and writes
 * the MP3 response to `outputPath`.
 *
 * @param config - Request options; voice, model, speed and text are read.
 * @param outputPath - File the audio bytes are written to.
 * @returns `outputPath` after the file has been written.
 * @throws Error when `OPENAI_API_KEY` is unset or the API answers with a
 *   non-2xx status (the message carries status and response body).
 */
async function openaiTTS(config: TTSConfig, outputPath: string): Promise<string> {
  const apiKey = process.env.OPENAI_API_KEY;
  if (!apiKey) {
    throw new Error('OPENAI_API_KEY environment variable not set');
  }

  const voice = config.openaiVoice ?? 'onyx';
  const model = config.openaiModel ?? 'tts-1-hd';
  const speed = config.speed ?? 1.0;

  logger.info(`OpenAI TTS: voice=${voice}, model=${model}, speed=${speed}`);

  const payload = {
    model,
    voice,
    input: config.text,
    response_format: 'mp3',
    speed,
  };

  // Plain fetch is used here rather than the openai package.
  const response = await fetch('https://api.openai.com/v1/audio/speech', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`,
    },
    body: JSON.stringify(payload),
  });

  if (!response.ok) {
    const errorText = await response.text();
    throw new Error(`OpenAI TTS API ${response.status}: ${errorText}`);
  }

  const audioBytes = Buffer.from(await response.arrayBuffer());
  fs.writeFileSync(outputPath, audioBytes);

  return outputPath;
}
248
+
249
+ // ─── List ElevenLabs Voices ─────────────────────────────────────────
250
+
251
/**
 * Fetches the voices available to the configured ElevenLabs account.
 *
 * @returns One entry per voice; `language` comes from the voice's
 *   `labels.language` and is `'unknown'` when that label is absent.
 * @throws Error when `ELEVENLABS_API_KEY` is unset, when the API answers with
 *   a non-2xx status, or when the response has no `voices` array.
 */
export async function listElevenLabsVoices(): Promise<Array<{
  voice_id: string;
  name: string;
  category: string;
  language: string;
}>> {
  const apiKey = process.env.ELEVENLABS_API_KEY;
  if (!apiKey) throw new Error('ELEVENLABS_API_KEY environment variable not set');

  const response = await fetch('https://api.elevenlabs.io/v1/voices', {
    headers: { 'xi-api-key': apiKey },
  });

  if (!response.ok) {
    // Include the response body when it can be read.
    let errorText = '';
    try {
      errorText = await response.text();
    } catch {
      // Body unavailable — the status code alone is reported.
    }
    throw new Error(
      errorText ? `ElevenLabs API ${response.status}: ${errorText}` : `ElevenLabs API ${response.status}`
    );
  }

  // The payload is external input: check its shape before mapping over it so a
  // changed or error-shaped body yields a clear message instead of a TypeError.
  const data: unknown = await response.json();
  const voices = typeof data === 'object' && data !== null
    ? (data as { voices?: unknown }).voices
    : undefined;
  if (!Array.isArray(voices)) {
    throw new Error('ElevenLabs API returned an unexpected voices payload');
  }

  type RawVoice = { voice_id: string; name: string; category: string; labels?: Record<string, string> };

  return (voices as RawVoice[]).map((v) => ({
    voice_id: v.voice_id,
    name: v.name,
    category: v.category,
    language: v.labels?.language ?? 'unknown',
  }));
}
277
+
278
+ // ─── Helper ─────────────────────────────────────────────────────────
279
+
280
/**
 * Chooses the provider to use when the caller did not name one, based on
 * which API keys are present in the environment. ElevenLabs wins when both
 * keys are set.
 *
 * @returns `'elevenlabs'` or `'openai'`.
 * @throws Error when neither `ELEVENLABS_API_KEY` nor `OPENAI_API_KEY` is set.
 */
function getDefaultProvider(): TTSProvider {
  const { ELEVENLABS_API_KEY: elevenLabsKey, OPENAI_API_KEY: openaiKey } = process.env;

  if (elevenLabsKey) {
    return 'elevenlabs';
  }
  if (openaiKey) {
    return 'openai';
  }
  throw new Error('No TTS API key found. Set ELEVENLABS_API_KEY or OPENAI_API_KEY.');
}
@@ -0,0 +1,191 @@
1
+ /**
2
+ * Type definitions for the Cinema Video Engine
3
+ */
4
+
5
+ // ─── Viewport Presets ───────────────────────────────────────────────
6
/** Built-in viewport sizes, keyed by preset name. */
export const VIEWPORTS = {
  'desktop': { width: 1920, height: 1080 },
  'desktop-4k': { width: 3840, height: 2160 },
  'tablet': { width: 768, height: 1024 },
  'tablet-landscape': { width: 1024, height: 768 },
  'mobile': { width: 393, height: 852 },
  'mobile-landscape': { width: 852, height: 393 },
} as const;

/** Name of one of the built-in `VIEWPORTS` entries. */
export type ViewportPreset = keyof typeof VIEWPORTS;

/** Explicit viewport dimensions, for sizes not covered by a preset. */
export interface ViewportConfig {
  /** Viewport width. */
  width: number;
  /** Viewport height. */
  height: number;
}
21
+
22
+ // ─── Easing ─────────────────────────────────────────────────────────
23
/**
 * Names of the easing curves a scene may request.
 *
 * Besides the standard polynomial and sine curves there are two custom ones:
 * - `cinematic`: slow start + cruise + slow end
 * - `showcase`: dramatic slow start, smooth cruise, elegant stop
 */
export type EasingName =
  | 'linear'
  | 'easeInQuad' | 'easeOutQuad' | 'easeInOutQuad'
  | 'easeInCubic' | 'easeOutCubic' | 'easeInOutCubic'
  | 'easeInQuart' | 'easeOutQuart' | 'easeInOutQuart'
  | 'easeInQuint' | 'easeOutQuint' | 'easeInOutQuint'
  | 'easeInOutSine'
  | 'cinematic'
  | 'showcase';
40
+
41
+ // ─── Scenes ─────────────────────────────────────────────────────────
42
/** Common shape of every scene: a `type` tag that identifies the variant. */
export interface SceneBase {
  type: string;
}

/** Scrolls the page to a target over a fixed duration. */
export interface ScrollScene extends SceneBase {
  type: 'scroll';
  /** Scroll target: `'bottom'`, `'top'`, a pixel number, or a CSS selector. */
  to: 'bottom' | 'top' | number | string;
  /** How long the scroll takes, in seconds. */
  duration: number;
  /** Easing curve for the scroll (default: `easeInOutCubic`). */
  easing?: EasingName;
}

/** Holds the current view for a while. */
export interface PauseScene extends SceneBase {
  type: 'pause';
  /** Length of the pause in seconds. */
  duration: number;
}

/** Hovers over an element. */
export interface HoverScene extends SceneBase {
  type: 'hover';
  /** CSS selector of the element to hover. */
  selector: string;
  /** How long the hover is held, in seconds. */
  duration: number;
  /** Move the cursor smoothly to the element (default: true). */
  animateCursor?: boolean;
}

/** Clicks an element and optionally waits afterwards. */
export interface ClickScene extends SceneBase {
  type: 'click';
  /** CSS selector of the element to click. */
  selector: string;
  /** What to wait for after the click. */
  waitFor?: 'networkidle' | 'load' | number;
  /** Pause after navigation, in seconds (default: 1). */
  pauseAfter?: number;
}

/** Types text into an input field. */
export interface TypeScene extends SceneBase {
  type: 'type';
  /** CSS selector of the input field. */
  selector: string;
  /** Text to type. */
  text: string;
  /** Delay between keystrokes in milliseconds (default: 80). */
  delay?: number;
}

/** Waits until an element is present. */
export interface WaitScene extends SceneBase {
  type: 'wait';
  /** CSS selector to wait for. */
  selector: string;
  /** Maximum wait in milliseconds (default: 5000). */
  timeout?: number;
}

/** Any scene variant; discriminated by `type`. */
export type Scene =
  | ScrollScene
  | PauseScene
  | HoverScene
  | ClickScene
  | TypeScene
  | WaitScene;
107
+
108
+ // ─── Cursor Config ──────────────────────────────────────────────────
109
/** Controls the cursor that is drawn into the recording. */
export interface CursorConfig {
  /**
   * Whether a visible cursor is shown in the video.
   * NOTE(review): the field is required on this type although the original
   * comment mentioned a default of true — confirm where that default applies.
   */
  enabled: boolean;
  /** Visual style of the cursor. */
  style?: 'dot' | 'arrow' | 'pointer' | 'custom';
  /** Cursor colour as a CSS colour string. */
  color?: string;
  /** Cursor size in px (default: 20). */
  size?: number;
  /** Play an animation on click (default: true). */
  clickAnimation?: boolean;
}
121
+
122
+ // ─── Encoding Config ────────────────────────────────────────────────
123
/** Video codecs that can be requested for the output. */
export type VideoCodec = 'h264' | 'h265' | 'vp9';

/** Container formats that can be requested for the output. */
export type VideoFormat = 'mp4' | 'webm';

/** Encoder settings for the final video. */
export interface EncodingConfig {
  /** Video codec (default: `h264`). */
  codec?: VideoCodec;
  /** Container format (default: `mp4`). */
  format?: VideoFormat;
  /** Constant Rate Factor: 0 is lossless, 51 is worst quality (default: 18). */
  crf?: number;
  /** Encoder speed/quality preset (default: `slow`, for best quality). */
  preset?: 'ultrafast' | 'fast' | 'medium' | 'slow' | 'veryslow';
  /** Output frame rate (default: 60). */
  fps?: number;
}
138
+
139
+ // ─── Main Recording Config ──────────────────────────────────────────
140
/** Everything needed to record one page as a video. */
export interface RecordingConfig {
  /** Page to record. */
  url: string;
  /** Output file path without extension; the extension is determined automatically. */
  outputPath: string;
  /** A named preset or explicit viewport dimensions. */
  viewport?: ViewportPreset | ViewportConfig;
  /** Capture frame rate (default: 60). */
  fps?: number;
  /** Scenes to play; when empty a default full-page scroll is recorded. */
  scenes?: Scene[];
  /** Cursor rendering options. */
  cursor?: CursorConfig;
  /** Encoder settings for the final video. */
  encoding?: EncodingConfig;
  /** Dismiss cookie banners and overlays before recording (default: true). */
  dismissOverlays?: boolean;
  /** Scroll through the page first to trigger lazy loading (default: true). */
  preloadContent?: boolean;
  /** Device scale factor, e.g. for retina output (default: 1). */
  deviceScaleFactor?: number;
  /** Record in dark mode (default: false). */
  darkMode?: boolean;
  /** User agent string to use instead of the default one. */
  userAgent?: string;
  /** Turn off CSS smooth scrolling to avoid double easing (default: true). */
  disableSmoothScroll?: boolean;
}
168
+
169
+ // ─── Recording Result ───────────────────────────────────────────────
170
/** Summary returned after a recording has been produced. */
export interface RecordingResult {
  /** Whether the recording succeeded. */
  success: boolean;
  /** Facts about the encoded video file. */
  video: {
    /** Location of the video file. */
    path: string;
    /** Container format name. */
    format: string;
    /** Codec name. */
    codec: string;
    /** Frame rate of the video. */
    fps: number;
    /** Length of the video. */
    duration: number;
    /** Number of frames in the video. */
    totalFrames: number;
    /** Frame dimensions. */
    resolution: { width: number; height: number };
    /** File size in bytes. */
    sizeBytes: number;
    /** File size in megabytes, pre-formatted as a string. */
    sizeMB: string;
  };
  /** Thumbnail image, when one was produced. */
  thumbnail?: {
    path: string;
    width: number;
    height: number;
  };
  /** Number of scenes in the recording. */
  scenes: number;
  /** URL that was recorded. */
  url: string;
  /** Capture time, as a string. */
  captureTime: string;
}