demo-dev 0.0.1-alpha.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +174 -0
- package/bin/demo-cli.js +26 -0
- package/bin/demo-dev.js +26 -0
- package/demo.dev.config.example.json +20 -0
- package/dist/index.d.ts +392 -0
- package/dist/index.js +2116 -0
- package/package.json +76 -0
- package/skills/demo-dev/SKILL.md +153 -0
- package/skills/demo-dev/references/configuration.md +102 -0
- package/skills/demo-dev/references/recipes.md +83 -0
- package/src/ai/provider.ts +254 -0
- package/src/auth/bootstrap.ts +72 -0
- package/src/browser/session.ts +43 -0
- package/src/capture/continuous-capture.ts +739 -0
- package/src/cli.ts +337 -0
- package/src/config/project.ts +183 -0
- package/src/github/comment.ts +134 -0
- package/src/index.ts +10 -0
- package/src/lib/data-uri.ts +21 -0
- package/src/lib/fs.ts +7 -0
- package/src/lib/git.ts +59 -0
- package/src/lib/media.ts +23 -0
- package/src/orchestrate.ts +166 -0
- package/src/planner/heuristic.ts +180 -0
- package/src/planner/index.ts +26 -0
- package/src/planner/llm.ts +85 -0
- package/src/planner/openai.ts +77 -0
- package/src/planner/prompt.ts +331 -0
- package/src/planner/refine.ts +155 -0
- package/src/planner/schema.ts +62 -0
- package/src/presentation/polish.ts +84 -0
- package/src/probe/page-probe.ts +225 -0
- package/src/render/browser-frame.ts +176 -0
- package/src/render/ffmpeg-compose.ts +779 -0
- package/src/render/visual-plan.ts +422 -0
- package/src/setup/doctor.ts +158 -0
- package/src/setup/init.ts +90 -0
- package/src/types.ts +105 -0
- package/src/voice/script.ts +42 -0
- package/src/voice/tts.ts +286 -0
- package/tsconfig.json +16 -0
package/src/types.ts
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
 * Locator strategy used to find a page element before performing an action.
 * Discriminated on `strategy` (label / text / placeholder / test id / CSS /
 * ARIA role). NOTE(review): the names mirror Playwright-style locators —
 * confirm against the action executor that resolves these.
 */
export type ActionTarget =
  | { strategy: "label"; value: string; exact?: boolean }
  | { strategy: "text"; value: string; exact?: boolean }
  | { strategy: "placeholder"; value: string; exact?: boolean }
  | { strategy: "testId"; value: string }
  | { strategy: "css"; value: string }
  | { strategy: "role"; role: string; name?: string; exact?: boolean };
|
|
8
|
+
|
|
9
|
+
/**
 * One scripted step inside a demo scene, discriminated on `type`.
 * Actions either navigate/wait, interact with an `ActionTarget`, or block
 * until a condition (text / URL) is observed.
 */
export type SceneAction =
  | { type: "navigate"; url: string }
  | { type: "wait"; ms: number }
  | { type: "scroll"; y: number }
  | { type: "scrollIntoView"; target: ActionTarget }
  | { type: "click"; target: ActionTarget }
  | { type: "hover"; target: ActionTarget }
  | { type: "fill"; target: ActionTarget; value: string }
  | { type: "press"; key: string }
  | { type: "select"; target: ActionTarget; value: string }
  | {
      type: "dragSelect";
      target: ActionTarget;
      // Optional drag start/end coordinates. Units (pixels vs. ratios of the
      // target box) are not visible here — confirm against the executor.
      startX?: number;
      startY?: number;
      endX?: number;
      endY?: number;
    }
  | { type: "waitForText"; value: string; exact?: boolean; timeoutMs?: number }
  | { type: "waitForUrl"; value: string; timeoutMs?: number };
|
|
29
|
+
|
|
30
|
+
/** Summary of the git change set a demo is generated from. */
export interface DiffContext {
  currentBranch: string;
  // Ref the branch is compared against (e.g. the default branch).
  baseRef: string;
  changedFiles: string[];
  // Textual diff excerpt; presumably truncated for planner prompting —
  // confirm against lib/git.ts.
  diffPreview: string;
}
|
|
36
|
+
|
|
37
|
+
/** One scripted segment of a demo: where to go, what to do, what to say. */
export interface DemoScene {
  id: string;
  title: string;
  // What the scene is meant to demonstrate.
  goal: string;
  // Page the scene starts on.
  url: string;
  // Browser viewport for the scene (width/height; presumably pixels).
  viewport: {
    width: number;
    height: number;
  };
  // Steps executed in order while capturing.
  actions: SceneAction[];
  // Spoken narration text (consumed by voice/script.ts → TTS).
  narration: string;
  // On-screen caption text for the scene.
  caption: string;
  durationMs: number;
  // NOTE(review): looks like strings expected on screen as evidence the
  // scene succeeded — inferred from the name; confirm against capture code.
  evidenceHints: string[];
}
|
|
52
|
+
|
|
53
|
+
/** Full demo outline: ordered scenes plus presentation metadata. */
export interface DemoPlan {
  title: string;
  summary: string;
  // Git branch the plan was generated from.
  branch: string;
  // Creation timestamp — exact format not visible here; confirm (likely ISO).
  generatedAt: string;
  scenes: DemoScene[];
}
|
|
60
|
+
|
|
61
|
+
/** An interactive element discovered while probing a page. */
export interface ProbeElement {
  tag: string;
  // NOTE(review): role/name read as ARIA role + accessible name — assumed
  // from field names; confirm against probe/page-probe.ts.
  role: string;
  name: string;
  text?: string;
  label?: string;
  placeholder?: string;
  // Form-control `type` attribute, when present.
  type?: string;
  href?: string;
  testId?: string;
}
|
|
72
|
+
|
|
73
|
+
/** What a probe observed on a page at one point in time. */
export interface ProbeSnapshot {
  // URL actually reached — presumably after redirects; confirm in page-probe.ts.
  resolvedUrl?: string;
  pageTitle?: string;
  headings: string[];
  // Page text excerpt; truncation rules live in the prober.
  textPreview: string;
  interactiveElements: ProbeElement[];
  // Set when the probe failed instead of (or alongside) content.
  error?: string;
}
|
|
81
|
+
|
|
82
|
+
/**
 * Probe result for one planned scene: an initial snapshot, plus an optional
 * second snapshot taken after performing a single follow-up action.
 */
export interface PageProbe {
  sceneId: string;
  sceneTitle: string;
  // URL the probe was asked to visit (may differ from ProbeSnapshot.resolvedUrl).
  requestedUrl: string;
  initial: ProbeSnapshot;
  // Action performed between the two snapshots, if a follow-up was taken.
  followUpAction?: SceneAction;
  followUp?: ProbeSnapshot;
}
|
|
90
|
+
|
|
91
|
+
/** One caption token with its [startMs, endMs] window inside a voice line. */
export interface VoiceToken {
  text: string;
  startMs: number;
  endMs: number;
}
|
|
96
|
+
|
|
97
|
+
/** Narration for one scene, with estimated timing and (after TTS) real audio. */
export interface VoiceLine {
  sceneId: string;
  // The narration text being spoken.
  text: string;
  // Text-length-based duration estimate in ms (see voice/script.ts).
  estimatedMs: number;
  // Measured duration of the synthesized audio in ms, when TTS succeeded.
  audioDurationMs?: number;
  // Per-word caption timings; built against estimatedMs, rescaled to the
  // real audio duration by voice/tts.ts.
  tokens: VoiceToken[];
  // Filesystem path of the synthesized audio, when TTS succeeded.
  audioPath?: string;
  // data: URI copy of the audio for embedding (see lib/data-uri.ts).
  audioSrc?: string;
}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import type { DemoPlan, VoiceLine, VoiceToken } from "../types.js";
|
|
2
|
+
|
|
3
|
+
const estimateMs = (text: string) => {
|
|
4
|
+
const words = text.trim().split(/\s+/).filter(Boolean).length;
|
|
5
|
+
const chineseChars = text.replace(/\s+/g, "").length;
|
|
6
|
+
return Math.max(2500, Math.round((words > 1 ? words : chineseChars) * 280));
|
|
7
|
+
};
|
|
8
|
+
|
|
9
|
+
const tokenizeNarration = (text: string, estimatedMs: number): VoiceToken[] => {
|
|
10
|
+
const tokens = text.match(/\S+\s*/g) ?? [text];
|
|
11
|
+
const weighted = tokens.map((token) => ({
|
|
12
|
+
text: token,
|
|
13
|
+
weight: Math.max(token.replace(/\s+/g, "").length, 1),
|
|
14
|
+
}));
|
|
15
|
+
const totalWeight = weighted.reduce((sum, token) => sum + token.weight, 0) || 1;
|
|
16
|
+
|
|
17
|
+
let cursor = 0;
|
|
18
|
+
return weighted.map((token, index) => {
|
|
19
|
+
const remaining = estimatedMs - cursor;
|
|
20
|
+
const sliceMs = index === weighted.length - 1 ? remaining : Math.max(120, Math.round((token.weight / totalWeight) * estimatedMs));
|
|
21
|
+
const startMs = cursor;
|
|
22
|
+
const endMs = Math.min(estimatedMs, startMs + sliceMs);
|
|
23
|
+
cursor = endMs;
|
|
24
|
+
return {
|
|
25
|
+
text: token.text,
|
|
26
|
+
startMs,
|
|
27
|
+
endMs,
|
|
28
|
+
};
|
|
29
|
+
});
|
|
30
|
+
};
|
|
31
|
+
|
|
32
|
+
export const buildVoiceScript = (plan: DemoPlan): VoiceLine[] => {
|
|
33
|
+
return plan.scenes.map((scene) => {
|
|
34
|
+
const estimatedMs = estimateMs(scene.narration);
|
|
35
|
+
return {
|
|
36
|
+
sceneId: scene.id,
|
|
37
|
+
text: scene.narration,
|
|
38
|
+
estimatedMs,
|
|
39
|
+
tokens: tokenizeNarration(scene.narration, estimatedMs),
|
|
40
|
+
};
|
|
41
|
+
});
|
|
42
|
+
};
|
package/src/voice/tts.ts
ADDED
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { access, mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { promisify } from "node:util";
|
|
6
|
+
import type { VoiceLine } from "../types.js";
|
|
7
|
+
import { fileToDataUri } from "../lib/data-uri.js";
|
|
8
|
+
import { getMediaDurationMs } from "../lib/media.js";
|
|
9
|
+
|
|
10
|
+
// Promise-based wrapper around child_process.execFile for async/await use.
const execFileAsync = promisify(execFile);
|
|
11
|
+
|
|
12
|
+
// Selectable TTS backends; "auto" tries elevenlabs → openai → local in order
// of what is configured (see getProviderOrder).
type TtsProvider = "auto" | "elevenlabs" | "openai" | "local";
|
|
13
|
+
|
|
14
|
+
/**
 * Resolved TTS settings, assembled from DEMO_* environment variables.
 * See getTtsConfig for the exact variable names and defaults; optional
 * fields stay undefined when their variable is unset.
 */
interface TtsConfig {
  provider: TtsProvider;
  // OpenAI speech synthesis.
  openAiApiKey?: string;
  openAiBaseUrl: string;
  openAiModel: string;
  openAiVoice: string;
  // ElevenLabs speech synthesis.
  elevenlabsApiKey?: string;
  elevenlabsBaseUrl: string;
  elevenlabsVoiceId?: string;
  elevenlabsModel: string;
  elevenlabsOutputFormat: string;
  // Optional voice tuning; omitted from the request when undefined so the
  // provider's own defaults apply.
  elevenlabsStability?: number;
  elevenlabsSimilarityBoost?: number;
  elevenlabsStyle?: number;
  elevenlabsSpeakerBoost?: boolean;
  // Local `say`-based fallback (macOS).
  localVoice: string;
  localRate: string; // speech rate passed to `say -r`
}
|
|
32
|
+
|
|
33
|
+
const parseOptionalNumber = (value: string | undefined) => {
|
|
34
|
+
if (!value?.trim()) return undefined;
|
|
35
|
+
const parsed = Number(value);
|
|
36
|
+
return Number.isFinite(parsed) ? parsed : undefined;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
const parseOptionalBoolean = (value: string | undefined) => {
|
|
40
|
+
if (!value?.trim()) return undefined;
|
|
41
|
+
if (["1", "true", "yes", "on"].includes(value.toLowerCase())) return true;
|
|
42
|
+
if (["0", "false", "no", "off"].includes(value.toLowerCase())) return false;
|
|
43
|
+
return undefined;
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
/** Strip trailing slashes so path segments can be appended safely. */
const normalizeBaseUrl = (baseUrl: string) => {
  return baseUrl.replace(/\/+$/, "");
};
|
|
47
|
+
|
|
48
|
+
const getTtsConfig = (): TtsConfig => {
|
|
49
|
+
return {
|
|
50
|
+
provider: (process.env.DEMO_TTS_PROVIDER as TtsProvider | undefined) ?? "auto",
|
|
51
|
+
openAiApiKey: process.env.DEMO_OPENAI_API_KEY,
|
|
52
|
+
openAiBaseUrl: process.env.DEMO_OPENAI_BASE_URL ?? "https://api.openai.com/v1",
|
|
53
|
+
openAiModel: process.env.DEMO_TTS_MODEL ?? "gpt-4o-mini-tts",
|
|
54
|
+
openAiVoice: process.env.DEMO_TTS_VOICE ?? "alloy",
|
|
55
|
+
elevenlabsApiKey: process.env.DEMO_ELEVENLABS_API_KEY,
|
|
56
|
+
elevenlabsBaseUrl: process.env.DEMO_ELEVENLABS_BASE_URL ?? "https://api.elevenlabs.io/v1",
|
|
57
|
+
elevenlabsVoiceId: process.env.DEMO_ELEVENLABS_VOICE_ID,
|
|
58
|
+
elevenlabsModel: process.env.DEMO_ELEVENLABS_MODEL ?? "eleven_multilingual_v2",
|
|
59
|
+
elevenlabsOutputFormat: process.env.DEMO_ELEVENLABS_OUTPUT_FORMAT ?? "mp3_44100_128",
|
|
60
|
+
elevenlabsStability: parseOptionalNumber(process.env.DEMO_ELEVENLABS_STABILITY),
|
|
61
|
+
elevenlabsSimilarityBoost: parseOptionalNumber(process.env.DEMO_ELEVENLABS_SIMILARITY_BOOST),
|
|
62
|
+
elevenlabsStyle: parseOptionalNumber(process.env.DEMO_ELEVENLABS_STYLE),
|
|
63
|
+
elevenlabsSpeakerBoost: parseOptionalBoolean(process.env.DEMO_ELEVENLABS_SPEAKER_BOOST),
|
|
64
|
+
localVoice: process.env.DEMO_LOCAL_TTS_VOICE ?? "Samantha",
|
|
65
|
+
localRate: process.env.DEMO_LOCAL_TTS_RATE ?? "185",
|
|
66
|
+
};
|
|
67
|
+
};
|
|
68
|
+
|
|
69
|
+
const commandExists = async (command: string) => {
|
|
70
|
+
try {
|
|
71
|
+
await access(command);
|
|
72
|
+
return true;
|
|
73
|
+
} catch {
|
|
74
|
+
try {
|
|
75
|
+
await execFileAsync("which", [command]);
|
|
76
|
+
return true;
|
|
77
|
+
} catch {
|
|
78
|
+
return false;
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
};
|
|
82
|
+
|
|
83
|
+
const retimeLineToAudioDuration = (line: VoiceLine, audioDurationMs?: number): VoiceLine => {
|
|
84
|
+
if (!audioDurationMs || !line.tokens.length || line.estimatedMs <= 0) {
|
|
85
|
+
return {
|
|
86
|
+
...line,
|
|
87
|
+
audioDurationMs,
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
const scale = audioDurationMs / line.estimatedMs;
|
|
92
|
+
return {
|
|
93
|
+
...line,
|
|
94
|
+
audioDurationMs,
|
|
95
|
+
tokens: line.tokens.map((token, index) => ({
|
|
96
|
+
...token,
|
|
97
|
+
startMs: Math.max(0, Math.round(token.startMs * scale)),
|
|
98
|
+
endMs:
|
|
99
|
+
index === line.tokens.length - 1 ? audioDurationMs : Math.max(1, Math.round(token.endMs * scale)),
|
|
100
|
+
})),
|
|
101
|
+
};
|
|
102
|
+
};
|
|
103
|
+
|
|
104
|
+
const writeAudioLine = async (line: VoiceLine, audioPath: string, arrayBuffer: ArrayBuffer): Promise<VoiceLine> => {
|
|
105
|
+
await writeFile(audioPath, Buffer.from(arrayBuffer));
|
|
106
|
+
const audioDurationMs = await getMediaDurationMs(audioPath);
|
|
107
|
+
const timedLine = retimeLineToAudioDuration(line, audioDurationMs);
|
|
108
|
+
|
|
109
|
+
return {
|
|
110
|
+
...timedLine,
|
|
111
|
+
audioPath,
|
|
112
|
+
audioSrc: await fileToDataUri(audioPath),
|
|
113
|
+
};
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
const synthesizeOpenAiLine = async (line: VoiceLine, outputDir: string, config: TtsConfig): Promise<VoiceLine> => {
|
|
117
|
+
if (!config.openAiApiKey) throw new Error("OpenAI TTS not configured. Set DEMO_OPENAI_API_KEY.");
|
|
118
|
+
|
|
119
|
+
const response = await fetch(`${normalizeBaseUrl(config.openAiBaseUrl)}/audio/speech`, {
|
|
120
|
+
method: "POST",
|
|
121
|
+
headers: {
|
|
122
|
+
authorization: `Bearer ${config.openAiApiKey}`,
|
|
123
|
+
"content-type": "application/json",
|
|
124
|
+
},
|
|
125
|
+
body: JSON.stringify({
|
|
126
|
+
model: config.openAiModel,
|
|
127
|
+
voice: config.openAiVoice,
|
|
128
|
+
input: line.text,
|
|
129
|
+
format: "mp3",
|
|
130
|
+
}),
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
if (!response.ok) {
|
|
134
|
+
const errorText = await response.text();
|
|
135
|
+
throw new Error(`OpenAI TTS request failed: ${response.status} ${errorText}`);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
return writeAudioLine(line, join(outputDir, `${line.sceneId}.mp3`), await response.arrayBuffer());
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
const synthesizeElevenLabsLine = async (line: VoiceLine, outputDir: string, config: TtsConfig): Promise<VoiceLine> => {
|
|
142
|
+
if (!config.elevenlabsApiKey) throw new Error("ElevenLabs TTS not configured. Set DEMO_ELEVENLABS_API_KEY.");
|
|
143
|
+
if (!config.elevenlabsVoiceId) throw new Error("ElevenLabs voice id not configured");
|
|
144
|
+
|
|
145
|
+
const voiceSettings = {
|
|
146
|
+
...(config.elevenlabsStability === undefined ? {} : { stability: config.elevenlabsStability }),
|
|
147
|
+
...(config.elevenlabsSimilarityBoost === undefined
|
|
148
|
+
? {}
|
|
149
|
+
: { similarity_boost: config.elevenlabsSimilarityBoost }),
|
|
150
|
+
...(config.elevenlabsStyle === undefined ? {} : { style: config.elevenlabsStyle }),
|
|
151
|
+
...(config.elevenlabsSpeakerBoost === undefined
|
|
152
|
+
? {}
|
|
153
|
+
: { use_speaker_boost: config.elevenlabsSpeakerBoost }),
|
|
154
|
+
};
|
|
155
|
+
|
|
156
|
+
const response = await fetch(
|
|
157
|
+
`${normalizeBaseUrl(config.elevenlabsBaseUrl)}/text-to-speech/${encodeURIComponent(config.elevenlabsVoiceId)}`,
|
|
158
|
+
{
|
|
159
|
+
method: "POST",
|
|
160
|
+
headers: {
|
|
161
|
+
accept: "audio/mpeg",
|
|
162
|
+
"content-type": "application/json",
|
|
163
|
+
"xi-api-key": config.elevenlabsApiKey,
|
|
164
|
+
},
|
|
165
|
+
body: JSON.stringify({
|
|
166
|
+
text: line.text,
|
|
167
|
+
model_id: config.elevenlabsModel,
|
|
168
|
+
output_format: config.elevenlabsOutputFormat,
|
|
169
|
+
...(Object.keys(voiceSettings).length > 0 ? { voice_settings: voiceSettings } : {}),
|
|
170
|
+
}),
|
|
171
|
+
},
|
|
172
|
+
);
|
|
173
|
+
|
|
174
|
+
if (!response.ok) {
|
|
175
|
+
const errorText = await response.text();
|
|
176
|
+
throw new Error(`ElevenLabs TTS request failed: ${response.status} ${errorText}`);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
return writeAudioLine(line, join(outputDir, `${line.sceneId}.mp3`), await response.arrayBuffer());
|
|
180
|
+
};
|
|
181
|
+
|
|
182
|
+
const synthesizeLocalLine = async (line: VoiceLine, outputDir: string, config: TtsConfig): Promise<VoiceLine> => {
|
|
183
|
+
const hasSay = await commandExists("say");
|
|
184
|
+
const hasFfmpeg = await commandExists("ffmpeg");
|
|
185
|
+
if (!hasSay || !hasFfmpeg) {
|
|
186
|
+
throw new Error("Local TTS tools not available");
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
const tempDir = await mkdtemp(join(tmpdir(), "demo-dev-tts-"));
|
|
190
|
+
const aiffPath = join(tempDir, `${line.sceneId}.aiff`);
|
|
191
|
+
const audioPath = join(outputDir, `${line.sceneId}.mp3`);
|
|
192
|
+
|
|
193
|
+
try {
|
|
194
|
+
await execFileAsync("say", ["-v", config.localVoice, "-r", config.localRate, "-o", aiffPath, line.text], {
|
|
195
|
+
maxBuffer: 1024 * 1024 * 10,
|
|
196
|
+
});
|
|
197
|
+
|
|
198
|
+
await execFileAsync(
|
|
199
|
+
"ffmpeg",
|
|
200
|
+
["-y", "-i", aiffPath, "-codec:a", "libmp3lame", "-q:a", "3", audioPath],
|
|
201
|
+
{ maxBuffer: 1024 * 1024 * 10 },
|
|
202
|
+
);
|
|
203
|
+
|
|
204
|
+
const audioDurationMs = await getMediaDurationMs(audioPath);
|
|
205
|
+
const timedLine = retimeLineToAudioDuration(line, audioDurationMs);
|
|
206
|
+
|
|
207
|
+
return {
|
|
208
|
+
...timedLine,
|
|
209
|
+
audioPath,
|
|
210
|
+
audioSrc: await fileToDataUri(audioPath),
|
|
211
|
+
};
|
|
212
|
+
} finally {
|
|
213
|
+
await rm(tempDir, { recursive: true, force: true }).catch(() => undefined);
|
|
214
|
+
}
|
|
215
|
+
};
|
|
216
|
+
|
|
217
|
+
const getProviderOrder = (config: TtsConfig): Array<Exclude<TtsProvider, "auto">> => {
|
|
218
|
+
switch (config.provider) {
|
|
219
|
+
case "elevenlabs":
|
|
220
|
+
return ["elevenlabs"];
|
|
221
|
+
case "openai":
|
|
222
|
+
return ["openai"];
|
|
223
|
+
case "local":
|
|
224
|
+
return ["local"];
|
|
225
|
+
case "auto":
|
|
226
|
+
default: {
|
|
227
|
+
const providers: Array<Exclude<TtsProvider, "auto">> = [];
|
|
228
|
+
if (config.elevenlabsApiKey && config.elevenlabsVoiceId) providers.push("elevenlabs");
|
|
229
|
+
if (config.openAiApiKey) providers.push("openai");
|
|
230
|
+
providers.push("local");
|
|
231
|
+
return providers;
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
};
|
|
235
|
+
|
|
236
|
+
const synthesizeWithProvider = async (
|
|
237
|
+
provider: Exclude<TtsProvider, "auto">,
|
|
238
|
+
line: VoiceLine,
|
|
239
|
+
outputDir: string,
|
|
240
|
+
config: TtsConfig,
|
|
241
|
+
) => {
|
|
242
|
+
switch (provider) {
|
|
243
|
+
case "elevenlabs":
|
|
244
|
+
return synthesizeElevenLabsLine(line, outputDir, config);
|
|
245
|
+
case "openai":
|
|
246
|
+
return synthesizeOpenAiLine(line, outputDir, config);
|
|
247
|
+
case "local":
|
|
248
|
+
return synthesizeLocalLine(line, outputDir, config);
|
|
249
|
+
}
|
|
250
|
+
};
|
|
251
|
+
|
|
252
|
+
export const synthesizeVoice = async (
|
|
253
|
+
lines: VoiceLine[],
|
|
254
|
+
options: { outputDir?: string } = {},
|
|
255
|
+
): Promise<VoiceLine[]> => {
|
|
256
|
+
const outputDir = options.outputDir ?? "artifacts/audio";
|
|
257
|
+
const config = getTtsConfig();
|
|
258
|
+
const providers = getProviderOrder(config);
|
|
259
|
+
await mkdir(outputDir, { recursive: true });
|
|
260
|
+
|
|
261
|
+
const results: VoiceLine[] = [];
|
|
262
|
+
for (const line of lines) {
|
|
263
|
+
let synthesized: VoiceLine | undefined;
|
|
264
|
+
const errors: string[] = [];
|
|
265
|
+
|
|
266
|
+
for (const provider of providers) {
|
|
267
|
+
try {
|
|
268
|
+
synthesized = await synthesizeWithProvider(provider, line, outputDir, config);
|
|
269
|
+
break;
|
|
270
|
+
} catch (error) {
|
|
271
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
272
|
+
errors.push(`${provider}: ${message}`);
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
if (synthesized) {
|
|
277
|
+
results.push(synthesized);
|
|
278
|
+
continue;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
console.warn(`TTS failed for ${line.sceneId}, fallback to text-only`, errors.join(" | "));
|
|
282
|
+
results.push(line);
|
|
283
|
+
}
|
|
284
|
+
|
|
285
|
+
return results;
|
|
286
|
+
};
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"compilerOptions": {
|
|
3
|
+
"target": "ES2022",
|
|
4
|
+
"module": "NodeNext",
|
|
5
|
+
"moduleResolution": "NodeNext",
|
|
6
|
+
"strict": true,
|
|
7
|
+
"esModuleInterop": true,
|
|
8
|
+
"forceConsistentCasingInFileNames": true,
|
|
9
|
+
"skipLibCheck": true,
|
|
10
|
+
"resolveJsonModule": true,
|
|
11
|
+
"allowImportingTsExtensions": true,
|
|
12
|
+
"jsx": "react-jsx",
|
|
13
|
+
"types": ["node"]
|
|
14
|
+
},
|
|
15
|
+
"include": ["src/**/*.ts", "src/**/*.tsx"]
|
|
16
|
+
}
|