mia-narrative 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +149 -0
- package/bin/mia-narrative.ts +86 -0
- package/dist/bin/mia-narrative.d.ts +3 -0
- package/dist/bin/mia-narrative.d.ts.map +1 -0
- package/dist/bin/mia-narrative.js +34 -0
- package/dist/bin/mia-narrative.js.map +1 -0
- package/dist/src/audio/processor.d.ts +6 -0
- package/dist/src/audio/processor.d.ts.map +1 -0
- package/dist/src/audio/processor.js +72 -0
- package/dist/src/audio/processor.js.map +1 -0
- package/dist/src/commands/generate.d.ts +19 -0
- package/dist/src/commands/generate.d.ts.map +1 -0
- package/dist/src/commands/generate.js +104 -0
- package/dist/src/commands/generate.js.map +1 -0
- package/dist/src/commands/voices.d.ts +8 -0
- package/dist/src/commands/voices.d.ts.map +1 -0
- package/dist/src/commands/voices.js +60 -0
- package/dist/src/commands/voices.js.map +1 -0
- package/dist/src/config/defaults.d.ts +54 -0
- package/dist/src/config/defaults.d.ts.map +1 -0
- package/dist/src/config/defaults.js +30 -0
- package/dist/src/config/defaults.js.map +1 -0
- package/dist/src/config/voices.d.ts +15 -0
- package/dist/src/config/voices.d.ts.map +1 -0
- package/dist/src/config/voices.js +79 -0
- package/dist/src/config/voices.js.map +1 -0
- package/dist/src/engines/ElevenLabsEngine.d.ts +10 -0
- package/dist/src/engines/ElevenLabsEngine.d.ts.map +1 -0
- package/dist/src/engines/ElevenLabsEngine.js +78 -0
- package/dist/src/engines/ElevenLabsEngine.js.map +1 -0
- package/dist/src/engines/SystemTtsEngine.d.ts +9 -0
- package/dist/src/engines/SystemTtsEngine.d.ts.map +1 -0
- package/dist/src/engines/SystemTtsEngine.js +56 -0
- package/dist/src/engines/SystemTtsEngine.js.map +1 -0
- package/dist/src/engines/base.d.ts +23 -0
- package/dist/src/engines/base.d.ts.map +1 -0
- package/dist/src/engines/base.js +3 -0
- package/dist/src/engines/base.js.map +1 -0
- package/dist/src/engines/factory.d.ts +6 -0
- package/dist/src/engines/factory.d.ts.map +1 -0
- package/dist/src/engines/factory.js +20 -0
- package/dist/src/engines/factory.js.map +1 -0
- package/dist/src/engines/piper.d.ts +12 -0
- package/dist/src/engines/piper.d.ts.map +1 -0
- package/dist/src/engines/piper.js +118 -0
- package/dist/src/engines/piper.js.map +1 -0
- package/dist/src/utils/file-reader.d.ts +5 -0
- package/dist/src/utils/file-reader.d.ts.map +1 -0
- package/dist/src/utils/file-reader.js +26 -0
- package/dist/src/utils/file-reader.js.map +1 -0
- package/dist/src/utils/logger.d.ts +10 -0
- package/dist/src/utils/logger.d.ts.map +1 -0
- package/dist/src/utils/logger.js +27 -0
- package/dist/src/utils/logger.js.map +1 -0
- package/package.json +35 -0
- package/src/audio/processor.ts +94 -0
- package/src/commands/generate.ts +144 -0
- package/src/commands/voices.ts +68 -0
- package/src/config/defaults.ts +41 -0
- package/src/config/voices.ts +89 -0
- package/src/engines/ElevenLabsEngine.ts +81 -0
- package/src/engines/SystemTtsEngine.ts +61 -0
- package/src/engines/base.ts +26 -0
- package/src/engines/factory.ts +28 -0
- package/src/engines/piper.ts +134 -0
- package/src/types/say.d.ts +26 -0
- package/src/utils/file-reader.ts +25 -0
- package/src/utils/logger.ts +33 -0
- package/tsconfig.json +21 -0
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mia-narrative",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Text-to-speech CLI for narrative audio generation",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"mia-narrative": "./dist/bin/mia-narrative.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "tsc",
|
|
11
|
+
"dev": "ts-node bin/mia-narrative.ts",
|
|
12
|
+
"test": "node --test dist/**/*.test.js"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"tts",
|
|
16
|
+
"text-to-speech",
|
|
17
|
+
"piper",
|
|
18
|
+
"audio",
|
|
19
|
+
"cli"
|
|
20
|
+
],
|
|
21
|
+
"author": "",
|
|
22
|
+
"license": "MIT",
|
|
23
|
+
"dependencies": {
|
|
24
|
+
"@elevenlabs/elevenlabs-js": "^2.27.0",
|
|
25
|
+
"chalk": "^5.6.2",
|
|
26
|
+
"commander": "^14.0.2",
|
|
27
|
+
"ora": "^9.0.0",
|
|
28
|
+
"say": "^0.16.0"
|
|
29
|
+
},
|
|
30
|
+
"devDependencies": {
|
|
31
|
+
"@types/node": "^24.10.2",
|
|
32
|
+
"ts-node": "^10.9.2",
|
|
33
|
+
"typescript": "^5.9.3"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { spawn } from 'child_process';
|
|
2
|
+
import { AudioParameters } from '../config/defaults.js';
|
|
3
|
+
import { Logger } from '../utils/logger.js';
|
|
4
|
+
|
|
5
|
+
/**
 * FFmpeg-based post-processing for generated speech.
 *
 * Builds an audio filter chain from the audio parameters and runs the
 * `ffmpeg` executable to encode the result as a 192 kbps, 44.1 kHz stereo MP3.
 */
export class AudioProcessor {
  /**
   * Builds the value passed to FFmpeg's `-af` option.
   *
   * A filter is emitted only when its parameter differs from the neutral value
   * hard-coded here (speed 1.0, volume 0.8, pitch 1.0, echo 0.1) or is greater
   * than zero (reverb, compression). Parameters that are NaN are skipped so
   * that an unparsable value never produces an invalid filter such as
   * `atempo=NaN`.
   *
   * @param params Audio parameters; each value is clamped to its filter range.
   * @returns Comma-separated filter chain, or `''` when no filter applies.
   */
  static buildFFmpegFilter(params: AudioParameters): string {
    const filters: string[] = [];
    const clamp = (value: number, min: number, max: number): number =>
      Math.max(min, Math.min(max, value));
    const isUsable = (value: number): boolean => !Number.isNaN(Number(value));

    if (isUsable(params.speed) && params.speed !== 1.0) {
      filters.push(`atempo=${clamp(params.speed, 0.5, 2.0)}`);
    }

    if (isUsable(params.volume) && params.volume !== 0.8) {
      filters.push(`volume=${clamp(params.volume, 0.0, 1.0)}`);
    }

    if (isUsable(params.pitch) && params.pitch !== 1.0) {
      // NOTE(review): asetrate reinterprets the sample rate, which changes
      // playback speed together with pitch, and 44100 is assumed to be the
      // input rate — confirm both are intended.
      filters.push(`asetrate=44100*${clamp(params.pitch, 0.5, 2.0)}`);
    }

    if (params.reverb > 0.0) {
      const reverb = Math.min(1.0, params.reverb);
      // aecho=in_gain:out_gain:delay_ms:decay
      filters.push(
        `aecho=0.8:${0.88 * reverb}:${6 + reverb * 100}:${0.3 + reverb * 0.4}`
      );
    }

    if (isUsable(params.echo) && params.echo !== 0.1) {
      const echoDelay = clamp(params.echo, 0.1, 2.0) * 100;
      filters.push(`aecho=0.8:0.88:${echoDelay}:0.4`);
    }

    if (params.compression > 0.0) {
      const compression = Math.min(1.0, params.compression);
      const ratio = 2 + compression * 8; // 2:1 to 10:1
      const threshold = clamp(0.3 + compression * 0.5, 0.001, 1);
      // acompressor only accepts makeup gains of 1 or more; lower values make
      // FFmpeg reject the whole filter chain.
      const makeup = Math.max(1, compression * 5);
      filters.push(
        `acompressor=ratio=${ratio}:threshold=${threshold}:makeup=${makeup}`
      );
    }

    return filters.join(',');
  }

  /**
   * Encodes `inputPath` to MP3 at `outputPath`, applying the filter chain
   * derived from `params`.
   *
   * `-y` overwrites an existing output file and `-nostdin` disables FFmpeg's
   * interactive prompts; without them FFmpeg waits forever on the
   * "Overwrite? [y/N]" question because nothing is ever written to its stdin.
   *
   * @param inputPath Path of the audio file to read.
   * @param outputPath Path of the MP3 file to create or overwrite.
   * @param params Audio parameters used to build the filter chain.
   * @returns Resolves when FFmpeg exits with code 0.
   * @throws Rejects when FFmpeg cannot be started or exits with another code.
   */
  static async processAudio(
    inputPath: string,
    outputPath: string,
    params: AudioParameters
  ): Promise<void> {
    const filterChain = this.buildFFmpegFilter(params);
    const args: string[] = [
      '-y',
      '-nostdin',
      '-i',
      inputPath,
      '-codec:a',
      'libmp3lame',
      '-b:a',
      '192k',
      '-ar',
      '44100',
      '-ac',
      '2',
    ];

    if (filterChain) {
      Logger.debug(`Applying FFMpeg filters: ${filterChain}`);
      args.push('-af', filterChain);
    }

    args.push(outputPath);

    return new Promise<void>((resolve, reject) => {
      const ffmpeg = spawn('ffmpeg', args);
      // 'error' and 'close' can both fire for one process; settle only once.
      let settled = false;

      ffmpeg.stderr.on('data', (data) => {
        Logger.debug(`ffmpeg: ${data}`);
      });

      ffmpeg.on('error', (err) => {
        if (settled) return;
        settled = true;
        Logger.error(`Failed to start FFmpeg: ${err.message}`);
        reject(err);
      });

      ffmpeg.on('close', (code) => {
        if (settled) return;
        settled = true;
        if (code === 0) {
          Logger.debug('FFmpeg processing completed');
          resolve();
        } else {
          Logger.error(`FFmpeg exited with code ${code}`);
          reject(new Error(`FFmpeg failed with exit code ${code}`));
        }
      });
    });
  }
}
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { writeFileSync, unlinkSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
import { tmpdir } from 'os';
|
|
4
|
+
import ora from 'ora';
|
|
5
|
+
import {
|
|
6
|
+
DEFAULT_AUDIO_PARAMS,
|
|
7
|
+
AUDIO_PARAM_RANGES,
|
|
8
|
+
AudioParameters,
|
|
9
|
+
ExitCode,
|
|
10
|
+
} from '../config/defaults.js';
|
|
11
|
+
import { FileReader } from '../utils/file-reader.js';
|
|
12
|
+
import { Logger } from '../utils/logger.js';
|
|
13
|
+
import { EngineFactory, EngineType } from '../engines/factory.js';
|
|
14
|
+
import { AudioProcessor } from '../audio/processor.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Options accepted by `generateCommand`.
 *
 * Exactly one of `text` or `file` must be supplied. The numeric fields are
 * optional overrides for the default audio parameters.
 */
interface GenerateOptions {
  /** Text to synthesize. */
  text?: string;
  /** Path of a text file to synthesize. */
  file?: string;
  /** TTS engine to use; `generateCommand` falls back to `'system'`. */
  engine?: EngineType;
  /** Voice identifier; when omitted the engine's first voice is used. */
  voiceId?: string;
  /** Output path; `generateCommand` falls back to `'output.mp3'`. */
  output?: string;
  speed?: number;
  pitch?: number;
  volume?: number;
  emphasis?: number;
  reverb?: number;
  echo?: number;
  compression?: number;
  /** Passed on as the `pauseLength` audio parameter. */
  pause?: number;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Merges user-supplied audio parameters over the defaults and clamps every
 * value to its range in `AUDIO_PARAM_RANGES`.
 *
 * Missing (`undefined`/`null`) values keep their default. Values that are not
 * valid numbers (for example NaN from an unparsable CLI argument) also fall
 * back to the default, with a warning, instead of being passed on to the
 * engine and FFmpeg. Numeric strings are converted to numbers.
 *
 * @param params Partial set of overrides.
 * @returns A complete, range-checked parameter set.
 */
function validateParams(params: Partial<AudioParameters>): AudioParameters {
  const validated: AudioParameters = { ...DEFAULT_AUDIO_PARAMS };

  for (const key of Object.keys(AUDIO_PARAM_RANGES) as (keyof AudioParameters)[]) {
    const raw: unknown = params[key];
    if (raw === undefined || raw === null) {
      continue;
    }

    const value = typeof raw === 'number' ? raw : Number(raw);
    if (Number.isNaN(value)) {
      Logger.warn(
        `Ignoring invalid value for ${key}; using default ${DEFAULT_AUDIO_PARAMS[key]}`
      );
      continue;
    }

    const { min, max } = AUDIO_PARAM_RANGES[key];
    validated[key] = Math.max(min, Math.min(max, value));
  }

  return validated;
}
|
|
50
|
+
|
|
51
|
+
/**
 * Implements the `generate` command: reads the input text, synthesizes it with
 * the selected TTS engine, post-processes the audio with FFmpeg and writes the
 * result to the output path.
 *
 * The process exits with `ExitCode.INVALID_PARAMS` for bad input or when no
 * voice can be determined, `ExitCode.TTS_ENGINE_ERROR` when the engine reports
 * itself unavailable, and `ExitCode.GENERAL_ERROR` for any thrown error.
 *
 * @param options Parsed command-line options.
 */
export async function generateCommand(options: GenerateOptions): Promise<void> {
  const spinner = ora();

  try {
    if (!options.text && !options.file) {
      Logger.error('Please provide either --text or --file');
      process.exit(ExitCode.INVALID_PARAMS);
    }

    if (options.text && options.file) {
      Logger.error('Please provide either --text or --file, not both');
      process.exit(ExitCode.INVALID_PARAMS);
    }

    let text: string;
    if (options.file) {
      spinner.start('Reading file...');
      text = FileReader.readTextFile(options.file);
      spinner.succeed('File read successfully');
    } else {
      // The checks above guarantee that text is set when file is not.
      text = options.text ?? '';
    }

    const engineType = options.engine || 'system';
    spinner.start(`Initializing ${engineType} TTS engine...`);
    const engine = await EngineFactory.create(engineType, {
      piperPath: process.env.MIA_NARRATIVE_PIPER_PATH,
      modelPath: process.env.MIA_NARRATIVE_PIPER_MODEL,
    });
    spinner.succeed('TTS engine ready');

    if (!(await engine.isAvailable())) {
      Logger.error(
        `${engine.getName()} engine not available. Please check your configuration.`
      );
      process.exit(ExitCode.TTS_ENGINE_ERROR);
    }

    // Only query the engine's voice list when no voice was requested: the
    // lookup can fail or require a network round trip, and an explicit
    // voice id does not need it.
    let voiceId = options.voiceId;
    if (!voiceId) {
      const voices = await engine.getVoices();
      voiceId = voices[0]?.id;
    }

    if (!voiceId) {
      Logger.error(`No voices available for the ${engineType} engine.`);
      process.exit(ExitCode.INVALID_PARAMS);
    }

    Logger.info(`Using voice: ${voiceId}`);

    const audioParams = validateParams({
      speed: options.speed,
      pitch: options.pitch,
      volume: options.volume,
      emphasis: options.emphasis,
      reverb: options.reverb,
      echo: options.echo,
      compression: options.compression,
      pauseLength: options.pause,
    });

    spinner.start('Generating audio...');
    const rawAudioBuffer = await engine.generateAudio({
      text,
      voiceId,
      parameters: audioParams,
    });
    spinner.succeed('Audio generated');

    spinner.start('Applying audio effects...');
    // Include the pid so two runs started in the same millisecond do not
    // share a temporary file.
    const tempAudioPath = join(
      tmpdir(),
      `temp-audio-${process.pid}-${Date.now()}.wav`
    );
    const outputPath = options.output || 'output.mp3';

    try {
      writeFileSync(tempAudioPath, rawAudioBuffer);
      await AudioProcessor.processAudio(tempAudioPath, outputPath, audioParams);
    } finally {
      // Remove the temporary file on failure as well as on success. Cleanup
      // is best-effort: a failure here must not mask the real outcome.
      try {
        unlinkSync(tempAudioPath);
        Logger.debug(`Cleaned up temporary file: ${tempAudioPath}`);
      } catch (cleanupError) {
        const reason =
          cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
        Logger.debug(`Could not remove temporary file ${tempAudioPath}: ${reason}`);
      }
    }
    spinner.succeed('Audio effects applied');

    Logger.success(`Audio generated: ${outputPath}`);
  } catch (error) {
    spinner.fail();
    if (error instanceof Error) {
      Logger.error(error.message);
    } else {
      Logger.error('Unknown error occurred');
    }
    process.exit(ExitCode.GENERAL_ERROR);
  }
}
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import { Logger } from '../utils/logger.js';
|
|
2
|
+
import { EngineFactory, EngineType } from '../engines/factory.js';
|
|
3
|
+
import ora from 'ora';
|
|
4
|
+
import { ExitCode } from '../config/defaults.js';
|
|
5
|
+
|
|
6
|
+
/** Options accepted by `voicesCommand`. */
interface VoicesListOptions {
  /** TTS engine whose voices are listed; `voicesCommand` falls back to `'system'`. */
  engine?: EngineType;
  /** Output format; `voicesCommand` falls back to `'table'`. */
  format?: 'json' | 'table' | 'list';
}
|
|
10
|
+
|
|
11
|
+
/**
 * Implements the `voices` command: initializes the selected TTS engine,
 * fetches its voices and prints them as JSON, a plain id list, or a table.
 *
 * Any thrown error is logged and the process exits with
 * `ExitCode.GENERAL_ERROR`.
 *
 * @param options Parsed command-line options.
 */
export async function voicesCommand(options: VoicesListOptions): Promise<void> {
  const spinner = ora();

  try {
    const engineType = options.engine || 'system';

    spinner.start(`Initializing ${engineType} TTS engine...`);
    const engine = await EngineFactory.create(engineType, {
      piperPath: process.env.MIA_NARRATIVE_PIPER_PATH,
      modelPath: process.env.MIA_NARRATIVE_PIPER_MODEL,
    });
    spinner.succeed('TTS engine ready');

    spinner.start('Fetching voices...');
    const voices = await engine.getVoices();
    spinner.succeed('Voices fetched');

    if (voices.length === 0) {
      Logger.warn(`No voices found for the ${engineType} engine.`);
      return;
    }

    switch (options.format || 'table') {
      case 'json':
        Logger.log(JSON.stringify(voices, null, 2));
        break;

      case 'list':
        for (const voice of voices) {
          Logger.log(voice.id);
        }
        break;

      default: {
        // Table format: a fixed 30-column id field followed by the name.
        const rule = '─'.repeat(80);
        const idColumnWidth = 30;

        Logger.log('');
        Logger.log(`Available Voices for ${engineType}:`);
        Logger.log(rule);
        Logger.log(`  ${'ID'.padEnd(idColumnWidth)}${'Name'}`);
        Logger.log(rule);

        for (const voice of voices) {
          const idCell = (voice.id || 'unknown').padEnd(idColumnWidth);
          const nameCell = voice.name || 'Unknown';
          Logger.log(`  ${idCell}${nameCell}`);
        }

        Logger.log(rule);
        Logger.log('');
      }
    }
  } catch (error) {
    spinner.fail();
    Logger.error(
      error instanceof Error ? error.message : 'Unknown error occurred'
    );
    process.exit(ExitCode.GENERAL_ERROR);
  }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Tunable audio parameters shared by the TTS engines and the FFmpeg
 * post-processing step.
 */
export interface AudioParameters {
  /** Playback-speed multiplier. */
  speed: number;
  /** Pitch multiplier. */
  pitch: number;
  /** Output volume. */
  volume: number;
  /** NOTE(review): not consumed by any code visible here — confirm usage. */
  emphasis: number;
  /** Reverb amount; values above 0 enable the effect. */
  reverb: number;
  /** Echo amount. */
  echo: number;
  /** Dynamic-range compression amount; values above 0 enable the effect. */
  compression: number;
  /** NOTE(review): not consumed by any code visible here — confirm usage. */
  pauseLength: number;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Values used for every audio parameter the caller does not override.
 *
 * `reverb` and `compression` default to values above zero, so their effects
 * are applied even when the caller sets nothing.
 */
export const DEFAULT_AUDIO_PARAMS: AudioParameters = {
  speed: 1.0,
  pitch: 1.0,
  volume: 0.8,
  emphasis: 0.5,
  reverb: 0.2,
  echo: 0.1,
  compression: 0.3,
  pauseLength: 0.5,
};
|
|
22
|
+
|
|
23
|
+
/**
 * Inclusive `min`/`max` bounds for each audio parameter; out-of-range values
 * are clamped to these bounds during validation.
 */
export const AUDIO_PARAM_RANGES = {
  speed: { min: 0.5, max: 2.0 },
  pitch: { min: 0.5, max: 2.0 },
  volume: { min: 0.0, max: 1.0 },
  emphasis: { min: 0.0, max: 1.0 },
  reverb: { min: 0.0, max: 1.0 },
  echo: { min: 0.1, max: 2.0 },
  compression: { min: 0.0, max: 1.0 },
  pauseLength: { min: 0.1, max: 2.0 },
};
|
|
33
|
+
|
|
34
|
+
/** Process exit codes used by the CLI commands. */
export enum ExitCode {
  SUCCESS = 0,
  /** Any uncaught error inside a command. */
  GENERAL_ERROR = 1,
  /** Missing or conflicting command-line input, or no usable voice. */
  INVALID_PARAMS = 2,
  CONFIG_ERROR = 3,
  /** The selected TTS engine reported itself unavailable. */
  TTS_ENGINE_ERROR = 4,
  OUTPUT_ERROR = 5,
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/** Describes one built-in narrator voice. */
export interface VoiceProfile {
  /** Lookup key; matches the profile's key in `VOICE_PROFILES`. */
  id: string;
  /** Display name. */
  name: string;
  gender: "male" | "female" | "neutral";
  /** Free-form style label, e.g. "professional" or "narrative". */
  style: string;
  description: string;
  /** Piper model reference: a file path or a bare model name. */
  piperModel: string;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Built-in voice profiles, keyed by profile id.
 *
 * NOTE(review): "mia-default" points at an absolute `.onnx` path built from
 * `process.env.HOME`, while every other profile uses a bare model name.
 * `HOME` is typically unset on Windows, which would yield a path starting
 * with "undefined/" — confirm how the Piper engine resolves `piperModel`.
 */
export const VOICE_PROFILES: Record<string, VoiceProfile> = {
  "mia-default": {
    id: "mia-default",
    name: "Mia",
    gender: "female",
    style: "professional",
    description: "Professional narrator for expert technical content",
    piperModel: `${process.env.HOME}/.local/share/piper-tts/en_US-lessac-medium.onnx`,
  },
  "miette-default": {
    id: "miette-default",
    name: "Miette",
    gender: "female",
    style: "conversational",
    description: "Friendly conversational narrator",
    piperModel: "en_US-amy-medium",
  },
  "seraphine-default": {
    id: "seraphine-default",
    name: "Seraphine",
    gender: "female",
    style: "narrative",
    description: "Expressive narrative and storytelling",
    piperModel: "en_US-libritts-high",
  },
  "jeremy-default": {
    id: "jeremy-default",
    name: "Jeremy",
    gender: "male",
    style: "professional",
    description: "Professional male narrator",
    piperModel: "en_US-ryan-medium",
  },
  "resonova-default": {
    id: "resonova-default",
    name: "ResoNova",
    gender: "neutral",
    style: "expressive",
    description: "Varied and experimental voice",
    piperModel: "en_US-hfc-female",
  },
  "zephyr-default": {
    id: "zephyr-default",
    name: "Zephyr",
    gender: "neutral",
    style: "narrative",
    description: "Contemplative nature and narrative voice",
    piperModel: "en_US-kusal-medium",
  },
  "echo-default": {
    id: "echo-default",
    name: "Echo",
    gender: "female",
    style: "expressive",
    description: "Expressive character and dialogue voice",
    piperModel: "en_US-libritts-high",
  },
  "atlas-default": {
    id: "atlas-default",
    name: "Atlas",
    gender: "male",
    style: "conversational",
    description: "Casual and approachable male voice",
    piperModel: "en_US-ryan-medium",
  },
};
|
|
76
|
+
|
|
77
|
+
/**
 * Looks up a built-in voice profile by id.
 *
 * Only the profile table's own keys are considered, so ids that collide with
 * inherited object members (for example "constructor" or "toString") return
 * `undefined` instead of a non-profile value.
 *
 * @param id Profile id, e.g. "mia-default".
 * @returns The matching profile, or `undefined` when the id is unknown.
 */
export function getVoiceProfile(id: string): VoiceProfile | undefined {
  return Object.prototype.hasOwnProperty.call(VOICE_PROFILES, id)
    ? VOICE_PROFILES[id]
    : undefined;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Returns the built-in voice profiles, optionally narrowed by gender and/or
 * style. An absent or empty criterion matches every profile.
 *
 * @param filter Optional exact-match criteria.
 * @returns Matching profiles in table order.
 */
export function listVoices(
  filter?: { gender?: string; style?: string }
): VoiceProfile[] {
  const selected: VoiceProfile[] = [];

  for (const profile of Object.values(VOICE_PROFILES)) {
    const genderMatches = !filter?.gender || profile.gender === filter.gender;
    const styleMatches = !filter?.style || profile.style === filter.style;

    if (genderMatches && styleMatches) {
      selected.push(profile);
    }
  }

  return selected;
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js';
|
|
2
|
+
import { TTSEngine, GenerateAudioOptions, Voice, TTSEngineConfig } from './base.js';
|
|
3
|
+
import { Logger } from '../utils/logger.js';
|
|
4
|
+
import { Readable } from 'stream';
|
|
5
|
+
|
|
6
|
+
/**
 * TTS engine backed by the ElevenLabs API.
 *
 * `initialize` must be called before `generateAudio` or `getVoices`.
 */
export class ElevenLabsEngine extends TTSEngine {
  private client: ElevenLabsClient | undefined;

  /**
   * Creates the API client.
   *
   * @param config Engine configuration; `apiKey` takes precedence over the
   *   `ELEVENLABS_API_KEY` environment variable.
   * @throws Error when no API key is available from either source.
   */
  async initialize(config: TTSEngineConfig): Promise<void> {
    const apiKey = config.apiKey || process.env.ELEVENLABS_API_KEY;
    if (!apiKey) {
      throw new Error('ElevenLabs API key not provided. Please set the ELEVENLABS_API_KEY environment variable or provide it in the config.');
    }
    this.client = new ElevenLabsClient({ apiKey });
  }

  /**
   * Synthesizes `options.text` with the given voice using the
   * `eleven_multilingual_v2` model and returns the audio bytes.
   *
   * @throws Error when the engine is not initialized or the response cannot
   *   be converted into a Buffer.
   */
  async generateAudio(options: GenerateAudioOptions): Promise<Buffer> {
    if (!this.client) {
      throw new Error('ElevenLabs engine not initialized.');
    }

    const { text, voiceId } = options;
    const response: unknown = await this.client.textToSpeech.convert(voiceId, {
      text,
      modelId: 'eleven_multilingual_v2',
    });

    // The response may be a Buffer, a Node stream or another async iterable.
    try {
      if (Buffer.isBuffer(response)) {
        return response;
      }

      if (response instanceof Readable || ElevenLabsEngine.isAsyncIterable(response)) {
        const chunks: Buffer[] = [];
        for await (const chunk of response as AsyncIterable<Uint8Array>) {
          chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
        }
        return Buffer.concat(chunks);
      }

      // Fallback: let Buffer.from attempt the conversion; it throws for
      // unsupported values, which is reported by the catch below.
      return Buffer.from(response as Uint8Array);
    } catch (error) {
      throw new Error(`Failed to process ElevenLabs audio response: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Lists the voices available to the configured account.
   *
   * @throws Error when the engine is not initialized.
   */
  async getVoices(): Promise<Voice[]> {
    if (!this.client) {
      throw new Error('ElevenLabs engine not initialized.');
    }
    const availableVoices = await this.client.voices.getAll();
    // The id field name is probed under several spellings, so the voice
    // object is treated as untyped here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return availableVoices.voices.map((v: any) => ({
      id: v.voice_id || v.voiceId || v.id,
      name: `${v.name}`,
    }));
  }

  /**
   * Reports whether the engine can be used: either it has already been
   * initialized (which covers a key supplied through config) or the
   * `ELEVENLABS_API_KEY` environment variable is set.
   */
  async isAvailable(): Promise<boolean> {
    return this.client !== undefined || !!process.env.ELEVENLABS_API_KEY;
  }

  getName(): string {
    return 'elevenlabs';
  }

  /** Type guard for values that can be consumed with `for await`. */
  private static isAsyncIterable(value: unknown): value is AsyncIterable<Uint8Array> {
    return typeof value === 'object' && value !== null && Symbol.asyncIterator in value;
  }
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { TTSEngine, GenerateAudioOptions, Voice, TTSEngineConfig } from './base.js';
|
|
2
|
+
import { AudioParameters } from '../config/defaults.js';
|
|
3
|
+
import say from 'say';
|
|
4
|
+
import { tmpdir } from 'os';
|
|
5
|
+
import { join } from 'path';
|
|
6
|
+
import { randomUUID } from 'crypto';
|
|
7
|
+
import { unlinkSync, readFileSync } from 'fs';
|
|
8
|
+
import { Logger } from '../utils/logger.js';
|
|
9
|
+
|
|
10
|
+
/**
 * TTS engine backed by the operating system's speech synthesizer through the
 * `say` package.
 */
export class SystemTtsEngine extends TTSEngine {
  async initialize(config: TTSEngineConfig): Promise<void> {
    // No initialization needed for system TTS
  }

  /**
   * Synthesizes `options.text` into a temporary WAV file, reads it back and
   * returns its bytes. The temporary file is removed on success and on
   * failure.
   *
   * @throws Rejects when `say` reports an error or the file cannot be read.
   */
  async generateAudio(options: GenerateAudioOptions): Promise<Buffer> {
    const { text, voiceId, parameters } = options;
    const audioFile = join(tmpdir(), `mia-narrative-output-${randomUUID()}.wav`);

    return new Promise<Buffer>((resolve, reject) => {
      say.export(text, voiceId, parameters.speed, audioFile, (err) => {
        if (err) {
          Logger.error(`System TTS error: ${err}`);
          SystemTtsEngine.removeTempFile(audioFile);
          return reject(SystemTtsEngine.toError(err));
        }
        try {
          resolve(readFileSync(audioFile));
        } catch (readErr: unknown) {
          if (readErr instanceof Error) {
            Logger.error(`Error processing system TTS audio file: ${readErr.message}`);
          }
          reject(readErr);
        } finally {
          // Cleanup is best-effort so that a failed unlink cannot discard
          // audio that was read successfully.
          SystemTtsEngine.removeTempFile(audioFile);
        }
      });
    });
  }

  /**
   * Lists the voices installed on the system, using each voice name as both
   * id and display name.
   *
   * @throws Rejects when `say` reports an error.
   */
  async getVoices(): Promise<Voice[]> {
    return new Promise<Voice[]>((resolve, reject) => {
      // The cast bridges a mismatch between this callback and the local
      // typings for `say`.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      say.getInstalledVoices(((err: string | null, voices: string[]) => {
        if (err) {
          return reject(SystemTtsEngine.toError(err));
        }
        resolve((voices || []).map((v: string) => ({ id: v, name: v })));
      }) as any);
    });
  }

  async isAvailable(): Promise<boolean> {
    // The 'say' package should work on most systems with built-in TTS.
    // A more robust check could be added here if needed.
    return true;
  }

  getName(): string {
    return 'system';
  }

  /** Passes Error objects through unchanged and wraps anything else. */
  private static toError(cause: unknown): Error {
    return cause instanceof Error ? cause : new Error(String(cause));
  }

  /** Deletes a temporary file, logging rather than throwing on failure. */
  private static removeTempFile(path: string): void {
    try {
      unlinkSync(path);
    } catch (cleanupErr: unknown) {
      const reason = cleanupErr instanceof Error ? cleanupErr.message : String(cleanupErr);
      Logger.debug(`Could not remove temporary file ${path}: ${reason}`);
    }
  }
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { AudioParameters } from '../config/defaults.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Configuration passed to a TTS engine's `initialize` method. Every field is
 * optional; each engine reads only the fields it needs.
 */
export interface TTSEngineConfig {
  /** API key for hosted engines. */
  apiKey?: string;
  /** NOTE(review): presumably the Piper executable path — confirm against the Piper engine. */
  piperPath?: string;
  /** NOTE(review): presumably the Piper model path — confirm against the Piper engine. */
  modelPath?: string;
}
|
|
8
|
+
|
|
9
|
+
/** A voice offered by a TTS engine. */
export interface Voice {
  /** Engine-specific identifier passed back as `voiceId` when generating audio. */
  id: string;
  /** Display name. */
  name: string;
}
|
|
13
|
+
|
|
14
|
+
/** Input for `TTSEngine.generateAudio`. */
export interface GenerateAudioOptions {
  /** Text to synthesize. */
  text: string;
  /** Identifier of the voice to use, as reported by `getVoices`. */
  voiceId: string;
  /** Audio parameters; which of them an engine honours is engine-specific. */
  parameters: AudioParameters;
}
|
|
19
|
+
|
|
20
|
+
/** Contract implemented by every text-to-speech engine. */
export abstract class TTSEngine {
  /** Prepares the engine for use with the given configuration. */
  abstract initialize(config: TTSEngineConfig): Promise<void>;
  /** Synthesizes the given text and resolves with the audio bytes. */
  abstract generateAudio(options: GenerateAudioOptions): Promise<Buffer>;
  /** Resolves with the voices the engine offers. */
  abstract getVoices(): Promise<Voice[]>;
  /** Resolves with whether the engine can currently be used. */
  abstract isAvailable(): Promise<boolean>;
  /** Returns the engine's name. */
  abstract getName(): string;
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { TTSEngine, TTSEngineConfig } from './base.js';
|
|
2
|
+
import { PiperEngine } from './piper.js';
|
|
3
|
+
import { SystemTtsEngine } from './SystemTtsEngine.js';
|
|
4
|
+
import { ElevenLabsEngine } from './ElevenLabsEngine.js';
|
|
5
|
+
|
|
6
|
+
/** Names of the supported TTS engines. */
export type EngineType = 'piper' | 'system' | 'elevenlabs';
|
|
7
|
+
|
|
8
|
+
/** Maps each engine name to the class that implements it. */
const engineMap: Record<EngineType, new () => TTSEngine> = {
  piper: PiperEngine,
  system: SystemTtsEngine,
  elevenlabs: ElevenLabsEngine,
};
|
|
13
|
+
|
|
14
|
+
/** Creates and initializes TTS engines by name. */
export class EngineFactory {
  /**
   * Instantiates the engine registered for `engineType` and initializes it.
   *
   * The lookup only considers the engine table's own keys. `engineType` can
   * originate from unchecked command-line input, and a plain index lookup
   * would resolve inherited members such as "constructor" and skip the
   * unknown-engine error.
   *
   * @param engineType Name of the engine to create.
   * @param config Configuration forwarded to the engine's `initialize`.
   * @returns The initialized engine.
   * @throws Error when `engineType` is not a registered engine, or when the
   *   engine's initialization fails.
   */
  static async create(
    engineType: EngineType,
    config: TTSEngineConfig = {}
  ): Promise<TTSEngine> {
    const isRegistered = Object.prototype.hasOwnProperty.call(engineMap, engineType);
    if (!isRegistered) {
      throw new Error(`Unknown engine type: ${engineType}`);
    }

    const EngineClass = engineMap[engineType];
    const engine = new EngineClass();
    await engine.initialize(config);
    return engine;
  }
}
|