@contractspec/lib.voice 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/audio/audio-concatenator.d.ts +15 -0
- package/dist/audio/audio-concatenator.js +57 -0
- package/dist/audio/duration-estimator.d.ts +31 -0
- package/dist/audio/duration-estimator.js +22 -0
- package/dist/audio/format-converter.d.ts +17 -0
- package/dist/audio/format-converter.js +28 -0
- package/dist/audio/index.d.ts +4 -0
- package/dist/audio/index.js +121 -0
- package/dist/audio/silence-generator.d.ts +16 -0
- package/dist/audio/silence-generator.js +20 -0
- package/dist/browser/audio/audio-concatenator.js +56 -0
- package/dist/browser/audio/duration-estimator.js +21 -0
- package/dist/browser/audio/format-converter.js +27 -0
- package/dist/browser/audio/index.js +120 -0
- package/dist/browser/audio/silence-generator.js +19 -0
- package/dist/browser/conversational/index.js +241 -0
- package/dist/browser/conversational/response-orchestrator.js +62 -0
- package/dist/browser/conversational/transcript-builder.js +63 -0
- package/dist/browser/conversational/turn-detector.js +43 -0
- package/dist/browser/conversational/types.js +0 -0
- package/dist/browser/conversational/voice-session-manager.js +137 -0
- package/dist/browser/docs/conversational.docblock.js +5 -0
- package/dist/browser/docs/stt.docblock.js +5 -0
- package/dist/browser/docs/sync.docblock.js +5 -0
- package/dist/browser/docs/tts.docblock.js +5 -0
- package/dist/browser/docs/voice.docblock.js +5 -0
- package/dist/browser/i18n/catalogs/en.js +91 -0
- package/dist/browser/i18n/catalogs/es.js +91 -0
- package/dist/browser/i18n/catalogs/fr.js +91 -0
- package/dist/browser/i18n/catalogs/index.js +271 -0
- package/dist/browser/i18n/index.js +335 -0
- package/dist/browser/i18n/keys.js +38 -0
- package/dist/browser/i18n/locale.js +13 -0
- package/dist/browser/i18n/messages.js +283 -0
- package/dist/browser/index.js +1070 -0
- package/dist/browser/stt/diarization-mapper.js +42 -0
- package/dist/browser/stt/index.js +222 -0
- package/dist/browser/stt/segment-splitter.js +36 -0
- package/dist/browser/stt/subtitle-formatter.js +51 -0
- package/dist/browser/stt/transcriber.js +219 -0
- package/dist/browser/stt/types.js +0 -0
- package/dist/browser/sync/duration-negotiator.js +69 -0
- package/dist/browser/sync/index.js +165 -0
- package/dist/browser/sync/scene-adapter.js +52 -0
- package/dist/browser/sync/timing-calculator.js +46 -0
- package/dist/browser/tts/audio-assembler.js +120 -0
- package/dist/browser/tts/emphasis-planner.js +134 -0
- package/dist/browser/tts/index.js +439 -0
- package/dist/browser/tts/pace-analyzer.js +67 -0
- package/dist/browser/tts/segment-synthesizer.js +36 -0
- package/dist/browser/tts/types.js +0 -0
- package/dist/browser/tts/voice-synthesizer.js +435 -0
- package/dist/browser/types.js +0 -0
- package/dist/conversational/index.d.ts +5 -0
- package/dist/conversational/index.js +242 -0
- package/dist/conversational/response-orchestrator.d.ts +26 -0
- package/dist/conversational/response-orchestrator.js +63 -0
- package/dist/conversational/transcript-builder.d.ts +25 -0
- package/dist/conversational/transcript-builder.js +64 -0
- package/dist/conversational/turn-detector.d.ts +31 -0
- package/dist/conversational/turn-detector.js +44 -0
- package/dist/conversational/types.d.ts +55 -0
- package/dist/conversational/types.js +1 -0
- package/dist/conversational/voice-session-manager.d.ts +17 -0
- package/dist/conversational/voice-session-manager.js +138 -0
- package/dist/docs/conversational.docblock.d.ts +14 -0
- package/dist/docs/conversational.docblock.js +6 -0
- package/dist/docs/stt.docblock.d.ts +12 -0
- package/dist/docs/stt.docblock.js +6 -0
- package/dist/docs/sync.docblock.d.ts +12 -0
- package/dist/docs/sync.docblock.js +6 -0
- package/dist/docs/tts.docblock.d.ts +12 -0
- package/dist/docs/tts.docblock.js +6 -0
- package/dist/docs/voice.docblock.d.ts +22 -0
- package/dist/docs/voice.docblock.js +6 -0
- package/dist/i18n/catalogs/en.d.ts +6 -0
- package/dist/i18n/catalogs/en.js +92 -0
- package/dist/i18n/catalogs/es.d.ts +4 -0
- package/dist/i18n/catalogs/es.js +92 -0
- package/dist/i18n/catalogs/fr.d.ts +4 -0
- package/dist/i18n/catalogs/fr.js +92 -0
- package/dist/i18n/catalogs/index.d.ts +3 -0
- package/dist/i18n/catalogs/index.js +272 -0
- package/dist/i18n/index.d.ts +20 -0
- package/dist/i18n/index.js +336 -0
- package/dist/i18n/keys.d.ts +50 -0
- package/dist/i18n/keys.js +39 -0
- package/dist/i18n/locale.d.ts +6 -0
- package/dist/i18n/locale.js +14 -0
- package/dist/i18n/messages.d.ts +13 -0
- package/dist/i18n/messages.js +284 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +1071 -0
- package/dist/node/audio/audio-concatenator.js +56 -0
- package/dist/node/audio/duration-estimator.js +21 -0
- package/dist/node/audio/format-converter.js +27 -0
- package/dist/node/audio/index.js +120 -0
- package/dist/node/audio/silence-generator.js +19 -0
- package/dist/node/conversational/index.js +241 -0
- package/dist/node/conversational/response-orchestrator.js +62 -0
- package/dist/node/conversational/transcript-builder.js +63 -0
- package/dist/node/conversational/turn-detector.js +43 -0
- package/dist/node/conversational/types.js +0 -0
- package/dist/node/conversational/voice-session-manager.js +137 -0
- package/dist/node/docs/conversational.docblock.js +5 -0
- package/dist/node/docs/stt.docblock.js +5 -0
- package/dist/node/docs/sync.docblock.js +5 -0
- package/dist/node/docs/tts.docblock.js +5 -0
- package/dist/node/docs/voice.docblock.js +5 -0
- package/dist/node/i18n/catalogs/en.js +91 -0
- package/dist/node/i18n/catalogs/es.js +91 -0
- package/dist/node/i18n/catalogs/fr.js +91 -0
- package/dist/node/i18n/catalogs/index.js +271 -0
- package/dist/node/i18n/index.js +335 -0
- package/dist/node/i18n/keys.js +38 -0
- package/dist/node/i18n/locale.js +13 -0
- package/dist/node/i18n/messages.js +283 -0
- package/dist/node/index.js +1070 -0
- package/dist/node/stt/diarization-mapper.js +42 -0
- package/dist/node/stt/index.js +222 -0
- package/dist/node/stt/segment-splitter.js +36 -0
- package/dist/node/stt/subtitle-formatter.js +51 -0
- package/dist/node/stt/transcriber.js +219 -0
- package/dist/node/stt/types.js +0 -0
- package/dist/node/sync/duration-negotiator.js +69 -0
- package/dist/node/sync/index.js +165 -0
- package/dist/node/sync/scene-adapter.js +52 -0
- package/dist/node/sync/timing-calculator.js +46 -0
- package/dist/node/tts/audio-assembler.js +120 -0
- package/dist/node/tts/emphasis-planner.js +134 -0
- package/dist/node/tts/index.js +439 -0
- package/dist/node/tts/pace-analyzer.js +67 -0
- package/dist/node/tts/segment-synthesizer.js +36 -0
- package/dist/node/tts/types.js +0 -0
- package/dist/node/tts/voice-synthesizer.js +435 -0
- package/dist/node/types.js +0 -0
- package/dist/stt/diarization-mapper.d.ts +19 -0
- package/dist/stt/diarization-mapper.js +43 -0
- package/dist/stt/index.d.ts +5 -0
- package/dist/stt/index.js +223 -0
- package/dist/stt/segment-splitter.d.ts +19 -0
- package/dist/stt/segment-splitter.js +37 -0
- package/dist/stt/subtitle-formatter.d.ts +19 -0
- package/dist/stt/subtitle-formatter.js +52 -0
- package/dist/stt/transcriber.d.ts +21 -0
- package/dist/stt/transcriber.js +220 -0
- package/dist/stt/types.d.ts +44 -0
- package/dist/stt/types.js +1 -0
- package/dist/sync/duration-negotiator.d.ts +37 -0
- package/dist/sync/duration-negotiator.js +70 -0
- package/dist/sync/index.d.ts +3 -0
- package/dist/sync/index.js +166 -0
- package/dist/sync/scene-adapter.d.ts +29 -0
- package/dist/sync/scene-adapter.js +53 -0
- package/dist/sync/timing-calculator.d.ts +21 -0
- package/dist/sync/timing-calculator.js +47 -0
- package/dist/tts/audio-assembler.d.ts +19 -0
- package/dist/tts/audio-assembler.js +121 -0
- package/dist/tts/emphasis-planner.d.ts +24 -0
- package/dist/tts/emphasis-planner.js +135 -0
- package/dist/tts/index.d.ts +6 -0
- package/dist/tts/index.js +440 -0
- package/dist/tts/pace-analyzer.d.ts +30 -0
- package/dist/tts/pace-analyzer.js +68 -0
- package/dist/tts/segment-synthesizer.d.ts +21 -0
- package/dist/tts/segment-synthesizer.js +37 -0
- package/dist/tts/types.d.ts +76 -0
- package/dist/tts/types.js +1 -0
- package/dist/tts/voice-synthesizer.d.ts +28 -0
- package/dist/tts/voice-synthesizer.js +436 -0
- package/dist/types.d.ts +12 -0
- package/dist/types.js +1 -0
- package/package.json +760 -0
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { AudioData } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Concatenate multiple AudioData segments into a single buffer.
|
|
4
|
+
*
|
|
5
|
+
* All segments must share the same format and sample rate.
|
|
6
|
+
* Use FormatConverter first if segments have mixed formats.
|
|
7
|
+
*/
|
|
8
|
+
export declare class AudioConcatenator {
|
|
9
|
+
/**
|
|
10
|
+
* Concatenate audio segments in order.
|
|
11
|
+
*
* @param segments - Segments to join; their bytes are appended back to back in array order.
* @returns A zero-length "wav" AudioData (44100 Hz, 1 channel, 0 ms) for an empty list;
*   otherwise the combined audio, reporting the first segment's format and sample rate
*   and the sum of the segments' durationMs (a missing durationMs counts as 0).
|
|
12
|
+
* @throws If segments have mismatched formats or sample rates
|
|
13
|
+
*/
|
|
14
|
+
concatenate(segments: AudioData[]): AudioData;
|
|
15
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/audio/audio-concatenator.ts
|
|
3
|
+
/**
 * Joins several AudioData segments into one contiguous buffer.
 *
 * Segment bytes are appended verbatim, so every segment must share the same
 * format, sample rate and channel count.
 */
class AudioConcatenator {
  /**
   * Concatenate audio segments in order.
   *
   * @param {Array<object>} segments - AudioData-shaped objects carrying `data`,
   *   `format`, `sampleRateHz` and optional `durationMs` / `channels`
   *   (a missing channel count is treated as 1, a missing duration as 0).
   * @returns {object} A zero-length "wav" result (44100 Hz, 1 channel) when
   *   there is no first segment, a shallow copy of the segment when there is
   *   exactly one, otherwise a new AudioData holding all bytes back to back.
   * @throws {Error} If segments have mismatched formats, sample rates or
   *   channel counts.
   */
  concatenate(segments) {
    const [firstSegment] = segments;
    // An empty list destructures to undefined, so this single guard covers
    // both "no segments" and "missing first entry".
    if (!firstSegment) {
      return {
        data: new Uint8Array(0),
        format: "wav",
        sampleRateHz: 44100,
        durationMs: 0,
        channels: 1
      };
    }
    if (segments.length === 1) {
      return { ...firstSegment };
    }

    const referenceFormat = firstSegment.format;
    const referenceSampleRate = firstSegment.sampleRateHz;
    const referenceChannels = firstSegment.channels ?? 1;

    // Validate everything before allocating so a mismatch never leaves a
    // half-filled buffer behind.
    let totalBytes = 0;
    let totalDurationMs = 0;
    for (const seg of segments) {
      if (seg.format !== referenceFormat) {
        throw new Error(`Format mismatch: expected ${referenceFormat}, got ${seg.format}`);
      }
      if (seg.sampleRateHz !== referenceSampleRate) {
        throw new Error(`Sample rate mismatch: expected ${referenceSampleRate}, got ${seg.sampleRateHz}`);
      }
      // Appending bytes with a different channel layout would be reported
      // under the first segment's channel count and play back incorrectly.
      const segChannels = seg.channels ?? 1;
      if (segChannels !== referenceChannels) {
        throw new Error(`Channel count mismatch: expected ${referenceChannels}, got ${segChannels}`);
      }
      totalBytes += seg.data.length;
      totalDurationMs += seg.durationMs ?? 0;
    }

    const combined = new Uint8Array(totalBytes);
    let offset = 0;
    for (const seg of segments) {
      combined.set(seg.data, offset);
      offset += seg.data.length;
    }

    return {
      data: combined,
      format: referenceFormat,
      sampleRateHz: referenceSampleRate,
      durationMs: totalDurationMs,
      channels: referenceChannels
    };
  }
}
|
|
55
|
+
export {
|
|
56
|
+
AudioConcatenator
|
|
57
|
+
};
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Estimate speech duration from text.
|
|
3
|
+
*
|
|
4
|
+
* Uses word count and an assumed speaking rate.
|
|
5
|
+
* This is a deterministic fallback -- actual duration comes from TTS provider.
|
|
6
|
+
*/
|
|
7
|
+
export declare class DurationEstimator {
|
|
8
|
+
/** Words per minute at normal speaking rate (150) */
|
|
9
|
+
private static readonly DEFAULT_WPM;
|
|
10
|
+
/**
|
|
11
|
+
* Estimate speaking duration in whole seconds (rounded up) from text.
* Words are counted by splitting the text on whitespace; empty text yields 0.
|
|
12
|
+
*
|
|
13
|
+
* @param text - The text to estimate duration for
|
|
14
|
+
* @param wordsPerMinute - Speaking rate (default 150 WPM)
|
|
15
|
+
*/
|
|
16
|
+
estimateSeconds(text: string, wordsPerMinute?: number): number;
|
|
17
|
+
/**
|
|
18
|
+
* Estimate speaking duration in milliseconds (rounded up) from text.
* Uses the same whitespace-based word count as estimateSeconds.
|
|
19
|
+
*
|
|
20
|
+
* @param text - The text to estimate duration for
|
|
21
|
+
* @param wordsPerMinute - Speaking rate (default 150 WPM)
|
|
22
|
+
*/
|
|
23
|
+
estimateMs(text: string, wordsPerMinute?: number): number;
|
|
24
|
+
/**
|
|
25
|
+
* Estimate word count from duration, rounded to the nearest whole word.
|
|
26
|
+
*
|
|
27
|
+
* @param durationSeconds - Duration in seconds
|
|
28
|
+
* @param wordsPerMinute - Speaking rate (default 150 WPM)
|
|
29
|
+
*/
|
|
30
|
+
estimateWordCount(durationSeconds: number, wordsPerMinute?: number): number;
|
|
31
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/audio/duration-estimator.ts
|
|
3
|
+
/**
 * Estimates speech duration from text using a whitespace word count and an
 * assumed speaking rate.
 */
class DurationEstimator {
  /** Words per minute used when no rate is supplied. */
  static DEFAULT_WPM = 150;

  /**
   * Resolve the speaking rate used as a divisor.
   *
   * @param {number} [wordsPerMinute] - Caller-supplied rate, if any.
   * @returns {number} The supplied rate, or DEFAULT_WPM when it is null/undefined.
   * @throws {RangeError} If the rate is not a positive finite number; dividing
   *   by it would otherwise yield Infinity, NaN or a negative duration.
   */
  static #resolveDivisorWpm(wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    if (!Number.isFinite(wpm) || wpm <= 0) {
      throw new RangeError(`wordsPerMinute must be a positive finite number, got ${wpm}`);
    }
    return wpm;
  }

  /**
   * Speaking time in minutes for the whitespace-separated words in `text`.
   *
   * @param {string} text - Text to measure.
   * @param {number} [wordsPerMinute] - Speaking rate.
   * @returns {number} Fractional minutes (0 for empty or whitespace-only text).
   */
  static #speakingMinutes(text, wordsPerMinute) {
    const wpm = DurationEstimator.#resolveDivisorWpm(wordsPerMinute);
    // filter(Boolean) drops the empty strings produced by leading/trailing whitespace.
    const wordCount = text.split(/\s+/).filter(Boolean).length;
    return wordCount / wpm;
  }

  /**
   * Estimate speaking duration in whole seconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in seconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateSeconds(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60);
  }

  /**
   * Estimate speaking duration in milliseconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in milliseconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateMs(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60 * 1000);
  }

  /**
   * Estimate how many words fit in a duration, rounded to the nearest word.
   *
   * The rate is only a multiplier here, so it is not range-checked.
   *
   * @param {number} durationSeconds - Duration in seconds.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Estimated word count.
   */
  estimateWordCount(durationSeconds, wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    return Math.round(durationSeconds / 60 * wpm);
  }
}
|
|
20
|
+
export {
|
|
21
|
+
DurationEstimator
|
|
22
|
+
};
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { AudioData, AudioFormat } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Convert audio data between formats.
|
|
4
|
+
*
|
|
5
|
+
* In a real implementation, this would use ffmpeg or a similar tool.
|
|
6
|
+
* For now, provides passthrough when formats match and throws for
|
|
7
|
+
* unsupported conversions.
|
|
8
|
+
*/
|
|
9
|
+
export declare class FormatConverter {
|
|
10
|
+
/**
|
|
11
|
+
* Convert audio to a target format.
|
|
12
|
+
* Returns unchanged data if already in the target format.
*
* @param audio - Source audio; its `format` field decides whether any work is needed.
* @param targetFormat - Format the result should be labelled with.
* @returns The input object itself when formats already match; otherwise a shallow
*   copy whose `format` is targetFormat (the audio bytes are carried over as-is).
|
|
13
|
+
*/
|
|
14
|
+
convert(audio: AudioData, targetFormat: AudioFormat): AudioData;
|
|
15
|
+
/** Check if a conversion path is supported: identical formats always are; otherwise one side must be wav and the other mp3, ogg, pcm or opus. */
|
|
16
|
+
isSupported(from: AudioFormat, to: AudioFormat): boolean;
|
|
17
|
+
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/audio/format-converter.ts
|
|
3
|
+
/**
 * Converts AudioData between formats.
 *
 * No transcoding is implemented: a supported conversion only relabels the
 * `format` field, and an unsupported one is rejected.
 */
class FormatConverter {
  // Keyed lookup of source format -> allowed target formats. A Map avoids
  // accidental hits on Object.prototype members such as "toString".
  static #SUPPORTED_PATHS = new Map([
    ["wav", ["mp3", "ogg", "pcm", "opus"]],
    ["mp3", ["wav"]],
    ["ogg", ["wav"]],
    ["pcm", ["wav"]],
    ["opus", ["wav"]]
  ]);

  /**
   * Convert audio to a target format.
   *
   * @param {object} audio - AudioData-shaped object; only `format` is inspected.
   * @param {string} targetFormat - Desired format label.
   * @returns {object} The same object when it is already in the target format,
   *   otherwise a shallow copy with `format` replaced (bytes are not transcoded).
   * @throws {Error} If the conversion path is not supported.
   */
  convert(audio, targetFormat) {
    if (audio.format === targetFormat) {
      return audio;
    }
    // Relabelling an unsupported pair would hand back mislabelled bytes.
    if (!this.isSupported(audio.format, targetFormat)) {
      throw new Error(`Unsupported conversion: ${audio.format} -> ${targetFormat}`);
    }
    return {
      ...audio,
      format: targetFormat
    };
  }

  /**
   * Check if a conversion path is supported.
   *
   * @param {string} from - Source format.
   * @param {string} to - Target format.
   * @returns {boolean} True for identical formats and for wav <-> mp3/ogg/pcm/opus.
   */
  isSupported(from, to) {
    if (from === to) {
      return true;
    }
    return FormatConverter.#SUPPORTED_PATHS.get(from)?.includes(to) ?? false;
  }
}
|
|
26
|
+
export {
|
|
27
|
+
FormatConverter
|
|
28
|
+
};
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/audio/audio-concatenator.ts
|
|
3
|
+
/**
 * Joins several AudioData segments into one contiguous buffer.
 *
 * Segment bytes are appended verbatim, so every segment must share the same
 * format, sample rate and channel count.
 */
class AudioConcatenator {
  /**
   * Concatenate audio segments in order.
   *
   * @param {Array<object>} segments - AudioData-shaped objects carrying `data`,
   *   `format`, `sampleRateHz` and optional `durationMs` / `channels`
   *   (a missing channel count is treated as 1, a missing duration as 0).
   * @returns {object} A zero-length "wav" result (44100 Hz, 1 channel) when
   *   there is no first segment, a shallow copy of the segment when there is
   *   exactly one, otherwise a new AudioData holding all bytes back to back.
   * @throws {Error} If segments have mismatched formats, sample rates or
   *   channel counts.
   */
  concatenate(segments) {
    const [firstSegment] = segments;
    // An empty list destructures to undefined, so this single guard covers
    // both "no segments" and "missing first entry".
    if (!firstSegment) {
      return {
        data: new Uint8Array(0),
        format: "wav",
        sampleRateHz: 44100,
        durationMs: 0,
        channels: 1
      };
    }
    if (segments.length === 1) {
      return { ...firstSegment };
    }

    const referenceFormat = firstSegment.format;
    const referenceSampleRate = firstSegment.sampleRateHz;
    const referenceChannels = firstSegment.channels ?? 1;

    // Validate everything before allocating so a mismatch never leaves a
    // half-filled buffer behind.
    let totalBytes = 0;
    let totalDurationMs = 0;
    for (const seg of segments) {
      if (seg.format !== referenceFormat) {
        throw new Error(`Format mismatch: expected ${referenceFormat}, got ${seg.format}`);
      }
      if (seg.sampleRateHz !== referenceSampleRate) {
        throw new Error(`Sample rate mismatch: expected ${referenceSampleRate}, got ${seg.sampleRateHz}`);
      }
      // Appending bytes with a different channel layout would be reported
      // under the first segment's channel count and play back incorrectly.
      const segChannels = seg.channels ?? 1;
      if (segChannels !== referenceChannels) {
        throw new Error(`Channel count mismatch: expected ${referenceChannels}, got ${segChannels}`);
      }
      totalBytes += seg.data.length;
      totalDurationMs += seg.durationMs ?? 0;
    }

    const combined = new Uint8Array(totalBytes);
    let offset = 0;
    for (const seg of segments) {
      combined.set(seg.data, offset);
      offset += seg.data.length;
    }

    return {
      data: combined,
      format: referenceFormat,
      sampleRateHz: referenceSampleRate,
      durationMs: totalDurationMs,
      channels: referenceChannels
    };
  }
}
|
|
55
|
+
|
|
56
|
+
// src/audio/duration-estimator.ts
|
|
57
|
+
/**
 * Estimates speech duration from text using a whitespace word count and an
 * assumed speaking rate.
 */
class DurationEstimator {
  /** Words per minute used when no rate is supplied. */
  static DEFAULT_WPM = 150;

  /**
   * Resolve the speaking rate used as a divisor.
   *
   * @param {number} [wordsPerMinute] - Caller-supplied rate, if any.
   * @returns {number} The supplied rate, or DEFAULT_WPM when it is null/undefined.
   * @throws {RangeError} If the rate is not a positive finite number; dividing
   *   by it would otherwise yield Infinity, NaN or a negative duration.
   */
  static #resolveDivisorWpm(wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    if (!Number.isFinite(wpm) || wpm <= 0) {
      throw new RangeError(`wordsPerMinute must be a positive finite number, got ${wpm}`);
    }
    return wpm;
  }

  /**
   * Speaking time in minutes for the whitespace-separated words in `text`.
   *
   * @param {string} text - Text to measure.
   * @param {number} [wordsPerMinute] - Speaking rate.
   * @returns {number} Fractional minutes (0 for empty or whitespace-only text).
   */
  static #speakingMinutes(text, wordsPerMinute) {
    const wpm = DurationEstimator.#resolveDivisorWpm(wordsPerMinute);
    // filter(Boolean) drops the empty strings produced by leading/trailing whitespace.
    const wordCount = text.split(/\s+/).filter(Boolean).length;
    return wordCount / wpm;
  }

  /**
   * Estimate speaking duration in whole seconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in seconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateSeconds(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60);
  }

  /**
   * Estimate speaking duration in milliseconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in milliseconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateMs(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60 * 1000);
  }

  /**
   * Estimate how many words fit in a duration, rounded to the nearest word.
   *
   * The rate is only a multiplier here, so it is not range-checked.
   *
   * @param {number} durationSeconds - Duration in seconds.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Estimated word count.
   */
  estimateWordCount(durationSeconds, wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    return Math.round(durationSeconds / 60 * wpm);
  }
}
|
|
74
|
+
|
|
75
|
+
// src/audio/format-converter.ts
|
|
76
|
+
/**
 * Converts AudioData between formats.
 *
 * No transcoding is implemented: a supported conversion only relabels the
 * `format` field, and an unsupported one is rejected.
 */
class FormatConverter {
  // Keyed lookup of source format -> allowed target formats. A Map avoids
  // accidental hits on Object.prototype members such as "toString".
  static #SUPPORTED_PATHS = new Map([
    ["wav", ["mp3", "ogg", "pcm", "opus"]],
    ["mp3", ["wav"]],
    ["ogg", ["wav"]],
    ["pcm", ["wav"]],
    ["opus", ["wav"]]
  ]);

  /**
   * Convert audio to a target format.
   *
   * @param {object} audio - AudioData-shaped object; only `format` is inspected.
   * @param {string} targetFormat - Desired format label.
   * @returns {object} The same object when it is already in the target format,
   *   otherwise a shallow copy with `format` replaced (bytes are not transcoded).
   * @throws {Error} If the conversion path is not supported.
   */
  convert(audio, targetFormat) {
    if (audio.format === targetFormat) {
      return audio;
    }
    // Relabelling an unsupported pair would hand back mislabelled bytes.
    if (!this.isSupported(audio.format, targetFormat)) {
      throw new Error(`Unsupported conversion: ${audio.format} -> ${targetFormat}`);
    }
    return {
      ...audio,
      format: targetFormat
    };
  }

  /**
   * Check if a conversion path is supported.
   *
   * @param {string} from - Source format.
   * @param {string} to - Target format.
   * @returns {boolean} True for identical formats and for wav <-> mp3/ogg/pcm/opus.
   */
  isSupported(from, to) {
    if (from === to) {
      return true;
    }
    return FormatConverter.#SUPPORTED_PATHS.get(from)?.includes(to) ?? false;
  }
}
|
|
99
|
+
|
|
100
|
+
// src/audio/silence-generator.ts
|
|
101
|
+
/**
 * Produces zero-filled AudioData buffers to use as silence.
 *
 * The buffer is sized at two bytes per sample per channel and is all zeros
 * whatever `format` label is requested.
 */
class SilenceGenerator {
  // Bytes written per sample per channel.
  static #BYTES_PER_SAMPLE = 2;

  /**
   * Generate a silence buffer of the given duration.
   *
   * @param {number} durationMs - Silence duration in milliseconds.
   * @param {string} [format="wav"] - Format label for the result.
   * @param {number} [sampleRateHz=44100] - Sample rate.
   * @param {number} [channels=1] - Number of channels.
   * @returns {object} AudioData whose `data` holds
   *   ceil(sampleRateHz * durationMs / 1000) * 2 * channels zero bytes.
   * @throws {RangeError} If durationMs is negative or non-finite, or
   *   sampleRateHz is not a positive finite number.
   */
  generate(durationMs, format = "wav", sampleRateHz = 44100, channels = 1) {
    // Fail with a descriptive message instead of the engine's
    // "Invalid typed array length", and reject NaN rather than labelling an
    // empty buffer with a NaN duration.
    if (!Number.isFinite(durationMs) || durationMs < 0) {
      throw new RangeError(`durationMs must be a non-negative finite number, got ${durationMs}`);
    }
    if (!Number.isFinite(sampleRateHz) || sampleRateHz <= 0) {
      throw new RangeError(`sampleRateHz must be a positive finite number, got ${sampleRateHz}`);
    }
    const totalSamples = Math.ceil(sampleRateHz * durationMs / 1000);
    const data = new Uint8Array(totalSamples * SilenceGenerator.#BYTES_PER_SAMPLE * channels);
    return {
      data,
      format,
      sampleRateHz,
      durationMs,
      channels
    };
  }
}
|
|
116
|
+
export {
|
|
117
|
+
SilenceGenerator,
|
|
118
|
+
FormatConverter,
|
|
119
|
+
DurationEstimator,
|
|
120
|
+
AudioConcatenator
|
|
121
|
+
};
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { AudioData, AudioFormat } from '../types';
|
|
2
|
+
/**
|
|
3
|
+
* Generate silence buffers in various audio formats.
|
|
4
|
+
* Used by AudioAssembler to insert pauses between TTS segments.
|
|
5
|
+
*/
|
|
6
|
+
export declare class SilenceGenerator {
|
|
7
|
+
/**
|
|
8
|
+
* Generate a silence buffer of the given duration.
* The returned data is zero-filled and sized as
* ceil(sampleRateHz * durationMs / 1000) samples x 2 bytes x channels.
|
|
9
|
+
*
|
|
10
|
+
* @param durationMs - Silence duration in milliseconds
|
|
11
|
+
* @param format - Target audio format (default "wav")
|
|
12
|
+
* @param sampleRateHz - Sample rate (default 44100)
|
|
13
|
+
* @param channels - Number of channels (default 1)
|
|
14
|
+
*/
|
|
15
|
+
generate(durationMs: number, format?: AudioFormat, sampleRateHz?: number, channels?: 1 | 2): AudioData;
|
|
16
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// @bun
|
|
2
|
+
// src/audio/silence-generator.ts
|
|
3
|
+
/**
 * Produces zero-filled AudioData buffers to use as silence.
 *
 * The buffer is sized at two bytes per sample per channel and is all zeros
 * whatever `format` label is requested.
 */
class SilenceGenerator {
  // Bytes written per sample per channel.
  static #BYTES_PER_SAMPLE = 2;

  /**
   * Generate a silence buffer of the given duration.
   *
   * @param {number} durationMs - Silence duration in milliseconds.
   * @param {string} [format="wav"] - Format label for the result.
   * @param {number} [sampleRateHz=44100] - Sample rate.
   * @param {number} [channels=1] - Number of channels.
   * @returns {object} AudioData whose `data` holds
   *   ceil(sampleRateHz * durationMs / 1000) * 2 * channels zero bytes.
   * @throws {RangeError} If durationMs is negative or non-finite, or
   *   sampleRateHz is not a positive finite number.
   */
  generate(durationMs, format = "wav", sampleRateHz = 44100, channels = 1) {
    // Fail with a descriptive message instead of the engine's
    // "Invalid typed array length", and reject NaN rather than labelling an
    // empty buffer with a NaN duration.
    if (!Number.isFinite(durationMs) || durationMs < 0) {
      throw new RangeError(`durationMs must be a non-negative finite number, got ${durationMs}`);
    }
    if (!Number.isFinite(sampleRateHz) || sampleRateHz <= 0) {
      throw new RangeError(`sampleRateHz must be a positive finite number, got ${sampleRateHz}`);
    }
    const totalSamples = Math.ceil(sampleRateHz * durationMs / 1000);
    const data = new Uint8Array(totalSamples * SilenceGenerator.#BYTES_PER_SAMPLE * channels);
    return {
      data,
      format,
      sampleRateHz,
      durationMs,
      channels
    };
  }
}
|
|
18
|
+
export {
|
|
19
|
+
SilenceGenerator
|
|
20
|
+
};
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
// src/audio/audio-concatenator.ts
|
|
2
|
+
/**
 * Joins several AudioData segments into one contiguous buffer.
 *
 * Segment bytes are appended verbatim, so every segment must share the same
 * format, sample rate and channel count.
 */
class AudioConcatenator {
  /**
   * Concatenate audio segments in order.
   *
   * @param {Array<object>} segments - AudioData-shaped objects carrying `data`,
   *   `format`, `sampleRateHz` and optional `durationMs` / `channels`
   *   (a missing channel count is treated as 1, a missing duration as 0).
   * @returns {object} A zero-length "wav" result (44100 Hz, 1 channel) when
   *   there is no first segment, a shallow copy of the segment when there is
   *   exactly one, otherwise a new AudioData holding all bytes back to back.
   * @throws {Error} If segments have mismatched formats, sample rates or
   *   channel counts.
   */
  concatenate(segments) {
    const [firstSegment] = segments;
    // An empty list destructures to undefined, so this single guard covers
    // both "no segments" and "missing first entry".
    if (!firstSegment) {
      return {
        data: new Uint8Array(0),
        format: "wav",
        sampleRateHz: 44100,
        durationMs: 0,
        channels: 1
      };
    }
    if (segments.length === 1) {
      return { ...firstSegment };
    }

    const referenceFormat = firstSegment.format;
    const referenceSampleRate = firstSegment.sampleRateHz;
    const referenceChannels = firstSegment.channels ?? 1;

    // Validate everything before allocating so a mismatch never leaves a
    // half-filled buffer behind.
    let totalBytes = 0;
    let totalDurationMs = 0;
    for (const seg of segments) {
      if (seg.format !== referenceFormat) {
        throw new Error(`Format mismatch: expected ${referenceFormat}, got ${seg.format}`);
      }
      if (seg.sampleRateHz !== referenceSampleRate) {
        throw new Error(`Sample rate mismatch: expected ${referenceSampleRate}, got ${seg.sampleRateHz}`);
      }
      // Appending bytes with a different channel layout would be reported
      // under the first segment's channel count and play back incorrectly.
      const segChannels = seg.channels ?? 1;
      if (segChannels !== referenceChannels) {
        throw new Error(`Channel count mismatch: expected ${referenceChannels}, got ${segChannels}`);
      }
      totalBytes += seg.data.length;
      totalDurationMs += seg.durationMs ?? 0;
    }

    const combined = new Uint8Array(totalBytes);
    let offset = 0;
    for (const seg of segments) {
      combined.set(seg.data, offset);
      offset += seg.data.length;
    }

    return {
      data: combined,
      format: referenceFormat,
      sampleRateHz: referenceSampleRate,
      durationMs: totalDurationMs,
      channels: referenceChannels
    };
  }
}
|
|
54
|
+
export {
|
|
55
|
+
AudioConcatenator
|
|
56
|
+
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// src/audio/duration-estimator.ts
|
|
2
|
+
/**
 * Estimates speech duration from text using a whitespace word count and an
 * assumed speaking rate.
 */
class DurationEstimator {
  /** Words per minute used when no rate is supplied. */
  static DEFAULT_WPM = 150;

  /**
   * Resolve the speaking rate used as a divisor.
   *
   * @param {number} [wordsPerMinute] - Caller-supplied rate, if any.
   * @returns {number} The supplied rate, or DEFAULT_WPM when it is null/undefined.
   * @throws {RangeError} If the rate is not a positive finite number; dividing
   *   by it would otherwise yield Infinity, NaN or a negative duration.
   */
  static #resolveDivisorWpm(wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    if (!Number.isFinite(wpm) || wpm <= 0) {
      throw new RangeError(`wordsPerMinute must be a positive finite number, got ${wpm}`);
    }
    return wpm;
  }

  /**
   * Speaking time in minutes for the whitespace-separated words in `text`.
   *
   * @param {string} text - Text to measure.
   * @param {number} [wordsPerMinute] - Speaking rate.
   * @returns {number} Fractional minutes (0 for empty or whitespace-only text).
   */
  static #speakingMinutes(text, wordsPerMinute) {
    const wpm = DurationEstimator.#resolveDivisorWpm(wordsPerMinute);
    // filter(Boolean) drops the empty strings produced by leading/trailing whitespace.
    const wordCount = text.split(/\s+/).filter(Boolean).length;
    return wordCount / wpm;
  }

  /**
   * Estimate speaking duration in whole seconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in seconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateSeconds(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60);
  }

  /**
   * Estimate speaking duration in milliseconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in milliseconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateMs(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60 * 1000);
  }

  /**
   * Estimate how many words fit in a duration, rounded to the nearest word.
   *
   * The rate is only a multiplier here, so it is not range-checked.
   *
   * @param {number} durationSeconds - Duration in seconds.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Estimated word count.
   */
  estimateWordCount(durationSeconds, wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    return Math.round(durationSeconds / 60 * wpm);
  }
}
|
|
19
|
+
export {
|
|
20
|
+
DurationEstimator
|
|
21
|
+
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
// src/audio/format-converter.ts
|
|
2
|
+
/**
 * Converts AudioData between formats.
 *
 * No transcoding is implemented: a supported conversion only relabels the
 * `format` field, and an unsupported one is rejected.
 */
class FormatConverter {
  // Keyed lookup of source format -> allowed target formats. A Map avoids
  // accidental hits on Object.prototype members such as "toString".
  static #SUPPORTED_PATHS = new Map([
    ["wav", ["mp3", "ogg", "pcm", "opus"]],
    ["mp3", ["wav"]],
    ["ogg", ["wav"]],
    ["pcm", ["wav"]],
    ["opus", ["wav"]]
  ]);

  /**
   * Convert audio to a target format.
   *
   * @param {object} audio - AudioData-shaped object; only `format` is inspected.
   * @param {string} targetFormat - Desired format label.
   * @returns {object} The same object when it is already in the target format,
   *   otherwise a shallow copy with `format` replaced (bytes are not transcoded).
   * @throws {Error} If the conversion path is not supported.
   */
  convert(audio, targetFormat) {
    if (audio.format === targetFormat) {
      return audio;
    }
    // Relabelling an unsupported pair would hand back mislabelled bytes.
    if (!this.isSupported(audio.format, targetFormat)) {
      throw new Error(`Unsupported conversion: ${audio.format} -> ${targetFormat}`);
    }
    return {
      ...audio,
      format: targetFormat
    };
  }

  /**
   * Check if a conversion path is supported.
   *
   * @param {string} from - Source format.
   * @param {string} to - Target format.
   * @returns {boolean} True for identical formats and for wav <-> mp3/ogg/pcm/opus.
   */
  isSupported(from, to) {
    if (from === to) {
      return true;
    }
    return FormatConverter.#SUPPORTED_PATHS.get(from)?.includes(to) ?? false;
  }
}
|
|
25
|
+
export {
|
|
26
|
+
FormatConverter
|
|
27
|
+
};
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// src/audio/audio-concatenator.ts
|
|
2
|
+
/**
 * Joins several AudioData segments into one contiguous buffer.
 *
 * Segment bytes are appended verbatim, so every segment must share the same
 * format, sample rate and channel count.
 */
class AudioConcatenator {
  /**
   * Concatenate audio segments in order.
   *
   * @param {Array<object>} segments - AudioData-shaped objects carrying `data`,
   *   `format`, `sampleRateHz` and optional `durationMs` / `channels`
   *   (a missing channel count is treated as 1, a missing duration as 0).
   * @returns {object} A zero-length "wav" result (44100 Hz, 1 channel) when
   *   there is no first segment, a shallow copy of the segment when there is
   *   exactly one, otherwise a new AudioData holding all bytes back to back.
   * @throws {Error} If segments have mismatched formats, sample rates or
   *   channel counts.
   */
  concatenate(segments) {
    const [firstSegment] = segments;
    // An empty list destructures to undefined, so this single guard covers
    // both "no segments" and "missing first entry".
    if (!firstSegment) {
      return {
        data: new Uint8Array(0),
        format: "wav",
        sampleRateHz: 44100,
        durationMs: 0,
        channels: 1
      };
    }
    if (segments.length === 1) {
      return { ...firstSegment };
    }

    const referenceFormat = firstSegment.format;
    const referenceSampleRate = firstSegment.sampleRateHz;
    const referenceChannels = firstSegment.channels ?? 1;

    // Validate everything before allocating so a mismatch never leaves a
    // half-filled buffer behind.
    let totalBytes = 0;
    let totalDurationMs = 0;
    for (const seg of segments) {
      if (seg.format !== referenceFormat) {
        throw new Error(`Format mismatch: expected ${referenceFormat}, got ${seg.format}`);
      }
      if (seg.sampleRateHz !== referenceSampleRate) {
        throw new Error(`Sample rate mismatch: expected ${referenceSampleRate}, got ${seg.sampleRateHz}`);
      }
      // Appending bytes with a different channel layout would be reported
      // under the first segment's channel count and play back incorrectly.
      const segChannels = seg.channels ?? 1;
      if (segChannels !== referenceChannels) {
        throw new Error(`Channel count mismatch: expected ${referenceChannels}, got ${segChannels}`);
      }
      totalBytes += seg.data.length;
      totalDurationMs += seg.durationMs ?? 0;
    }

    const combined = new Uint8Array(totalBytes);
    let offset = 0;
    for (const seg of segments) {
      combined.set(seg.data, offset);
      offset += seg.data.length;
    }

    return {
      data: combined,
      format: referenceFormat,
      sampleRateHz: referenceSampleRate,
      durationMs: totalDurationMs,
      channels: referenceChannels
    };
  }
}
|
|
54
|
+
|
|
55
|
+
// src/audio/duration-estimator.ts
|
|
56
|
+
/**
 * Estimates speech duration from text using a whitespace word count and an
 * assumed speaking rate.
 */
class DurationEstimator {
  /** Words per minute used when no rate is supplied. */
  static DEFAULT_WPM = 150;

  /**
   * Resolve the speaking rate used as a divisor.
   *
   * @param {number} [wordsPerMinute] - Caller-supplied rate, if any.
   * @returns {number} The supplied rate, or DEFAULT_WPM when it is null/undefined.
   * @throws {RangeError} If the rate is not a positive finite number; dividing
   *   by it would otherwise yield Infinity, NaN or a negative duration.
   */
  static #resolveDivisorWpm(wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    if (!Number.isFinite(wpm) || wpm <= 0) {
      throw new RangeError(`wordsPerMinute must be a positive finite number, got ${wpm}`);
    }
    return wpm;
  }

  /**
   * Speaking time in minutes for the whitespace-separated words in `text`.
   *
   * @param {string} text - Text to measure.
   * @param {number} [wordsPerMinute] - Speaking rate.
   * @returns {number} Fractional minutes (0 for empty or whitespace-only text).
   */
  static #speakingMinutes(text, wordsPerMinute) {
    const wpm = DurationEstimator.#resolveDivisorWpm(wordsPerMinute);
    // filter(Boolean) drops the empty strings produced by leading/trailing whitespace.
    const wordCount = text.split(/\s+/).filter(Boolean).length;
    return wordCount / wpm;
  }

  /**
   * Estimate speaking duration in whole seconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in seconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateSeconds(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60);
  }

  /**
   * Estimate speaking duration in milliseconds, rounded up.
   *
   * @param {string} text - The text to estimate duration for.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Duration in milliseconds.
   * @throws {RangeError} If wordsPerMinute is not a positive finite number.
   */
  estimateMs(text, wordsPerMinute) {
    return Math.ceil(DurationEstimator.#speakingMinutes(text, wordsPerMinute) * 60 * 1000);
  }

  /**
   * Estimate how many words fit in a duration, rounded to the nearest word.
   *
   * The rate is only a multiplier here, so it is not range-checked.
   *
   * @param {number} durationSeconds - Duration in seconds.
   * @param {number} [wordsPerMinute] - Speaking rate (default 150 WPM).
   * @returns {number} Estimated word count.
   */
  estimateWordCount(durationSeconds, wordsPerMinute) {
    const wpm = wordsPerMinute ?? DurationEstimator.DEFAULT_WPM;
    return Math.round(durationSeconds / 60 * wpm);
  }
}
|
|
73
|
+
|
|
74
|
+
// src/audio/format-converter.ts
|
|
75
|
+
/**
 * Converts AudioData between formats.
 *
 * No transcoding is implemented: a supported conversion only relabels the
 * `format` field, and an unsupported one is rejected.
 */
class FormatConverter {
  // Keyed lookup of source format -> allowed target formats. A Map avoids
  // accidental hits on Object.prototype members such as "toString".
  static #SUPPORTED_PATHS = new Map([
    ["wav", ["mp3", "ogg", "pcm", "opus"]],
    ["mp3", ["wav"]],
    ["ogg", ["wav"]],
    ["pcm", ["wav"]],
    ["opus", ["wav"]]
  ]);

  /**
   * Convert audio to a target format.
   *
   * @param {object} audio - AudioData-shaped object; only `format` is inspected.
   * @param {string} targetFormat - Desired format label.
   * @returns {object} The same object when it is already in the target format,
   *   otherwise a shallow copy with `format` replaced (bytes are not transcoded).
   * @throws {Error} If the conversion path is not supported.
   */
  convert(audio, targetFormat) {
    if (audio.format === targetFormat) {
      return audio;
    }
    // Relabelling an unsupported pair would hand back mislabelled bytes.
    if (!this.isSupported(audio.format, targetFormat)) {
      throw new Error(`Unsupported conversion: ${audio.format} -> ${targetFormat}`);
    }
    return {
      ...audio,
      format: targetFormat
    };
  }

  /**
   * Check if a conversion path is supported.
   *
   * @param {string} from - Source format.
   * @param {string} to - Target format.
   * @returns {boolean} True for identical formats and for wav <-> mp3/ogg/pcm/opus.
   */
  isSupported(from, to) {
    if (from === to) {
      return true;
    }
    return FormatConverter.#SUPPORTED_PATHS.get(from)?.includes(to) ?? false;
  }
}
|
|
98
|
+
|
|
99
|
+
// src/audio/silence-generator.ts
|
|
100
|
+
/**
 * Produces zero-filled AudioData buffers to use as silence.
 *
 * The buffer is sized at two bytes per sample per channel and is all zeros
 * whatever `format` label is requested.
 */
class SilenceGenerator {
  // Bytes written per sample per channel.
  static #BYTES_PER_SAMPLE = 2;

  /**
   * Generate a silence buffer of the given duration.
   *
   * @param {number} durationMs - Silence duration in milliseconds.
   * @param {string} [format="wav"] - Format label for the result.
   * @param {number} [sampleRateHz=44100] - Sample rate.
   * @param {number} [channels=1] - Number of channels.
   * @returns {object} AudioData whose `data` holds
   *   ceil(sampleRateHz * durationMs / 1000) * 2 * channels zero bytes.
   * @throws {RangeError} If durationMs is negative or non-finite, or
   *   sampleRateHz is not a positive finite number.
   */
  generate(durationMs, format = "wav", sampleRateHz = 44100, channels = 1) {
    // Fail with a descriptive message instead of the engine's
    // "Invalid typed array length", and reject NaN rather than labelling an
    // empty buffer with a NaN duration.
    if (!Number.isFinite(durationMs) || durationMs < 0) {
      throw new RangeError(`durationMs must be a non-negative finite number, got ${durationMs}`);
    }
    if (!Number.isFinite(sampleRateHz) || sampleRateHz <= 0) {
      throw new RangeError(`sampleRateHz must be a positive finite number, got ${sampleRateHz}`);
    }
    const totalSamples = Math.ceil(sampleRateHz * durationMs / 1000);
    const data = new Uint8Array(totalSamples * SilenceGenerator.#BYTES_PER_SAMPLE * channels);
    return {
      data,
      format,
      sampleRateHz,
      durationMs,
      channels
    };
  }
}
|
|
115
|
+
export {
|
|
116
|
+
SilenceGenerator,
|
|
117
|
+
FormatConverter,
|
|
118
|
+
DurationEstimator,
|
|
119
|
+
AudioConcatenator
|
|
120
|
+
};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
// src/audio/silence-generator.ts
|
|
2
|
+
/**
 * Produces zero-filled AudioData buffers to use as silence.
 *
 * The buffer is sized at two bytes per sample per channel and is all zeros
 * whatever `format` label is requested.
 */
class SilenceGenerator {
  // Bytes written per sample per channel.
  static #BYTES_PER_SAMPLE = 2;

  /**
   * Generate a silence buffer of the given duration.
   *
   * @param {number} durationMs - Silence duration in milliseconds.
   * @param {string} [format="wav"] - Format label for the result.
   * @param {number} [sampleRateHz=44100] - Sample rate.
   * @param {number} [channels=1] - Number of channels.
   * @returns {object} AudioData whose `data` holds
   *   ceil(sampleRateHz * durationMs / 1000) * 2 * channels zero bytes.
   * @throws {RangeError} If durationMs is negative or non-finite, or
   *   sampleRateHz is not a positive finite number.
   */
  generate(durationMs, format = "wav", sampleRateHz = 44100, channels = 1) {
    // Fail with a descriptive message instead of the engine's
    // "Invalid typed array length", and reject NaN rather than labelling an
    // empty buffer with a NaN duration.
    if (!Number.isFinite(durationMs) || durationMs < 0) {
      throw new RangeError(`durationMs must be a non-negative finite number, got ${durationMs}`);
    }
    if (!Number.isFinite(sampleRateHz) || sampleRateHz <= 0) {
      throw new RangeError(`sampleRateHz must be a positive finite number, got ${sampleRateHz}`);
    }
    const totalSamples = Math.ceil(sampleRateHz * durationMs / 1000);
    const data = new Uint8Array(totalSamples * SilenceGenerator.#BYTES_PER_SAMPLE * channels);
    return {
      data,
      format,
      sampleRateHz,
      durationMs,
      channels
    };
  }
}
|
|
17
|
+
export {
|
|
18
|
+
SilenceGenerator
|
|
19
|
+
};
|