@pie-players/tts-server-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ /**
2
+ * Speech marks utilities
3
+ * @module @pie-players/tts-server-core
4
+ */
5
+ import type { SpeechMark } from "./types.js";
6
+ /**
7
+ * Estimate speech marks for text when provider doesn't support them
8
+ *
9
+ * Uses average speaking rate to estimate word timing.
10
+ * Not as accurate as provider-generated marks, but better than nothing.
11
+ *
12
+ * @param text - Text to generate marks for
13
+ * @param avgWordsPerMinute - Average speaking rate (default 150)
14
+ * @returns Estimated speech marks
15
+ */
16
+ export declare function estimateSpeechMarks(text: string, avgWordsPerMinute?: number): SpeechMark[];
17
+ /**
18
+ * Adjust speech marks timing for different speaking rates
19
+ *
20
+ * @param marks - Original speech marks
21
+ * @param rate - Speech rate multiplier (0.25 to 4.0)
22
+ * @returns Adjusted speech marks
23
+ */
24
+ export declare function adjustSpeechMarksForRate(marks: SpeechMark[], rate: number): SpeechMark[];
25
+ /**
26
+ * Validate speech marks
27
+ * Ensures marks are properly ordered and have valid data
28
+ *
29
+ * @param marks - Speech marks to validate
30
+ * @returns Validation errors (empty array if valid)
31
+ */
32
+ export declare function validateSpeechMarks(marks: SpeechMark[]): string[];
33
+ /**
34
+ * Merge overlapping or adjacent speech marks
35
+ * Useful when combining marks from multiple sources
36
+ *
37
+ * @param marks - Speech marks to merge
38
+ * @returns Merged speech marks
39
+ */
40
+ export declare function mergeSpeechMarks(marks: SpeechMark[]): SpeechMark[];
41
+ /**
42
+ * Filter speech marks by type
43
+ *
44
+ * @param marks - Speech marks to filter
45
+ * @param type - Type to filter by
46
+ * @returns Filtered speech marks
47
+ */
48
+ export declare function filterSpeechMarksByType(marks: SpeechMark[], type: "word" | "sentence" | "ssml"): SpeechMark[];
49
+ /**
50
+ * Get speech mark at specific time
51
+ *
52
+ * @param marks - Speech marks
53
+ * @param time - Time in milliseconds
54
+ * @returns Speech mark at time, or null if none found
55
+ */
56
+ export declare function getSpeechMarkAtTime(marks: SpeechMark[], time: number): SpeechMark | null;
57
+ /**
58
+ * Calculate statistics for speech marks
59
+ *
60
+ * @param marks - Speech marks
61
+ * @returns Statistics about the marks
62
+ */
63
+ export declare function getSpeechMarksStats(marks: SpeechMark[]): {
64
+ count: number;
65
+ totalDuration: number;
66
+ avgWordDuration: number;
67
+ wordsPerMinute: number;
68
+ wordCount?: undefined;
69
+ } | {
70
+ count: number;
71
+ wordCount: number;
72
+ totalDuration: number;
73
+ avgWordDuration: number;
74
+ wordsPerMinute: number;
75
+ };
76
+ //# sourceMappingURL=speech-marks.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"speech-marks.d.ts","sourceRoot":"","sources":["../src/speech-marks.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C;;;;;;;;;GASG;AACH,wBAAgB,mBAAmB,CAClC,IAAI,EAAE,MAAM,EACZ,iBAAiB,SAAM,GACrB,UAAU,EAAE,CA8Bd;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACvC,KAAK,EAAE,UAAU,EAAE,EACnB,IAAI,EAAE,MAAM,GACV,UAAU,EAAE,CASd;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,MAAM,EAAE,CAoCjE;AAED;;;;;;GAMG;AACH,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,UAAU,EAAE,GAAG,UAAU,EAAE,CA0BlE;AAED;;;;;;GAMG;AACH,wBAAgB,uBAAuB,CACtC,KAAK,EAAE,UAAU,EAAE,EACnB,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,MAAM,GAChC,UAAU,EAAE,CAEd;AAED;;;;;;GAMG;AACH,wBAAgB,mBAAmB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,IAAI,EAAE,MAAM,GACV,UAAU,GAAG,IAAI,CAmCnB;AAED;;;;;GAKG;AACH,wBAAgB,mBAAmB,CAAC,KAAK,EAAE,UAAU,EAAE;;;;;;;;;;;;EAsBtD"}
@@ -0,0 +1,194 @@
1
+ /**
2
+ * Speech marks utilities
3
+ * @module @pie-players/tts-server-core
4
+ */
/**
 * Estimate word-level speech marks for text when the provider doesn't supply them.
 *
 * Each whitespace-delimited token becomes one "word" mark. Marks are spaced
 * evenly at `60000 / avgWordsPerMinute` milliseconds, so the timing is an
 * approximation: not as accurate as provider-generated marks, but better than
 * nothing.
 *
 * @param text - Text to generate marks for. Anything that is not a string
 *   yields an empty array.
 * @param avgWordsPerMinute - Average speaking rate (default 150). A value that
 *   is not a finite positive number falls back to 150, because dividing by it
 *   would otherwise produce NaN, Infinity or negative timings.
 * @returns Estimated speech marks in text order; `start` is inclusive and
 *   `end` is exclusive, both indexing into `text`.
 */
export function estimateSpeechMarks(text, avgWordsPerMinute = 150) {
    if (typeof text !== "string") {
        return [];
    }
    const DEFAULT_WORDS_PER_MINUTE = 150;
    const wordsPerMinute = Number.isFinite(avgWordsPerMinute) && avgWordsPerMinute > 0
        ? avgWordsPerMinute
        : DEFAULT_WORDS_PER_MINUTE;
    const msPerWord = (60 * 1000) / wordsPerMinute;
    const marks = [];
    // A global regex reports each token's offset directly, so the original
    // text never has to be re-scanned to recover word positions.
    const wordPattern = /\S+/g;
    let match = wordPattern.exec(text);
    while (match !== null) {
        const word = match[0];
        marks.push({
            // marks.length is the zero-based index of the word being added.
            time: Math.round(marks.length * msPerWord),
            type: "word",
            start: match.index,
            end: match.index + word.length,
            value: word,
        });
        match = wordPattern.exec(text);
    }
    return marks;
}
/**
 * Adjust speech marks timing for different speaking rates.
 *
 * Every mark's `time` is divided by `rate` and rounded to the nearest
 * millisecond, so a rate of 2 halves all timings and a rate of 0.5 doubles
 * them. The input marks are never modified.
 *
 * @param marks - Original speech marks
 * @param rate - Speech rate multiplier (0.25 to 4.0). A rate of exactly 1, or
 *   a rate that is not a finite positive number (0, negative, NaN, Infinity),
 *   means "no adjustment": the input array itself is returned.
 * @returns Adjusted speech marks (new array of new objects), or `marks`
 *   itself when no adjustment is applied
 */
export function adjustSpeechMarksForRate(marks, rate) {
    const divisor = Number(rate);
    // Dividing by 0, a negative number or NaN would yield Infinity, negative
    // or NaN timings, so such rates are treated as "leave timings as they are".
    if (divisor === 1 || !Number.isFinite(divisor) || divisor <= 0) {
        return marks;
    }
    return marks.map((mark) => ({
        ...mark,
        time: Math.round(mark.time / divisor),
    }));
}
/**
 * Validate speech marks.
 *
 * Checks, for every mark, that `time` and `start` are finite non-negative
 * numbers, that `end` is a finite number greater than `start`, that `value`
 * is a non-empty string, and that `time` never decreases from one mark to
 * the next. NaN and Infinity are rejected, and entries that are not objects
 * are reported instead of causing a TypeError.
 *
 * @param marks - Speech marks to validate; a missing or empty list is valid
 * @returns Validation errors (empty array if valid), one message per problem,
 *   each prefixed with the index of the offending mark
 */
export function validateSpeechMarks(marks) {
    const errors = [];
    if (!marks || marks.length === 0) {
        return errors; // Empty is valid
    }
    for (let i = 0; i < marks.length; i++) {
        const mark = marks[i];
        if (mark === null || typeof mark !== "object") {
            errors.push(`Mark ${i}: not an object`);
            continue;
        }
        // Number.isFinite is false for non-numbers as well as NaN/Infinity,
        // so it covers the type check and the "usable number" check at once.
        const hasFiniteTime = Number.isFinite(mark.time);
        if (!hasFiniteTime || mark.time < 0) {
            errors.push(`Mark ${i}: invalid time (${mark.time})`);
        }
        if (!Number.isFinite(mark.start) || mark.start < 0) {
            errors.push(`Mark ${i}: invalid start (${mark.start})`);
        }
        if (!Number.isFinite(mark.end) || mark.end <= mark.start) {
            errors.push(`Mark ${i}: invalid end (${mark.end}, start: ${mark.start})`);
        }
        if (!mark.value || typeof mark.value !== "string") {
            errors.push(`Mark ${i}: invalid value`);
        }
        // Ordering is only checked when both times are usable numbers; an
        // unusable time has already been reported above.
        const previous = i > 0 ? marks[i - 1] : null;
        if (hasFiniteTime &&
            previous !== null &&
            typeof previous === "object" &&
            Number.isFinite(previous.time) &&
            mark.time < previous.time) {
            errors.push(`Mark ${i}: time (${mark.time}) is less than previous mark (${previous.time})`);
        }
    }
    return errors;
}
/**
 * Merge overlapping or adjacent speech marks.
 * Useful when combining marks from multiple sources.
 *
 * Marks are ordered by `start`; a mark whose `start` is at or before the
 * `end` of the mark before it is folded into that mark. The merged mark
 * spans to the larger `end`, joins the two `value`s with a single space,
 * keeps the earlier `time`, and keeps the other fields of the first mark.
 *
 * The input array and its mark objects are left untouched: merging works on
 * copies, so the result can be modified freely.
 *
 * @param marks - Speech marks to merge
 * @returns Merged speech marks, sorted by `start`, as a new array of new objects
 */
export function mergeSpeechMarks(marks) {
    // Clone every mark up front; the merge step below writes to the marks it
    // accumulates and must not write through to the caller's objects.
    const sorted = marks
        .map((mark) => ({ ...mark }))
        .sort((a, b) => a.start - b.start);
    const merged = [];
    for (const current of sorted) {
        const previous = merged[merged.length - 1];
        if (previous !== undefined && current.start <= previous.end) {
            // Overlapping or touching: extend the previous mark.
            previous.end = Math.max(previous.end, current.end);
            previous.value = previous.value + " " + current.value;
            previous.time = Math.min(previous.time, current.time); // Use earlier time
        }
        else {
            merged.push(current);
        }
    }
    return merged;
}
/**
 * Select the speech marks of one type.
 *
 * @param marks - Speech marks to filter
 * @param type - Type to keep; compared to each mark's `type` with strict equality
 * @returns New array holding the matching marks in their original order
 */
export function filterSpeechMarksByType(marks, type) {
    const hasRequestedType = (mark) => mark.type === type;
    return marks.filter(hasRequestedType);
}
/**
 * Get speech mark at specific time.
 *
 * Runs a binary search over `marks`, which therefore must be sorted by
 * ascending `time`. An exact time match is returned immediately; otherwise
 * the mark whose `time` is nearest to `time` is returned, provided it lies
 * within `thresholdMs`. Note that "nearest" looks in both directions, so the
 * result may be a mark that starts shortly after `time`.
 *
 * @param marks - Speech marks, sorted by ascending `time`
 * @param time - Time in milliseconds
 * @param thresholdMs - Largest allowed distance, in milliseconds, between
 *   `time` and the returned mark's `time` (default 500)
 * @returns Speech mark at time, or null if `marks` is missing or empty or no
 *   mark lies within the threshold
 */
export function getSpeechMarkAtTime(marks, time, thresholdMs = 500) {
    if (!marks || marks.length === 0) {
        return null;
    }
    let left = 0;
    let right = marks.length - 1;
    let closest = null;
    while (left <= right) {
        const mid = Math.floor((left + right) / 2);
        const mark = marks[mid];
        if (mark.time === time) {
            return mark;
        }
        // The search visits the marks on both sides of the insertion point,
        // so tracking the nearest probe yields the nearest mark overall.
        if (closest === null ||
            Math.abs(mark.time - time) < Math.abs(closest.time - time)) {
            closest = mark;
        }
        if (mark.time < time) {
            left = mid + 1;
        }
        else {
            right = mid - 1;
        }
    }
    if (closest !== null && Math.abs(closest.time - time) <= thresholdMs) {
        return closest;
    }
    return null;
}
/**
 * Calculate statistics for speech marks.
 *
 * `totalDuration` is the `time` of the last mark in the array, so the marks
 * are expected to be ordered by time. Averages are reported as 0 rather than
 * Infinity or NaN when they cannot be computed: `avgWordDuration` when there
 * are no "word" marks, and `wordsPerMinute` when `totalDuration` is 0.
 *
 * @param marks - Speech marks
 * @returns Statistics about the marks: `count` (all marks), `wordCount`
 *   ("word" marks only), `totalDuration` and `avgWordDuration` in
 *   milliseconds, and `wordsPerMinute`. For an empty input every figure is 0
 *   and `wordCount` is omitted.
 */
export function getSpeechMarksStats(marks) {
    if (marks.length === 0) {
        return {
            count: 0,
            totalDuration: 0,
            avgWordDuration: 0,
            wordsPerMinute: 0,
        };
    }
    // Only a count is needed, so tally "word" marks without building a
    // filtered array.
    let wordCount = 0;
    for (const mark of marks) {
        if (mark.type === "word") {
            wordCount++;
        }
    }
    const totalDuration = marks[marks.length - 1].time;
    // Guard both divisions: a list without word marks, or one whose last
    // mark sits at time 0, would otherwise divide by zero.
    const avgWordDuration = wordCount > 0 ? totalDuration / wordCount : 0;
    const wordsPerMinute = totalDuration > 0
        ? (wordCount / totalDuration) * 60 * 1000
        : 0;
    return {
        count: marks.length,
        wordCount,
        totalDuration,
        avgWordDuration,
        wordsPerMinute,
    };
}
194
+ //# sourceMappingURL=speech-marks.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"speech-marks.js","sourceRoot":"","sources":["../src/speech-marks.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAIH;;;;;;;;;GASG;AACH,MAAM,UAAU,mBAAmB,CAClC,IAAY,EACZ,iBAAiB,GAAG,GAAG;IAEvB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5D,MAAM,SAAS,GAAG,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,iBAAiB,CAAC;IAElD,MAAM,KAAK,GAAiB,EAAE,CAAC;IAC/B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtB,0DAA0D;QAC1D,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;QAChD,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;YACtB,0CAA0C;YAC1C,SAAS,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC;YAC7B,SAAS;QACV,CAAC;QAED,KAAK,CAAC,IAAI,CAAC;YACV,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,SAAS,CAAC;YAC/B,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,SAAS;YAChB,GAAG,EAAE,SAAS,GAAG,IAAI,CAAC,MAAM;YAC5B,KAAK,EAAE,IAAI;SACX,CAAC,CAAC;QAEH,SAAS,GAAG,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC;IACrC,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,wBAAwB,CACvC,KAAmB,EACnB,IAAY;IAEZ,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;QAClB,OAAO,KAAK,CAAC,CAAC,uBAAuB;IACtC,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QAC3B,GAAG,IAAI;QACP,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC;KAClC,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACtD,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,IAAI,CAAC,KAAK,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAClC,OAAO,MAAM,CAAC,CAAC,iBAAiB;IACjC,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QAEtB,wBAAwB;QACxB,IAAI,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YACpD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,mBAAmB,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QACvD,CAAC;QAED,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,IAAI,IAAI,CAAC,KAAK,GAAG,CAAC,EAAE,CAAC;YACtD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,oBAAoB,IAAI
,CAAC,KAAK,GAAG,CAAC,CAAC;QACzD,CAAC;QAED,IAAI,OAAO,IAAI,CAAC,GAAG,KAAK,QAAQ,IAAI,IAAI,CAAC,GAAG,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YAC5D,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,kBAAkB,IAAI,CAAC,GAAG,YAAY,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC;QAC3E,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,KAAK,IAAI,OAAO,IAAI,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACnD,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QACzC,CAAC;QAED,2DAA2D;QAC3D,IAAI,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5C,MAAM,CAAC,IAAI,CACV,QAAQ,CAAC,WAAW,IAAI,CAAC,IAAI,iCAAiC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,GAAG,CAClF,CAAC;QACH,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAmB;IACnD,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACvB,OAAO,KAAK,CAAC;IACd,CAAC;IAED,yBAAyB;IACzB,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC5D,MAAM,MAAM,GAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC1B,MAAM,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE3C,yCAAyC;QACzC,IAAI,OAAO,CAAC,KAAK,IAAI,QAAQ,CAAC,GAAG,EAAE,CAAC;YACnC,2BAA2B;YAC3B,QAAQ,CAAC,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,GAAG,EAAE,OAAO,CAAC,GAAG,CAAC,CAAC;YACnD,QAAQ,CAAC,KAAK,GAAG,QAAQ,CAAC,KAAK,GAAG,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC;YACtD,QAAQ,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,mBAAmB;QAC3E,CAAC;aAAM,CAAC;YACP,8BAA8B;YAC9B,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACtB,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,uBAAuB,CACtC,KAAmB,EACnB,IAAkC;IAElC,OAAO,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AACnD,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,mBAAmB,CAClC,KAAmB,EACnB,IAAY;IAEZ,+BAA+B;IAC/B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,KAAK,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC;IAC7B,IAAI,OAAO,GAAsB,IAAI,
CAAC;IAEtC,OAAO,IAAI,IAAI,KAAK,EAAE,CAAC;QACtB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;QAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC;QAExB,IAAI,IAAI,CAAC,IAAI,KAAK,IAAI,EAAE,CAAC;YACxB,OAAO,IAAI,CAAC;QACb,CAAC;QAED,qBAAqB;QACrB,IACC,CAAC,OAAO;YACR,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,EACzD,CAAC;YACF,OAAO,GAAG,IAAI,CAAC;QAChB,CAAC;QAED,IAAI,IAAI,CAAC,IAAI,GAAG,IAAI,EAAE,CAAC;YACtB,IAAI,GAAG,GAAG,GAAG,CAAC,CAAC;QAChB,CAAC;aAAM,CAAC;YACP,KAAK,GAAG,GAAG,GAAG,CAAC,CAAC;QACjB,CAAC;IACF,CAAC;IAED,6DAA6D;IAC7D,IAAI,OAAO,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,EAAE,CAAC;QACrD,OAAO,OAAO,CAAC;IAChB,CAAC;IAED,OAAO,IAAI,CAAC;AACb,CAAC;AAED;;;;;GAKG;AACH,MAAM,UAAU,mBAAmB,CAAC,KAAmB;IACtD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO;YACN,KAAK,EAAE,CAAC;YACR,aAAa,EAAE,CAAC;YAChB,eAAe,EAAE,CAAC;YAClB,cAAc,EAAE,CAAC;SACjB,CAAC;IACH,CAAC;IAED,MAAM,SAAS,GAAG,uBAAuB,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC;IACzD,MAAM,aAAa,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC;IACnD,MAAM,eAAe,GAAG,aAAa,GAAG,SAAS,CAAC,MAAM,CAAC;IACzD,MAAM,cAAc,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,aAAa,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC;IAEtE,OAAO;QACN,KAAK,EAAE,KAAK,CAAC,MAAM;QACnB,SAAS,EAAE,SAAS,CAAC,MAAM;QAC3B,aAAa;QACb,eAAe;QACf,cAAc;KACd,CAAC;AACH,CAAC"}
@@ -0,0 +1,360 @@
1
+ /**
2
+ * Core types for server-side TTS providers
3
+ * @module @pie-players/tts-server-core
4
+ */
5
+ /**
6
+ * Speech mark representing a timing event in synthesized speech
7
+ * Unified format across all TTS providers
8
+ */
9
+ export interface SpeechMark {
10
+ /** Milliseconds from start of audio */
11
+ time: number;
12
+ /** Type of speech mark */
13
+ type: "word" | "sentence" | "ssml";
14
+ /** Character index in original text (inclusive) */
15
+ start: number;
16
+ /** Character index in original text (exclusive) */
17
+ end: number;
18
+ /** The actual word or text */
19
+ value: string;
20
+ }
21
+ /**
22
+ * Standard TTS parameters based on W3C Web Speech API and SSML specifications.
23
+ *
24
+ * These parameters are widely supported across TTS providers (browsers, cloud services)
25
+ * and follow established standards:
26
+ * - W3C Web Speech API (SpeechSynthesisUtterance)
27
+ * - W3C SSML 1.1 specification
28
+ * - BCP47 language tags (RFC 5646)
29
+ *
30
+ * @see https://w3c.github.io/speech-api/
31
+ * @see https://www.w3.org/TR/speech-synthesis/
32
+ */
33
+ export interface StandardTTSParameters {
34
+ /**
35
+ * Text to synthesize (plain text or SSML markup)
36
+ *
37
+ * @standard W3C Web Speech API
38
+ */
39
+ text: string;
40
+ /**
41
+ * Voice identifier (provider-specific voice names)
42
+ * Examples: "Joanna" (Polly), "en-US-Standard-A" (Google), browser voice names
43
+ *
44
+ * @standard W3C Web Speech API (concept)
45
+ * @note Voice names are provider-specific but the concept is standard
46
+ */
47
+ voice?: string;
48
+ /**
49
+ * Language code using BCP47 format (e.g., 'en-US', 'es-ES', 'fr-FR')
50
+ *
51
+ * @standard BCP47 (RFC 5646), W3C Web Speech API
52
+ * @see https://tools.ietf.org/html/rfc5646
53
+ */
54
+ language?: string;
55
+ /**
56
+ * Speech rate (speed multiplier)
57
+ * - Range: 0.25 to 4.0
58
+ * - Default: 1.0 (normal speed)
59
+ * - 0.5 = half speed, 2.0 = double speed
60
+ *
61
+ * @standard W3C Web Speech API, SSML <prosody rate>
62
+ */
63
+ rate?: number;
64
+ /**
65
+ * Pitch adjustment
66
+ * - Range: -20 to +20 semitones (or 0 to 2 as multiplier depending on provider)
67
+ * - Default: 0 (or 1.0 as multiplier)
68
+ * - Negative values = lower pitch, positive = higher pitch
69
+ *
70
+ * @standard W3C Web Speech API, SSML <prosody pitch>
71
+ * @note Some providers use semitones (-20 to +20), others use multipliers (0 to 2)
72
+ */
73
+ pitch?: number;
74
+ /**
75
+ * Volume level
76
+ * - Range: 0.0 to 1.0
77
+ * - Default: 1.0 (full volume)
78
+ * - 0.0 = silent, 0.5 = half volume
79
+ *
80
+ * @standard W3C Web Speech API, SSML <prosody volume>
81
+ */
82
+ volume?: number;
83
+ }
84
+ /**
85
+ * Provider-specific extensions for advanced TTS control.
86
+ *
87
+ * These parameters are NOT part of W3C standards and have varying support
88
+ * across providers. Use with caution for portability.
89
+ *
90
+ * Common extensions include:
91
+ * - Audio format selection (mp3, wav, ogg)
92
+ * - Sample rate control
93
+ * - Engine selection (neural vs standard)
94
+ * - Regional endpoints
95
+ * - Speech marks / word timing
96
+ *
97
+ * @note Providers may ignore unsupported extensions silently or throw errors
98
+ */
99
+ export interface TTSProviderExtensions {
100
+ /**
101
+ * Audio format for output
102
+ *
103
+ * @extension Common across providers but values vary
104
+ * @support AWS Polly (mp3, ogg, pcm), Google Cloud TTS (mp3, wav, ogg), Azure (mp3, wav, ogg)
105
+ */
106
+ format?: "mp3" | "wav" | "ogg" | "pcm";
107
+ /**
108
+ * Sample rate in Hz (e.g., 8000, 16000, 22050, 24000)
109
+ *
110
+ * @extension Common audio parameter
111
+ * @note Higher sample rates = better quality but larger file sizes
112
+ */
113
+ sampleRate?: number;
114
+ /**
115
+ * Request word-level timing data (speech marks)
116
+ *
117
+ * @extension Provider-specific but common pattern
118
+ * @support AWS Polly (SpeechMarks), Google Cloud TTS (timepoints), Azure (word boundaries)
119
+ * @default true
120
+ */
121
+ includeSpeechMarks?: boolean;
122
+ /**
123
+ * Provider-specific options (extensibility point)
124
+ *
125
+ * Examples:
126
+ * - AWS Polly: { engine: 'neural' | 'standard', lexiconNames: string[] }
127
+ * - Google Cloud TTS: { audioEncoding: string, effectsProfileId: string[] }
128
+ * - Azure: { voiceType: string, stylesList: string[] }
129
+ *
130
+ * @extension Arbitrary provider-specific data
131
+ */
132
+ providerOptions?: Record<string, unknown>;
133
+ }
134
+ /**
135
+ * Complete synthesis request combining standard parameters and extensions.
136
+ *
137
+ * This interface provides the full set of options for text-to-speech synthesis,
138
+ * clearly separating W3C-standard parameters from provider-specific extensions.
139
+ *
140
+ * @example Basic usage (portable across providers)
141
+ * ```typescript
142
+ * const request: SynthesizeRequest = {
143
+ * text: "Hello world",
144
+ * voice: "Joanna",
145
+ * rate: 1.0,
146
+ * language: "en-US"
147
+ * };
148
+ * ```
149
+ *
150
+ * @example Advanced usage with extensions (provider-specific)
151
+ * ```typescript
152
+ * const request: SynthesizeRequest = {
153
+ * text: "Hello world",
154
+ * voice: "Joanna",
155
+ * rate: 1.0,
156
+ * // Extensions - may not be portable
157
+ * format: 'mp3',
158
+ * sampleRate: 24000,
159
+ * includeSpeechMarks: true,
160
+ * providerOptions: {
161
+ * engine: 'neural' // AWS Polly specific
162
+ * }
163
+ * };
164
+ * ```
165
+ */
166
+ export interface SynthesizeRequest extends StandardTTSParameters, TTSProviderExtensions {
167
+ }
168
+ /**
169
+ * Response from speech synthesis
170
+ */
171
+ export interface SynthesizeResponse {
172
+ /** Audio data (Buffer for server, base64 string for client) */
173
+ audio: Buffer | string;
174
+ /** MIME type of audio (e.g., 'audio/mpeg') */
175
+ contentType: string;
176
+ /** Speech marks for word-level timing */
177
+ speechMarks: SpeechMark[];
178
+ /** Metadata about the synthesis */
179
+ metadata: SynthesizeMetadata;
180
+ }
181
+ /**
182
+ * Metadata about synthesized speech
183
+ */
184
+ export interface SynthesizeMetadata {
185
+ /** Provider that generated the audio */
186
+ providerId: string;
187
+ /** Voice ID used */
188
+ voice: string;
189
+ /** Audio duration in seconds */
190
+ duration: number;
191
+ /** Character count of input text */
192
+ charCount: number;
193
+ /** Whether response was served from cache */
194
+ cached: boolean;
195
+ /** ISO timestamp of synthesis */
196
+ timestamp?: string;
197
+ }
198
+ /**
199
+ * Voice definition
200
+ */
201
+ export interface Voice {
202
+ /** Unique voice identifier */
203
+ id: string;
204
+ /** Human-readable name */
205
+ name: string;
206
+ /** Language name (e.g., "English", "Spanish") */
207
+ language: string;
208
+ /** Language code (e.g., "en-US", "es-ES") */
209
+ languageCode: string;
210
+ /** Gender of voice */
211
+ gender?: "male" | "female" | "neutral";
212
+ /** Voice quality level */
213
+ quality: "standard" | "premium" | "neural";
214
+ /** Supported features */
215
+ supportedFeatures: VoiceFeatures;
216
+ /** Provider-specific metadata */
217
+ providerMetadata?: Record<string, unknown>;
218
+ }
219
+ /**
220
+ * Voice feature flags
221
+ */
222
+ export interface VoiceFeatures {
223
+ /** Supports SSML markup */
224
+ ssml: boolean;
225
+ /** Supports emotional expression */
226
+ emotions: boolean;
227
+ /** Supports speaking styles */
228
+ styles: boolean;
229
+ }
230
+ /**
231
+ * Options for listing voices
232
+ */
233
+ export interface GetVoicesOptions {
234
+ /** Filter by language code */
235
+ language?: string;
236
+ /** Filter by quality level */
237
+ quality?: "standard" | "premium" | "neural";
238
+ /** Filter by gender */
239
+ gender?: "male" | "female" | "neutral";
240
+ }
241
+ /**
242
+ * Provider capabilities split into standard features and extensions.
243
+ *
244
+ * This interface helps consumers understand what features are universally
245
+ * supported (W3C standards) vs provider-specific extensions.
246
+ */
247
+ export interface ServerProviderCapabilities {
248
+ /**
249
+ * Standard W3C features that should be widely supported
250
+ */
251
+ standard: {
252
+ /**
253
+ * Supports SSML markup (W3C SSML 1.1)
254
+ *
255
+ * @standard W3C SSML 1.1
256
+ * @support Most cloud TTS providers, limited browser support
257
+ */
258
+ supportsSSML: boolean;
259
+ /**
260
+ * Supports pitch control via pitch parameter or SSML <prosody>
261
+ *
262
+ * @standard W3C Web Speech API, SSML <prosody pitch>
263
+ * @note May be via API parameter or SSML only
264
+ */
265
+ supportsPitch: boolean;
266
+ /**
267
+ * Supports rate (speed) control via rate parameter or SSML <prosody>
268
+ *
269
+ * @standard W3C Web Speech API, SSML <prosody rate>
270
+ */
271
+ supportsRate: boolean;
272
+ /**
273
+ * Supports volume control via volume parameter or SSML <prosody>
274
+ *
275
+ * @standard W3C Web Speech API, SSML <prosody volume>
276
+ * @note Often better handled client-side for server TTS
277
+ */
278
+ supportsVolume: boolean;
279
+ /**
280
+ * Supports multiple voices (voice selection)
281
+ *
282
+ * @standard W3C Web Speech API (concept)
283
+ */
284
+ supportsMultipleVoices: boolean;
285
+ /**
286
+ * Maximum text length in characters
287
+ *
288
+ * @note Varies by provider: Polly=3000, Google=5000, browser=~32k
289
+ */
290
+ maxTextLength: number;
291
+ };
292
+ /**
293
+ * Provider-specific extensions
294
+ */
295
+ extensions: {
296
+ /**
297
+ * Supports word-level timing data (speech marks)
298
+ *
299
+ * @extension Provider-specific but common
300
+ * @support AWS Polly ✅, Google Cloud TTS ✅, Azure TTS ✅, Browser ⚠️
301
+ * @note Format and precision vary by provider
302
+ */
303
+ supportsSpeechMarks: boolean;
304
+ /**
305
+ * Supported audio output formats
306
+ *
307
+ * @extension Common but not standardized
308
+ */
309
+ supportedFormats: ("mp3" | "wav" | "ogg" | "pcm")[];
310
+ /**
311
+ * Supports sample rate configuration
312
+ *
313
+ * @extension Common audio parameter
314
+ */
315
+ supportsSampleRate: boolean;
316
+ /**
317
+ * Provider-specific features (extensibility point)
318
+ *
319
+ * Examples:
320
+ * - AWS Polly: { engines: ['neural', 'standard'], lexicons: true }
321
+ * - Google Cloud TTS: { audioProfiles: true, voiceEffects: true }
322
+ * - Azure: { styles: true, emotions: true }
323
+ *
324
+ * @extension Arbitrary provider capabilities
325
+ */
326
+ providerSpecific?: Record<string, unknown>;
327
+ };
328
+ }
329
+ /**
330
+ * TTS error codes
331
+ */
332
+ export declare enum TTSErrorCode {
333
+ INVALID_REQUEST = "INVALID_REQUEST",
334
+ INVALID_VOICE = "INVALID_VOICE",
335
+ INVALID_PROVIDER = "INVALID_PROVIDER",
336
+ TEXT_TOO_LONG = "TEXT_TOO_LONG",
337
+ PROVIDER_ERROR = "PROVIDER_ERROR",
338
+ NETWORK_ERROR = "NETWORK_ERROR",
339
+ AUTHENTICATION_ERROR = "AUTHENTICATION_ERROR",
340
+ RATE_LIMIT_EXCEEDED = "RATE_LIMIT_EXCEEDED",
341
+ INITIALIZATION_ERROR = "INITIALIZATION_ERROR"
342
+ }
343
+ /**
344
+ * TTS error with structured information
345
+ */
346
+ export declare class TTSError extends Error {
347
+ code: TTSErrorCode;
348
+ details?: Record<string, unknown> | undefined;
349
+ providerId?: string | undefined;
350
+ constructor(code: TTSErrorCode, message: string, details?: Record<string, unknown> | undefined, providerId?: string | undefined);
351
+ toJSON(): {
352
+ error: {
353
+ code: TTSErrorCode;
354
+ message: string;
355
+ details: Record<string, unknown> | undefined;
356
+ provider: string | undefined;
357
+ };
358
+ };
359
+ }
360
+ //# sourceMappingURL=types.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH;;;GAGG;AACH,MAAM,WAAW,UAAU;IAC1B,uCAAuC;IACvC,IAAI,EAAE,MAAM,CAAC;IAEb,0BAA0B;IAC1B,IAAI,EAAE,MAAM,GAAG,UAAU,GAAG,MAAM,CAAC;IAEnC,mDAAmD;IACnD,KAAK,EAAE,MAAM,CAAC;IAEd,mDAAmD;IACnD,GAAG,EAAE,MAAM,CAAC;IAEZ,8BAA8B;IAC9B,KAAK,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;;OAIG;IACH,IAAI,EAAE,MAAM,CAAC;IAEb;;;;;;OAMG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;;;OAOG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IAEd;;;;;;;;OAQG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;;;;OAOG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CAChB;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,WAAW,qBAAqB;IACrC;;;;;OAKG;IACH,MAAM,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,KAAK,GAAG,KAAK,CAAC;IAEvC;;;;;OAKG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IAEpB;;;;;;OAMG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B;;;;;;;;;OASG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC1C;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,MAAM,WAAW,iBAChB,SAAQ,qBAAqB,EAC5B,qBAAqB;CAAG;AAE1B;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,+DAA+D;IAC/D,KAAK,EAAE,MAAM,GAAG,MAAM,CAAC;IAEvB,8CAA8C;IAC9C,WAAW,EAAE,MAAM,CAAC;IAEpB,yCAAyC;IACzC,WAAW,EAAE,UAAU,EAAE,CAAC;IAE1B,mCAAmC;IACnC,QAAQ,EAAE,kBAAkB,CAAC;CAC7B;AAED;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,wCAAwC;IACxC,UAAU,EAAE,MAAM,CAAC;IAEnB,oBAAoB;IACpB,KAAK,EAAE,MAAM,CAAC;IAEd,gCAAgC;IAChC,QAAQ,EAAE,MAAM,CAAC;IAEjB,oCAAoC;IACpC,SAAS,EAAE,MAAM,CAAC;IAElB,6CAA6C;IAC7C,MAAM,EAAE,OAAO,CAAC;IAEhB,iCAAiC;IACjC,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,MAAM,WAAW,KAAK;IACrB,8BAA8B;IAC9B,EAAE,EAAE,MAAM,CAAC;IAEX,0BAA0B;IAC1B,IAAI,EAAE,MAAM,CAAC;IAEb,iDAAiD;IACjD,QAAQ,EAAE,MAAM,CAAC;IAEjB,6CAA6C;IAC7C,YAAY,EAAE,MAAM,CAAC;IAErB,sBAAsB;IACtB,MAAM,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;IAEvC,0BAA0B;IAC1B,OAAO,EAAE,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;IAE3C,yBAAyB;IACzB,iBAAiB,EAAE,aAAa,CAAC;IAEjC,iCAAiC;IACjC,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CAC3C;AAED;;GAEG;AACH,M
AAM,WAAW,aAAa;IAC7B,2BAA2B;IAC3B,IAAI,EAAE,OAAO,CAAC;IAEd,oCAAoC;IACpC,QAAQ,EAAE,OAAO,CAAC;IAElB,+BAA+B;IAC/B,MAAM,EAAE,OAAO,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAChC,8BAA8B;IAC9B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB,8BAA8B;IAC9B,OAAO,CAAC,EAAE,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;IAE5C,uBAAuB;IACvB,MAAM,CAAC,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;CACvC;AAED;;;;;GAKG;AACH,MAAM,WAAW,0BAA0B;IAC1C;;OAEG;IACH,QAAQ,EAAE;QACT;;;;;WAKG;QACH,YAAY,EAAE,OAAO,CAAC;QAEtB;;;;;WAKG;QACH,aAAa,EAAE,OAAO,CAAC;QAEvB;;;;WAIG;QACH,YAAY,EAAE,OAAO,CAAC;QAEtB;;;;;WAKG;QACH,cAAc,EAAE,OAAO,CAAC;QAExB;;;;WAIG;QACH,sBAAsB,EAAE,OAAO,CAAC;QAEhC;;;;WAIG;QACH,aAAa,EAAE,MAAM,CAAC;KACtB,CAAC;IAEF;;OAEG;IACH,UAAU,EAAE;QACX;;;;;;WAMG;QACH,mBAAmB,EAAE,OAAO,CAAC;QAE7B;;;;WAIG;QACH,gBAAgB,EAAE,CAAC,KAAK,GAAG,KAAK,GAAG,KAAK,GAAG,KAAK,CAAC,EAAE,CAAC;QAEpD;;;;WAIG;QACH,kBAAkB,EAAE,OAAO,CAAC;QAE5B;;;;;;;;;WASG;QACH,gBAAgB,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;KAC3C,CAAC;CACF;AAED;;GAEG;AACH,oBAAY,YAAY;IACvB,eAAe,oBAAoB;IACnC,aAAa,kBAAkB;IAC/B,gBAAgB,qBAAqB;IACrC,aAAa,kBAAkB;IAC/B,cAAc,mBAAmB;IACjC,aAAa,kBAAkB;IAC/B,oBAAoB,yBAAyB;IAC7C,mBAAmB,wBAAwB;IAC3C,oBAAoB,yBAAyB;CAC7C;AAED;;GAEG;AACH,qBAAa,QAAS,SAAQ,KAAK;IAE1B,IAAI,EAAE,YAAY;IAElB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM;gBAHnB,IAAI,EAAE,YAAY,EACzB,OAAO,EAAE,MAAM,EACR,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,YAAA,EACjC,UAAU,CAAC,EAAE,MAAM,YAAA;IAW3B,MAAM;;;;;;;;CAUN"}